diff options
author | michael-grunder <michael.grunder@gmail.com> | 2019-02-09 05:08:17 +0300 |
---|---|---|
committer | michael-grunder <michael.grunder@gmail.com> | 2019-02-09 05:08:17 +0300 |
commit | 9f0d7bc0a4d3bbf5a539b855a5d1c32abf9f2300 (patch) | |
tree | e6a539817659f246c7301f72df82d9506a6bbc71 | |
parent | f9928642b5e539bbdca43ec51ed9c9642cb42ded (diff) |
WIP: Reimplementation of cluster slot caching
RedisCluster currently has a high construction overhead because
every request has to issue a CLUSTER SLOTS command to map the
keyspace. The overhead is especially evident when a request issues
only a few commands.
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | cluster_library.c | 177 | ||||
-rw-r--r-- | cluster_library.h | 59 | ||||
-rw-r--r-- | common.h | 1 | ||||
-rw-r--r-- | redis.c | 13 | ||||
-rw-r--r-- | redis_cluster.c | 154 |
6 files changed, 362 insertions, 43 deletions
@@ -16,3 +16,4 @@ mkinstalldirs run-tests.php idea/* .cquery +tags diff --git a/cluster_library.c b/cluster_library.c index bcc5fb07..8682929d 100644 --- a/cluster_library.c +++ b/cluster_library.c @@ -651,6 +651,9 @@ cluster_node_create(redisCluster *c, char *host, size_t host_len, node->slave = slave; node->slaves = NULL; + /* Initialize our list of slot ranges */ + zend_llist_init(&node->slots, sizeof(redisSlotRange), NULL, 0); + // Attach socket node->sock = redis_sock_create(host, host_len, port, c->timeout, c->read_timeout, c->persistent, NULL, 0); @@ -690,10 +693,11 @@ cluster_node_add_slave(redisClusterNode *master, redisClusterNode *slave) /* Use the output of CLUSTER SLOTS to map our nodes */ static int cluster_map_slots(redisCluster *c, clusterReply *r) { + redisClusterNode *pnode, *master, *slave; + redisSlotRange range; int i,j, hlen, klen; short low, high; clusterReply *r2, *r3; - redisClusterNode *pnode, *master, *slave; unsigned short port; char *host, key[1024]; @@ -746,6 +750,10 @@ static int cluster_map_slots(redisCluster *c, clusterReply *r) { for (j = low; j<= high; j++) { c->master[j] = master; } + + /* Append to our list of slot ranges */ + range.low = low; range.high = high; + zend_llist_add_element(&master->slots, &range); } // Success @@ -758,7 +766,10 @@ PHP_REDIS_API void cluster_free_node(redisClusterNode *node) { zend_hash_destroy(node->slaves); efree(node->slaves); } + + zend_llist_destroy(&node->slots); redis_free_socket(node->sock); + efree(node); } @@ -802,6 +813,23 @@ static void ht_free_node(zval *data) { cluster_free_node(node); } +/* zend_llist of slot ranges -> persistent array */ +static redisSlotRange *slot_range_list_clone(zend_llist *src, size_t *count) { + redisSlotRange *dst, *range; + size_t i = 0; + + *count = zend_llist_count(src); + dst = pemalloc(*count * sizeof(*dst), 1); + + range = zend_llist_get_first(src); + while (range) { + memcpy(&dst[i++], range, sizeof(*range)); + range = zend_llist_get_next(src); + } + + 
return dst; +} + /* Construct a redisCluster object */ PHP_REDIS_API redisCluster *cluster_create(double timeout, double read_timeout, int failover, int persistent) @@ -860,10 +888,49 @@ cluster_free(redisCluster *c, int free_ctx TSRMLS_DC) /* Free any error we've got */ if (c->err) zend_string_release(c->err); + /* Invalidate our cache if we were redirected during operation */ + if (c->cache_key) { + if (c->redirections) { + zend_hash_del(&EG(persistent_list), c->cache_key); + } + zend_string_release(c->cache_key); + } + /* Free structure itself */ if (free_ctx) efree(c); } +/* Create a cluster slot cache structure */ +PHP_REDIS_API +redisCachedCluster *cluster_cache_create(zend_string *hash, HashTable *nodes) { + redisCachedCluster *cc; + redisCachedMaster *cm; + redisClusterNode *node; + + cc = pecalloc(1, sizeof(*cc), 1); + cc->hash = zend_string_dup(hash, 1); + + /* Copy nodes */ + cc->master = pecalloc(zend_hash_num_elements(nodes), sizeof(*cc->master), 1); + ZEND_HASH_FOREACH_PTR(nodes, node) { + /* Skip slaves */ + if (node->slave) continue; + + cm = &cc->master[cc->count]; + + /* Duplicate host/port and clone slot ranges */ + cm->host.addr = zend_string_dup(node->sock->host, 1); + cm->host.port = node->sock->port; + + /* Copy over slot ranges */ + cm->slot = slot_range_list_clone(&node->slots, &cm->slots); + + cc->count++; + } ZEND_HASH_FOREACH_END(); + + return cc; +} + /* Takes our input hash table and returns a straigt C array with elements, * which have been randomized. The return value needs to be freed. 
*/ static zval **cluster_shuffle_seeds(HashTable *seeds, int *len) { @@ -892,6 +959,107 @@ static zval **cluster_shuffle_seeds(HashTable *seeds, int *len) { return z_seeds; } +static void cluster_free_cached_master(redisCachedMaster *cm) { + size_t i; + + /* Free each slave entry */ + for (i = 0; i < cm->slaves; i++) { + zend_string_release(cm->slave[i].addr); + } + + /* Free other elements */ + zend_string_release(cm->host.addr); + pefree(cm->slave, 1); + pefree(cm->slot, 1); +} + +static redisClusterNode* +cached_master_clone(redisCluster *c, redisCachedMaster *cm) { + redisClusterNode *node; + size_t i; + + node = cluster_node_create(c, ZSTR_VAL(cm->host.addr), ZSTR_LEN(cm->host.addr), + cm->host.port, cm->slot[0].low, 0); + + /* Now copy in our slot ranges */ + for (i = 0; i < cm->slots; i++) { + zend_llist_add_element(&node->slots, &cm->slot[i]); + } + + return node; +} + +/* Destroy a persistent cached cluster */ +PHP_REDIS_API void cluster_cache_free(redisCachedCluster *rcc) { + size_t i; + + /* Free masters */ + for (i = 0; i < rcc->count; i++) { + cluster_free_cached_master(&rcc->master[i]); + } + + /* Free hash key */ + zend_string_release(rcc->hash); + pefree(rcc->master, 1); + pefree(rcc, 1); +} + +/* Initialize cluster from cached slots */ +PHP_REDIS_API +void cluster_init_cache(redisCluster *c, redisCachedCluster *cc) { + RedisSock *sock; + redisClusterNode *mnode, *slave; + redisCachedMaster *cm; + char key[HOST_NAME_MAX]; + size_t keylen, i, j, s; + int *map; + + /* Randomize seeds */ + map = emalloc(sizeof(*map) * cc->count); + for (i = 0; i < cc->count; i++) map[i] = i; + fyshuffle(map, cc->count); + + /* Iterate over masters */ + for (i = 0; i < cc->count; i++) { + /* Grab the next master */ + cm = &cc->master[map[i]]; + + /* Hash our host and port */ + keylen = snprintf(key, sizeof(key), "%s:%u", ZSTR_VAL(cm->host.addr), + cm->host.port); + + /* Create socket */ + sock = redis_sock_create(ZSTR_VAL(cm->host.addr), ZSTR_LEN(cm->host.addr), 
cm->host.port, + c->timeout, c->read_timeout, c->persistent, + NULL, 0); + + /* Add to seed nodes */ + zend_hash_str_update_ptr(c->seeds, key, keylen, sock); + + /* Create master node */ + mnode = cached_master_clone(c, cm); + + /* Add our master */ + zend_hash_str_update_ptr(c->nodes, key, keylen, mnode); + + /* Attach any slaves */ + for (s = 0; s < cm->slaves; s++) { + zend_string *host = cm->slave[s].addr; + slave = cluster_node_create(c, ZSTR_VAL(host), ZSTR_LEN(host), cm->slave[s].port, 0, 1); + cluster_node_add_slave(mnode, slave); + } + + /* Hook up direct slot access */ + for (j = 0; j < cm->slots; j++) { + for (s = cm->slot[j].low; s <= cm->slot[j].high; s++) { + c->master[s] = mnode; + } + } + } + + efree(map); +} + /* Initialize seeds */ PHP_REDIS_API int cluster_init_seeds(redisCluster *cluster, HashTable *ht_seeds) { @@ -908,6 +1076,7 @@ cluster_init_seeds(redisCluster *cluster, HashTable *ht_seeds) { if ((z_seed = z_seeds[i]) == NULL) continue; ZVAL_DEREF(z_seed); + /* Has to be a string */ if (Z_TYPE_P(z_seed) != IS_STRING) continue; @@ -940,7 +1109,7 @@ cluster_init_seeds(redisCluster *cluster, HashTable *ht_seeds) { efree(z_seeds); // Success if at least one seed seems valid - return zend_hash_num_elements(cluster->seeds) > 0 ? 0 : -1; + return zend_hash_num_elements(cluster->seeds) > 0 ? 
SUCCESS : FAILURE; } /* Initial mapping of our cluster keyspace */ @@ -977,10 +1146,10 @@ PHP_REDIS_API int cluster_map_keyspace(redisCluster *c TSRMLS_DC) { zend_throw_exception(redis_cluster_exception_ce, "Couldn't map cluster keyspace using any provided seed", 0 TSRMLS_CC); - return -1; + return FAILURE; } - return 0; + return SUCCESS; } /* Parse the MOVED OR ASK redirection payload when we get such a response diff --git a/cluster_library.h b/cluster_library.h index 62f7f9da..45223308 100644 --- a/cluster_library.h +++ b/cluster_library.h @@ -143,22 +143,43 @@ typedef enum CLUSTER_REDIR_TYPE { /* MULTI BULK response callback typedef */ typedef int (*mbulk_cb)(RedisSock*,zval*,long long, void* TSRMLS_DC); -/* Specific destructor to free a cluster object */ -// void redis_destructor_redis_cluster(zend_resource *rsrc TSRMLS_DC); +/* A list of covered slot ranges */ +typedef struct redisSlotRange { + unsigned short low; + unsigned short high; +} redisSlotRange; + +/* Simple host/port information for our cache */ +typedef struct redisCachedHost { + zend_string *addr; + unsigned short port; +} redisCachedHost; + +/* Storage for a cached master node */ +typedef struct redisCachedMaster { + redisCachedHost host; + + redisSlotRange *slot; /* Slots and count */ + size_t slots; + + redisCachedHost *slave; /* Slaves and their count */ + size_t slaves; +} redisCachedMaster; + +typedef struct redisCachedCluster { + // int rsrc_id; /* Zend resource ID */ + zend_string *hash; /* What we're cached by */ + redisCachedMaster *master; /* Array of masters */ + size_t count; /* Number of masters */ +} redisCachedCluster; /* A Redis Cluster master node */ typedef struct redisClusterNode { - /* Our Redis socket in question */ - RedisSock *sock; - - /* A slot where one of these lives */ - short slot; - - /* Is this a slave node */ - unsigned short slave; - - /* A HashTable containing any slaves */ - HashTable *slaves; + RedisSock *sock; /* Our Redis socket in question */ + short slot; 
/* One slot we believe this node serves */ + zend_llist slots; /* List of all slots we believe this node serves */ + unsigned short slave; /* Are we a slave */ + HashTable *slaves; /* Hash table of slaves */ } redisClusterNode; /* Forward declarations */ @@ -208,6 +229,11 @@ typedef struct redisCluster { /* Flag for when we get a CLUSTERDOWN error */ short clusterdown; + /* Key to our persistent list cache and number of redirections we've + * received since construction */ + zend_string *cache_key; + uint64_t redirections; + /* The last ERROR we encountered */ zend_string *err; @@ -362,6 +388,13 @@ PHP_REDIS_API int cluster_init_seeds(redisCluster *c, HashTable *ht_seeds); PHP_REDIS_API int cluster_map_keyspace(redisCluster *c TSRMLS_DC); PHP_REDIS_API void cluster_free_node(redisClusterNode *node); +/* Functions for interacting with cached slots maps */ +PHP_REDIS_API redisCachedCluster *cluster_cache_create(zend_string *hash, HashTable *nodes); +PHP_REDIS_API void cluster_cache_free(redisCachedCluster *rcc); +PHP_REDIS_API void cluster_init_cache(redisCluster *c, redisCachedCluster *rcc); + +/* Functions to facilitate cluster slot caching */ + PHP_REDIS_API char **cluster_sock_read_multibulk_reply(RedisSock *redis_sock, int *len TSRMLS_DC); @@ -6,6 +6,7 @@ #define PHPREDIS_NOTUSED(v) ((void)v) +#include "zend_llist.h" #include <ext/standard/php_var.h> #include <ext/standard/php_math.h> #include <zend_smart_str.h> @@ -50,6 +50,8 @@ extern zend_class_entry *redis_cluster_exception_ce; zend_class_entry *redis_ce; zend_class_entry *redis_exception_ce; +extern int le_cluster_slot_cache; + extern zend_function_entry redis_array_functions[]; extern zend_function_entry redis_cluster_functions[]; @@ -71,6 +73,7 @@ PHP_INI_BEGIN() PHP_INI_ENTRY("redis.arrays.consistent", "0", PHP_INI_ALL, NULL) /* redis cluster */ + PHP_INI_ENTRY("redis.clusters.cache_slots", "0", PHP_INI_ALL, NULL) PHP_INI_ENTRY("redis.clusters.auth", "", PHP_INI_ALL, NULL) 
PHP_INI_ENTRY("redis.clusters.persistent", "0", PHP_INI_ALL, NULL) PHP_INI_ENTRY("redis.clusters.read_timeout", "0", PHP_INI_ALL, NULL) @@ -549,6 +552,12 @@ free_reply_callbacks(RedisSock *redis_sock) redis_sock->current = NULL; } +/* Passthru for destroying cluster cache */ +static void cluster_cache_dtor(zend_resource *rsrc) { + redisCachedCluster *rcc = (redisCachedCluster*)rsrc->ptr; + cluster_cache_free(rcc); +} + void free_redis_object(zend_object *object) { @@ -731,6 +740,10 @@ PHP_MINIT_FUNCTION(redis) redis_cluster_ce = zend_register_internal_class(&redis_cluster_class_entry TSRMLS_CC); redis_cluster_ce->create_object = create_cluster_context; + /* Register our cluster cache list item */ + le_cluster_slot_cache = zend_register_list_destructors_ex(NULL, cluster_cache_dtor, + "Redis cluster slot cache", + module_number); /* Base Exception class */ #if HAVE_SPL diff --git a/redis_cluster.c b/redis_cluster.c index fd42db36..f7695f97 100644 --- a/redis_cluster.c +++ b/redis_cluster.c @@ -33,6 +33,7 @@ #include <SAPI.h> zend_class_entry *redis_cluster_ce; +int le_cluster_slot_cache; /* Exception handler */ zend_class_entry *redis_cluster_exception_ce; @@ -40,6 +41,10 @@ zend_class_entry *redis_cluster_exception_ce; /* Handlers for RedisCluster */ zend_object_handlers RedisCluster_handlers; +/* Helper when throwing normal cluster exceptions */ +#define CLUSTER_THROW_EXCEPTION(msg) \ + zend_throw_exception(redis_cluster_exception_ce, msg, 0 TSRMLS_CC); + ZEND_BEGIN_ARG_INFO_EX(arginfo_ctor, 0, 0, 1) ZEND_ARG_INFO(0, name) ZEND_ARG_ARRAY_INFO(0, seeds, 0) @@ -344,50 +349,146 @@ void free_cluster_context(zend_object *object) { zend_object_std_dtor(&cluster->std TSRMLS_CC); } -/* Attempt to connect to a Redis cluster provided seeds and timeout options */ -static void redis_cluster_init(redisCluster *c, HashTable *ht_seeds, double timeout, - double read_timeout, int persistent, char *auth, - size_t auth_len TSRMLS_DC) -{ - // Validate timeout - if (timeout < 0L || 
timeout > INT_MAX) { - zend_throw_exception(redis_cluster_exception_ce, - "Invalid timeout", 0 TSRMLS_CC); +/* Turn a seed array into a zend_string we can use to look up a slot cache */ +static zend_string *cluster_hash_seeds(HashTable *ht) { + smart_str hash = {0}; + zend_string *zstr; + zval *z_seed; + + ZEND_HASH_FOREACH_VAL(ht, z_seed) { + zstr = zval_get_string(z_seed); + smart_str_appendc(&hash, '['); + smart_str_appendl(&hash, ZSTR_VAL(zstr), ZSTR_LEN(zstr)); + smart_str_appendc(&hash, ']'); + zend_string_release(zstr); + } ZEND_HASH_FOREACH_END(); + + /* Not strictly needed but null terminate anyway */ + smart_str_0(&hash); + + /* smart_str is a zend_string internally */ + return hash.s; +} + +#define CACHING_ENABLED() (INI_INT("redis.clusters.cache_slots") == 1) +static redisCachedCluster *cluster_cache_load(HashTable *ht_seeds TSRMLS_DC) { + zend_resource *le; + zend_string *h; + + /* Short circuit if we're not caching slots or if our seeds don't have any + * elements, since it doesn't make sense to cache an empty string */ + if (!CACHING_ENABLED() || zend_hash_num_elements(ht_seeds) == 0) + return NULL; + + /* Look for cached slot information */ + h = cluster_hash_seeds(ht_seeds); + le = zend_hash_str_find_ptr(&EG(persistent_list), ZSTR_VAL(h), ZSTR_LEN(h)); + zend_string_release(h); + + if (le != NULL) { + /* Sanity check on our list type */ + if (le->type != le_cluster_slot_cache) { + php_error_docref(0 TSRMLS_CC, E_WARNING, "Invalid slot cache resource"); + return NULL; + } + + /* Success, return the cached entry */ + return le->ptr; } - // Validate our read timeout + /* Not found */ + return NULL; +} + +/* Cache a cluster's slot information in persistent_list if it's enabled */ +static int cluster_cache_store(HashTable *ht_seeds, HashTable *nodes TSRMLS_DC) { + redisCachedCluster *cc; + zend_string *hash; + + /* Short circuit if caching is disabled or there aren't any seeds */ + if (!CACHING_ENABLED() || zend_hash_num_elements(ht_seeds) == 0) + 
return !CACHING_ENABLED() ? SUCCESS : FAILURE; + + /* Construct our cache */ + hash = cluster_hash_seeds(ht_seeds); + cc = cluster_cache_create(hash, nodes); + zend_string_release(hash); + + /* Set up our resource */ +#if PHP_VERSION_ID < 70300 + zend_resource le; + le.type = le_cluster_slot_cache; + le.ptr = cc; + + zend_hash_update_mem(&EG(persistent_list), cc->hash, (void*)&le, sizeof(zend_resource)); +#else + zend_register_persistent_resource_ex(cc->hash, cc, le_cluster_slot_cache); +#endif + + return SUCCESS; +} + +/* Validate redis cluster construction arguments */ +static int +cluster_validate_args(double timeout, double read_timeout, HashTable *seeds) { + if (timeout < 0L || timeout > INT_MAX) { + CLUSTER_THROW_EXCEPTION("Invalid timeout"); + return FAILURE; + } if (read_timeout < 0L || read_timeout > INT_MAX) { - zend_throw_exception(redis_cluster_exception_ce, - "Invalid read timeout", 0 TSRMLS_CC); + CLUSTER_THROW_EXCEPTION("Invalid read timeout"); + return FAILURE; + } + if (zend_hash_num_elements(seeds) == 0) { + CLUSTER_THROW_EXCEPTION("Must pass seeds"); + return FAILURE; } - /* Make sure there are some seeds */ - if (zend_hash_num_elements(ht_seeds) == 0) { - zend_throw_exception(redis_cluster_exception_ce, - "Must pass seeds", 0 TSRMLS_CC); + return SUCCESS; +} + +static int cluster_init_from_seeds(redisCluster *c, HashTable *seeds TSRMLS_DC) { + int rv1 = cluster_init_seeds(c, seeds); + int rv2 = cluster_map_keyspace(c TSRMLS_CC); + if (rv1 == SUCCESS && rv2 == SUCCESS) { + return SUCCESS; + } else { + return FAILURE; } +} + +//static int cluster_init_from_cache(redisCluster *c, redisCachedCluster *rcc TSRMLS_DC) { +// cluster_init_cache(c, rcc); +// return cluster_map_keyspace(c TSRMLS_CC); +//} + +/* Attempt to connect to a Redis cluster provided seeds and timeout options */ +static void redis_cluster_init(redisCluster *c, HashTable *ht_seeds, double timeout, + double read_timeout, int persistent, char *auth, + size_t auth_len TSRMLS_DC) +{ + 
redisCachedCluster *cc; + + cluster_validate_args(timeout, read_timeout, ht_seeds); if (auth && auth_len > 0) { c->auth = zend_string_init(auth, auth_len, 0); } - /* Set our timeout and read_timeout which we'll pass through to the - * socket type operations */ c->timeout = timeout; c->read_timeout = read_timeout; - - /* Set our option to use or not use persistent connections */ c->persistent = persistent; /* Calculate the number of miliseconds we will wait when bouncing around, * (e.g. a node goes down), which is not the same as a standard timeout. */ c->waitms = (long)(timeout * 1000); - // Initialize our RedisSock "seed" objects - cluster_init_seeds(c, ht_seeds); - - // Create and map our key space - cluster_map_keyspace(c TSRMLS_CC); + /* Attempt to load from cache */ + if ((cc = cluster_cache_load(ht_seeds TSRMLS_CC))) { + cluster_init_cache(c, cc); + } else if (cluster_init_from_seeds(c, ht_seeds) == SUCCESS) { + cluster_cache_store(ht_seeds, c->nodes TSRMLS_CC); + } } /* Attempt to load a named cluster configured in php.ini */ @@ -913,7 +1014,7 @@ static void cluster_generic_delete(INTERNAL_FUNCTION_PARAMETERS, char *kw, int kw_len) { zval *z_ret = emalloc(sizeof(*z_ret)); - + // Initialize a LONG value to zero for our return ZVAL_LONG(z_ret, 0); @@ -3077,3 +3178,4 @@ PHP_METHOD(RedisCluster, command) { } /* vim: set tabstop=4 softtabstop=4 expandtab shiftwidth=4: */ + |