ztrie(3)

ztrie(3)

CZMQ Manual - CZMQ/1.4.1

Name

ztrie - simple trie for tokenizable strings

Synopsis

//  This is a draft class, and may change without notice. It is disabled in
//  stable builds by default. If you use this in applications, please ask
//  for it to be pushed to stable state. Use --enable-drafts to enable.
#ifdef CZMQ_BUILD_DRAFT_API
// Callback function for ztrie_node to destroy node data.
typedef void (ztrie_destroy_data_fn) (
    void **data);

//  *** Draft method, for development use, may change without warning ***
//  Creates a new ztrie.
CZMQ_EXPORT ztrie_t *
    ztrie_new (char delimiter);

//  *** Draft method, for development use, may change without warning ***
//  Destroy the ztrie.
CZMQ_EXPORT void
    ztrie_destroy (ztrie_t **self_p);

//  *** Draft method, for development use, may change without warning ***
//  Inserts a new route into the tree and attaches the data. Returns -1
//  if the route already exists, otherwise 0. This method takes ownership of
//  the provided data if a destroy_data_fn is provided.
CZMQ_EXPORT int
    ztrie_insert_route (ztrie_t *self, const char *path, void *data, ztrie_destroy_data_fn destroy_data_fn);

//  *** Draft method, for development use, may change without warning ***
//  Removes a route from the trie and destroys its data. Returns -1 if the
//  route does not exists, otherwise 0.
//  the start of the list call zlist_first (). Advances the cursor.
CZMQ_EXPORT int
    ztrie_remove_route (ztrie_t *self, const char *path);

//  *** Draft method, for development use, may change without warning ***
//  Returns true if the path matches a route in the tree, otherwise false.
CZMQ_EXPORT bool
    ztrie_matches (ztrie_t *self, const char *path);

//  *** Draft method, for development use, may change without warning ***
//  Returns the data of a matched route from last ztrie_matches. If the path
//  did not match, returns NULL. Do not delete the data as it's owned by
//  ztrie.
CZMQ_EXPORT void *
    ztrie_hit_data (ztrie_t *self);

//  *** Draft method, for development use, may change without warning ***
//  Returns the count of parameters that a matched route has.
CZMQ_EXPORT size_t
    ztrie_hit_parameter_count (ztrie_t *self);

//  *** Draft method, for development use, may change without warning ***
//  Returns the parameters of a matched route with named regexes from last
//  ztrie_matches. If the path did not match or the route did not contain any
//  named regexes, returns NULL.
CZMQ_EXPORT zhashx_t *
    ztrie_hit_parameters (ztrie_t *self);

//  *** Draft method, for development use, may change without warning ***
//  Returns the asterisk matched part of a route, if there has been no match
//  or no asterisk match, returns NULL.
CZMQ_EXPORT const char *
    ztrie_hit_asterisk_match (ztrie_t *self);

//  *** Draft method, for development use, may change without warning ***
//  Print the trie
CZMQ_EXPORT void
    ztrie_print (ztrie_t *self);

//  *** Draft method, for development use, may change without warning ***
//  Self test of this class.
CZMQ_EXPORT void
    ztrie_test (bool verbose);

#endif // CZMQ_BUILD_DRAFT_API
Please add '@interface' section in './../src/ztrie.c'.

Description

This is a variant of a trie or prefix tree where all the descendants of a node have a common prefix of the string associated with that node. This implementation is specialized for strings that can be tokenized by a delimiter like a URL, URI or URN. Routes in the tree can be matched by regular expressions and by using capturing groups parts of a matched route can be easily obtained.

Note that the performance for pure string based matching is okay but on short strings zhash and zhashx are 3-4 times faster.

Example

From ztrie_test method

// Create a new trie for matching strings that can be tokenized by a slash
// (e.g. URLs minus the protocol, address and port).
ztrie_t *self = ztrie_new ('/');
assert (self);

int ret = 0;

// Let's start by inserting a couple of routes into the trie.
// This one is for the route '/foo/bar' the slash at the beginning of the
// route is important because everything before the first delimiter will be
// discarded. A slash at the end of a route is optional though. The data
// associated with this node is passed without destroy function which means
// it must be destroyed by the caller.
int foo_bar_data = 10;
ret = ztrie_insert_route (self, "/foo/bar", &foo_bar_data, NULL);
assert (ret == 0);

// Now suppose we like to match all routes with two tokens that start with
// '/foo/' but aren't '/foo/bar'. This is possible by using regular
// expressions which are enclosed in an opening and closing curly bracket.
// Tokens that contain regular expressions are always match after string
// based tokens.
// Note: There is no order in which regular expressions are sorted thus
// if you enter multiple expressions for a route you will have to make
// sure they don't have overlapping results. For example '/foo/{[^/]+}'
// and '/foo/{\d+} having could turn out badly.
int foo_other_data = 100;
ret = ztrie_insert_route (self, "/foo/{[^/]+}", &foo_other_data, NULL);
assert (ret == 0);

// Regular expression are only matched against tokens of the same level.
// This allows us to append to are route with a regular expression as if
// it were a string.
ret = ztrie_insert_route (self, "/foo/{[^/]+}/gulp", NULL, NULL);
assert (ret == 0);

// Routes are identified by their endpoint, which is the last token of the route.
// It is possible to insert routes for a node that already exists but isn't an
// endpoint yet. The delimiter at the end of a route is optional and has no effect.
ret = ztrie_insert_route (self, "/foo/", NULL, NULL);
assert (ret == 0);

// If you try to insert a route which already exists the method will return -1.
ret = ztrie_insert_route (self, "/foo", NULL, NULL);
assert (ret == -1);

// It is not allowed to insert routes with empty tokens.
ret = ztrie_insert_route (self, "//foo", NULL, NULL);
assert (ret == -1);

// Everything before the first delimiter is ignored so 'foo/bar/baz' is equivalent
// to '/bar/baz'.
ret = ztrie_insert_route (self, "foo/bar/baz", NULL, NULL);
assert (ret == 0);
ret = ztrie_insert_route (self, "/bar/baz", NULL, NULL);
assert (ret == -1);

// Of course you are allowed to remove routes, in case there is data associated with a
// route and a destroy data function has been supplied that data will be destroyed.
ret = ztrie_remove_route (self, "/foo");
assert (ret == 0);

// Removing a non existent route will as well return -1.
ret = ztrie_remove_route (self, "/foo");
assert (ret == -1);

// Removing a route with a regular expression must exactly match the entered one.
ret = ztrie_remove_route (self, "/foo/{[^/]+}");
assert (ret == 0);

// Next we like to match a path by regular expressions and also extract matched
// parts of a route. This can be done by naming the regular expression. The name of a
// regular expression is entered at the beginning of the curly brackets and separated
// by a colon from the regular expression. The first one in this examples is named
// 'name' and names the expression '[^/]'. If there is no capturing group defined in
// the expression the whole matched string will be associated with this parameter. In
// case you don't like the get the whole matched string use a capturing group, like
// it has been done for the 'id' parameter. This is nice but you can even match as
// many parameter for a token as you like. Therefore simply put the parameter names
// separated by colons in front of the regular expression and make sure to add a
// capturing group for each parameter. The first parameter will be associated with
// the first capturing and so on.
char *data = (char *) malloc (80);
sprintf (data, "%s", "Hello World!");
ret = ztrie_insert_route (self, "/baz/{name:[^/]+}/{id:--(\\d+)}/{street:nr:(\\a+)(\\d+)}", data, NULL);
assert (ret == 0);

// There is a lot you can do with regular expression but matching routes
// of arbitrary length wont work. Therefore we make use of the asterisk
// operator. Just place it at the end of your route, e.g. '/config/bar/*'.
ret = ztrie_insert_route (self, "/config/bar/*", NULL, NULL);
assert (ret == 0);

// Appending to an asterisk as you would to with a regular expression
// isn't valid.
ret = ztrie_insert_route (self, "/config/bar/*/bar", NULL, NULL);
assert (ret == -1);

// The asterisk operator will only work as a leaf in the tree. If you
// enter an asterisk in the middle of your route it will simply be
// interpreted as a string.
ret = ztrie_insert_route (self, "/test/*/bar", NULL, NULL);
assert (ret == 0);

// If a parent has an asterisk as child it is not allowed to have
// other siblings.
ret = ztrie_insert_route (self, "/config/bar/foo/glup", NULL, NULL);
assert (ret != 0);

// Test matches
bool hasMatch = false;

// The route '/bar/foo' will fail to match as this route has never been inserted.
hasMatch = ztrie_matches (self, "/bar/foo");
assert (!hasMatch);

// The route '/foo/bar' will match and we can obtain the data associated with it.
hasMatch = ztrie_matches (self, "/foo/bar");
assert (hasMatch);
int foo_bar_hit_data = *((int *) ztrie_hit_data (self));
assert (foo_bar_data == foo_bar_hit_data);

// This route is part of another but is no endpoint itself thus the matches will fail.
hasMatch = ztrie_matches (self, "/baz/blub");
assert (!hasMatch);

// This route will match our named regular expressions route. Thus we can extract data
// from the route by their names.
hasMatch = ztrie_matches (self, "/baz/blub/--11/abc23");
assert (hasMatch);
char *match_data = (char *) ztrie_hit_data (self);
assert (streq ("Hello World!", match_data));
zhashx_t *parameters = ztrie_hit_parameters (self);
assert (zhashx_size (parameters) == 4);
assert (streq ("blub", (char *) zhashx_lookup (parameters, "name")));
assert (streq ("11", (char *) zhashx_lookup (parameters, "id")));
assert (streq ("abc", (char *) zhashx_lookup (parameters, "street")));
assert (streq ("23", (char *) zhashx_lookup (parameters, "nr")));
zhashx_destroy (&parameters);

// This will match our asterisk route '/config/bar/*'. As the result we
// can obtain the asterisk matched part of the route.
hasMatch = ztrie_matches (self, "/config/bar/foo/bar");
assert (hasMatch);
assert (streq (ztrie_hit_asterisk_match (self), "foo/bar"));

zstr_free (&data); ztrie_destroy (&self);

Authors

The CZMQ manual was written by Pieter Hintjens<moc.xitami|hp#moc.xitami|hp>.

Resources

Main web site: http://czmq.zeromq.org/

Report bugs to the ØMQ development mailing list: <gro.qmorez.stsil|ved-qmorez#gro.qmorez.stsil|ved-qmorez>

Copyright

Copyright (c) 1991-2010 iMatix Corporation and contributors. License LGPLv3+: GNU LGPL 3 or later <http://gnu.org/licenses/lgpl.html>. This is free software: you are free to change it and redistribute it. There is NO WARRANTY, to the extent permitted by law. For details see the files COPYING and COPYING.LESSER included with the CZMQ distribution.