Skip to content

SetTrie

settrie.SetTrie

Result(iter_id, auto_serialize=False)

Container holding the results of several operations of SetTrie. It behaves, basically, like an iterator.

Source code in settrie/SetTrie.py
51
52
53
54
55
56
57
def __init__(self, iter_id, auto_serialize = False):
    self.iter_id = iter_id
    self.as_is   = True
    if auto_serialize:
        self.as_is     = False
        self.to_string = re.compile("^'(.+)'$")
        self.to_float  = re.compile('^.*\\..*$')

SetTrie(binary_image=None)

Mapping container for efficient storage of key-value pairs where the keys are sets. Uses an efficient trie implementation. Supports querying for values associated to subsets or supersets of stored key sets.

Example
>>> from settrie import SetTrie
>>>
>>> # Create a SetTrie object
>>> stt = SetTrie()
>>>
>>> # Insert some sets
>>> stt.insert({2, 3}, 'id1')
>>> stt.insert({2, 3, 4.4}, 'id2')
>>> stt.insert({'Mon', 'Tue'}, 'days')
>>>
>>> # Find id by set
>>> print(stt.find({2, 3}))
>>>
>>> # Find ids of all supersets
>>> for id in stt.supersets({2, 3}):
>>>     print(id)
>>>
>>> # Find ids of all subsets
>>> for id in stt.subsets({2, 3}):
>>>     print(id)
>>>
>>> # Nested iteration over the sets and elements
>>> for st in stt:
>>>     print(st.id)
>>>     for e in st.elements:
>>>         print('  ', e)
>>>
>>> # Store as a pickle file file
>>> import pickle
>>> with open('my_settrie.pickle', 'wb') as f:
>>>     pickle.dump(stt, f)
>>>
>>> # Load from a pickle file
>>> with open('my_settrie.pickle', 'rb') as f:
>>>     tt = pickle.load(f)
>>>
>>> # Check that they are identical
>>> for t, st in zip(tt, stt):
>>>     assert t.id == st.id
>>>     for et, est in zip(t.elements, st.elements):
>>>         assert et == est
>>>
>>> # Remove sets by id
>>> stt.remove('id2')
>>> stt.remove('days')
>>>
>>> # After many .remove() calls, the tree has nodes marked as dirty,
>>> # calling .purge() removes them completely and frees RAM.
>>> stt.purge()
Source code in settrie/SetTrie.py
162
163
164
165
166
167
def __init__(self, binary_image=None):
    self.st_id	 = new_settrie()
    self.set_id	 = -1
    self.int_ids = None
    if binary_image is not None:
        self.load_from_binary_image(binary_image)

__getstate__()

Used by pickle.dump() (See https://docs.python.org/3/library/pickle.html)

Source code in settrie/SetTrie.py
189
190
191
192
def __getstate__(self):
    """ Used by pickle.dump() (See https://docs.python.org/3/library/pickle.html)
    """
    return self.save_as_binary_image()

__setstate__(state)

Used by pickle.load() (See https://docs.python.org/3/library/pickle.html)

Source code in settrie/SetTrie.py
194
195
196
197
198
def __setstate__(self, state):
    """ Used by pickle.load() (See https://docs.python.org/3/library/pickle.html)
    """
    self.st_id = new_settrie()
    self.load_from_binary_image(state)

find(set)

Finds the ID of the set matching the one provided.

Parameters:

Name Type Description Default
set set

Set for searching

required

Returns:

Type Description
str

id of the set with the exact match. An empty string if no match was found.

Source code in settrie/SetTrie.py
210
211
212
213
214
215
216
217
218
def find(self, set) -> str:
    """ Finds the ID of the set matching the one provided.

    Args:
        set (set): Set for searching
    Returns:
        id of the set with the exact match. An empty string if no match was found.
    """
    return find(self.st_id, str(set))

insert(set, id)

Inserts a new set into a SetTrie object.

Parameters:

Name Type Description Default
set Set

Set to add

required
id str

String representing the ID for the test

required
Source code in settrie/SetTrie.py
200
201
202
203
204
205
206
207
208
def insert(self, set: Set, id: str):
    """ Inserts a new set into a SetTrie object.

    Args:
        set: Set to add
        id: String representing the ID for the test
    """
    self.int_ids = None
    insert(self.st_id, str(set), id)

load_from_binary_image(binary_image)

Load the state of the c++ SetTrie object from a binary_image returned by a previous save_as_binary_image() call.

Parameters:

Name Type Description Default
binary_image list

A list of strings returned by save_as_binary_image()

required

Returns:

Type Description
bool

True on success, destroys, initializes and returns false on failure.

Source code in settrie/SetTrie.py
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
def load_from_binary_image(self, binary_image):
    """ Load the state of the c++ SetTrie object from a binary_image
        returned by a previous save_as_binary_image() call.

    Args:
        binary_image (list): A list of strings returned by save_as_binary_image()

    Returns:
        (bool): True on success, destroys, initializes and returns false on failure.
    """
    self.int_ids = None

    failed = False

    for binary_image_block in binary_image:
        if not push_binary_image_block(self.st_id, binary_image_block):
            failed = True
            break

    if not failed:
        failed = not push_binary_image_block(self.st_id, '')

    self.set_id = -1

    if failed:
        destroy_settrie(self.st_id)
        self.st_id = new_settrie()
        return False

    return True

purge()

Purges (reassigns node integer ids and frees RAM) after a series of remove() calls.

If you need to remove multiple elements, it is more efficient to avoid inserting or purging between remove() calls. Call purge() just once after you have finished removing.

Returns:

Type Description
int

The number of tree nodes freed.

Source code in settrie/SetTrie.py
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
def purge(self):
    """ Purges (reassigns node integer ids and frees RAM) after a series of remove() calls.

    If you need to remove multiple elements, it is more efficient to avoid inserting or purging between remove() calls.
    Call purge() just once after you have finished removing.

    Returns:
        (int): The number of tree nodes freed.
    """
    self.set_id	= -1

    size = purge(self.st_id, 1) # dry run

    if size == 0:
        return 0

    self.int_ids = None

    purge(self.st_id, 0)

    return size

remove(id)

Removes a set from the object either by string identifier or by its unique integer id.

If you need to remove multiple elements, it is more efficient to avoid inserting or purging between remove() calls. The object needs to build a dictionary with all the unique integer ids. Since ids change by insert() and purge() calls, the dictionary will be rebuilt each time, which requires iterating over the whole object.

Parameters:

Name Type Description Default
id str

Either the same string used as the id when the set was inserted via insert() or the unique integer id, if known. The latter is faster. The unique integer id is the id used by the iterator when you iterate over the whole object to identify the specific set.

required

Returns:

Type Description
int

Zero if the set was removed, a negative integer code on error.

Source code in settrie/SetTrie.py
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
def remove(self, id):
    """ Removes a set from the object either by string identifier or by its unique integer id.

    If you need to remove multiple elements, it is more efficient to avoid inserting or purging between remove() calls. The
    object needs to build a dictionary with all the unique integer ids. Since ids change by insert() and purge() calls, the
    dictionary will be rebuilt each time, which requires iterating over the whole object.

    Args:
        id (str): Either the same string used as the id when the set was inserted via `insert()` or the unique integer id, if known.
            The latter is faster. The unique integer id is the id used by the iterator when you iterate over the whole object
            to identify the specific set.

    Returns:
        (int): Zero if the set was removed, a negative integer code on error.
    """
    self.set_id	= -1

    if type(id) is int:
        self.int_ids = None
        return remove(self.st_id, id)

    if self.int_ids is None:
        self.int_ids = dict()
        int_id = next_set_id(self.st_id, -1)
        while int_id >= 0:
            self.int_ids[set_name(self.st_id, int_id)] = int_id
            int_id = next_set_id(self.st_id, int_id)

    int_id = self.int_ids.pop(id, None)
    if int_id is None:
        return -1

    return remove(self.st_id, int_id)

save_as_binary_image()

Saves the state of the c++ SetTrie object as a Python list of strings referred to a binary_image.

Returns:

Type Description
list

The binary_image containing the state of the SetTrie. There is not much you can do with it except serializing it as a Python (e.g., pickle) object and loading it into another SetTrie object. Pass it to the constructor to create an initialized object,

Source code in settrie/SetTrie.py
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
def save_as_binary_image(self):
    """ Saves the state of the c++ SetTrie object as a Python
        list of strings referred to a binary_image.

    Returns:
        (list): The binary_image containing the state of the SetTrie. There is
            not much you can do with it except serializing it as a Python
            (e.g., pickle) object and loading it into another SetTrie object.
            Pass it to the constructor to create an initialized object,
    """
    bi_idx = save_as_binary_image(self.st_id)
    if bi_idx == 0:
        return None

    bi = []
    bi_size = binary_image_size(bi_idx)
    for t in range(bi_size):
        bi.append(binary_image_next(bi_idx))

    destroy_binary_image(bi_idx)

    return bi

subsets(set)

Find all the subsets for a given set.

Parameters:

Name Type Description Default
set set

set for which we want to find all the supersets

required

Returns:

Type Description
Result

Iterator object with the IDs of the matching subsets.

Source code in settrie/SetTrie.py
231
232
233
234
235
236
237
238
239
240
def subsets(self, set) -> Result:
    """ Find all the subsets for a given set.

    Args:
        set (set): set for which we want to find all the supersets

    Returns:
        Iterator object with the IDs of the matching subsets.
    """
    return Result(subsets(self.st_id, str(set)))

supersets(set)

Find all the supersets of a given set.

Parameters:

Name Type Description Default
set set

set for which we want to find all the supersets

required

Returns:

Type Description
Result

Iterator object with the IDs of the matching supersets.

Source code in settrie/SetTrie.py
220
221
222
223
224
225
226
227
228
229
def supersets(self, set) -> Result:
    """ Find all the supersets of a given set.

    Args:
        set (set): set for which we want to find all the supersets

    Returns:
        Iterator object with the IDs of the matching supersets.
    """
    return Result(supersets(self.st_id, str(set)))

TreeSet(st_id, set_id)

Class returned by the iterator of SetTrie to simplify iterating over the elements while not computing a list of strings (calling c++ elements()) unless it is required.

Source code in settrie/SetTrie.py
91
92
93
def __init__(self, st_id, set_id):
    self.st_id  = st_id
    self.set_id = set_id