SetTrie
Loading...
Searching...
No Matches
settrie.h
Go to the documentation of this file.
1/* Mercury-Settrie
2
3 Copyright 2023 Banco Bilbao Vizcaya Argentaria, S.A.
4
5 This product includes software developed at
6
7 BBVA (https://www.bbva.com/)
8
9 Licensed under the Apache License, Version 2.0 (the "License");
10 you may not use this file except in compliance with the License.
11 You may obtain a copy of the License at
12
13 http://www.apache.org/licenses/LICENSE-2.0
14
15 Unless required by applicable law or agreed to in writing, software
16 distributed under the License is distributed on an "AS IS" BASIS,
17 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 See the License for the specific language governing permissions and
19 limitations under the License.
20*/
21#ifndef INCLUDED_SETTRIE
22#define INCLUDED_SETTRIE
23
24#include <algorithm>
25#include <functional>
26#include <map>
27#include <string>
28#include <vector>
29#include <cstdint>
30
31#define IMAGE_BUFF_SIZE 6136
32
33#define STATE_IN_USE 0
34#define STATE_HAS_SET_ID 1
35#define STATE_IS_GARBAGE 2
36
37typedef uint64_t ElementHash;
38typedef std::string String;
39
40typedef std::vector<ElementHash> BinarySet;
41typedef std::vector<String> StringSet;
42typedef std::vector<int> IdList;
43
44struct Name {
46 int count;
47};
48
49typedef std::map<ElementHash, Name> StringName;
50typedef std::map<int, String> IdMap;
51
52struct SetNode {
54
56
57 uint8_t state;
58};
59
60typedef std::vector<SetNode> BinaryTree;
61
62// This structure is 64encoded to 8K (3 input bytes == 24 bit -> 4 output chars == 24 bit). Therefore, its size is 6K.
63struct ImageBlock {
65
66 uint8_t buffer[IMAGE_BUFF_SIZE]; // makes sizeof(ImageBlock) == 6K
67};
68
69typedef std::vector<ImageBlock> BinaryImage;
71
72
73class SetTrie {
74
75 public:
76
78 SetNode root = {0, 0, 0, -1, STATE_IN_USE};
79 tree.push_back(root);
81 }
82
83 void insert (StringSet set, String id);
84 void insert (String str, String str_id, char split);
85 String find (StringSet set);
86 String find (String str, char split);
88 StringSet supersets (String str, char split);
90 StringSet subsets (String str, char split);
91 StringSet elements (int idx);
92 int remove (int idx);
93 int purge ();
94 bool load (pBinaryImage &p_bi);
95 bool save (pBinaryImage &p_bi);
96
97 IdMap id = {};
99
100#ifndef TEST
101 private:
102#endif
103
104 inline int insert(int idx, ElementHash value) {
105
106 if (tree[idx].idx_child == 0) {
107 SetNode node = {value, 0, 0, idx, STATE_IN_USE};
108
109 tree.push_back(node);
110
111 int idx_c = tree.size() - 1;
112
113 tree[idx].idx_child = idx_c;
114
115 return idx_c;
116 }
117
118 idx = tree[idx].idx_child;
119
120 while (true) {
121 if (tree[idx].value == value)
122 return idx;
123
124 if (tree[idx].idx_next == 0) {
125 SetNode node = {value, 0, 0, tree[idx].idx_parent, STATE_IN_USE};
126
127 tree.push_back(node);
128
129 int idx_c = tree.size() - 1;
130
131 tree[idx].idx_next = idx_c;
132
133 return idx_c;
134 }
135
136 idx = tree[idx].idx_next;
137 }
138 }
139
140 inline int insert(BinarySet set) {
141
142 int idx = 0;
143 int size = set.size();
144
145 if (size == 0) {
146 tree[0].state = STATE_HAS_SET_ID;
147
148 return 0;
149 }
150
151 for (int i = 0; i < size; i++)
152 idx = insert(idx, set[i]);
153
154 tree[idx].state = STATE_HAS_SET_ID;
155
156 return idx;
157 }
158
159 inline int find(int idx, ElementHash value) {
160
161 if ((idx = tree[idx].idx_child) == 0)
162 return 0;
163
164 while (true) {
165 if (tree[idx].value == value)
166 return idx;
167
168 if ((idx = tree[idx].idx_next) == 0)
169 return 0;
170 }
171 }
172
173 inline int find(BinarySet set) {
174
175 int idx = 0;
176 int size = set.size();
177
178 for (int i = 0; i < size; i++)
179 if ((idx = find(idx, set[i])) == 0)
180 return 0;
181
182 return idx;
183 }
184
185 inline void all_supersets(int t_idx) {
186
187 while (t_idx != 0) {
188 if (tree[t_idx].state == STATE_HAS_SET_ID)
189 result.push_back(t_idx);
190
191 if (int ci = tree[t_idx].idx_child)
192 all_supersets(ci);
193
194 t_idx = tree[t_idx].idx_next;
195 }
196 }
197
198 inline void supersets(int t_idx, int s_idx) {
199
200 while (t_idx != 0) {
201 ElementHash t_value, q_value;
202
203 int found = 0;
204
205 if ((t_value = tree[t_idx].value) == (q_value = query[s_idx])) {
206 if (s_idx == last_query_idx) {
207
208 if (tree[t_idx].state == STATE_HAS_SET_ID)
209 result.push_back(t_idx);
210
211 if (int ci = tree[t_idx].idx_child)
212 all_supersets(ci);
213
214 } else {
215 found = 1;
216 q_value = query[s_idx + 1];
217 }
218 }
219
220 int ni;
221 if (t_value < q_value && (ni = tree[t_idx].idx_child) != 0)
222 supersets(ni, s_idx + found);
223
224 t_idx = tree[t_idx].idx_next;
225 }
226 }
227
228 inline void subsets(int t_idx, int s_idx) {
229
230 while (t_idx != 0) {
231 ElementHash t_value;
232 if ((t_value = tree[t_idx].value) >= query[s_idx]) {
233 int ns_idx = s_idx;
234
235 while (ns_idx < last_query_idx && query[ns_idx] < t_value)
236 ns_idx++;
237
238 if (query[ns_idx] == t_value) {
239 if (tree[t_idx].state == STATE_HAS_SET_ID)
240 result.push_back(t_idx);
241
242 int ni;
243 if ((ni = tree[t_idx].idx_child) != 0) {
244 ns_idx++;
245
246 if (ns_idx <= last_query_idx)
247 subsets(ni, ns_idx);
248 }
249 }
250 }
251
252 t_idx = tree[t_idx].idx_next;
253 }
254 }
255
256 inline void assign_hh_nam(ElementHash hh, String &name) {
257 StringName::iterator it = hh_nam.find(hh);
258
259 if (it == hh_nam.end()) {
260 hh_nam[hh].count = 1;
261 hh_nam[hh].name = name; }
262 else
263 hh_nam[hh].count++;
264 }
265
266 int last_query_idx;
267
268 BinarySet query = {};
269 IdList result = {};
270 BinaryTree tree = {};
271 StringName hh_nam = {};
272};
273#endif
Definition: settrie.h:73
String find(StringSet set)
Definition: settrie.cpp:215
int remove(int idx)
Definition: settrie.cpp:387
StringSet subsets(StringSet set)
Definition: settrie.cpp:314
bool save(pBinaryImage &p_bi)
Definition: settrie.cpp:605
int num_dirty_nodes
Definition: settrie.h:98
void insert(StringSet set, String id)
Definition: settrie.cpp:173
SetTrie()
Definition: settrie.h:77
StringSet elements(int idx)
Definition: settrie.cpp:365
int purge()
Definition: settrie.cpp:451
StringSet supersets(StringSet set)
Definition: settrie.cpp:260
bool load(pBinaryImage &p_bi)
Definition: settrie.cpp:490
#define STATE_IN_USE
Definition: settrie.h:33
uint64_t ElementHash
Definition: settrie.h:37
BinaryImage * pBinaryImage
Definition: settrie.h:70
std::map< int, String > IdMap
Definition: settrie.h:50
std::vector< SetNode > BinaryTree
Definition: settrie.h:60
std::vector< String > StringSet
Definition: settrie.h:41
std::map< ElementHash, Name > StringName
Definition: settrie.h:49
#define IMAGE_BUFF_SIZE
Definition: settrie.h:31
std::vector< ImageBlock > BinaryImage
Definition: settrie.h:69
#define STATE_HAS_SET_ID
Definition: settrie.h:34
std::vector< ElementHash > BinarySet
Definition: settrie.h:40
std::vector< int > IdList
Definition: settrie.h:42
std::string String
Definition: settrie.h:38
Definition: settrie.h:63
int size
Definition: settrie.h:64
uint8_t buffer[IMAGE_BUFF_SIZE]
Definition: settrie.h:66
int block_num
Definition: settrie.h:64
Definition: settrie.h:44
int count
Definition: settrie.h:46
String name
Definition: settrie.h:45
Definition: settrie.h:52
int idx_child
Definition: settrie.h:55
int idx_parent
Definition: settrie.h:55
int idx_next
Definition: settrie.h:55
ElementHash value
Definition: settrie.h:53
uint8_t state
Definition: settrie.h:57