Reels
Loading...
Searching...
No Matches
reels.h
Go to the documentation of this file.
1/* Mercury-Reels
2
3 Copyright 2023 Banco Bilbao Vizcaya Argentaria, S.A.
4
5 This product includes software developed at
6
7 BBVA (https://www.bbva.com/)
8
9 Licensed under the Apache License, Version 2.0 (the "License");
10 you may not use this file except in compliance with the License.
11 You may obtain a copy of the License at
12
13 http://www.apache.org/licenses/LICENSE-2.0
14
15 Unless required by applicable law or agreed to in writing, software
16 distributed under the License is distributed on an "AS IS" BASIS,
17 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 See the License for the specific language governing permissions and
19 limitations under the License.
20*/
21#include <algorithm>
22#include <map>
23#include <math.h>
24#include <set>
25#include <string>
26#include <string.h>
27#include <time.h>
28#include <vector>
29#include <stdarg.h>
30#include <cstdint>
31
32#ifdef DEBUG
33#include <iostream>
34#include <fstream>
35#include <sstream>
36#endif
37
38#ifdef TEST
39#ifndef INCLUDED_CATCH2
40#define INCLUDED_CATCH2
41
42#include "catch.hpp"
43
44#endif
45#endif
46
47
48#ifndef INCLUDED_REELS_TYPES
49#define INCLUDED_REELS_TYPES
50
51
55namespace reels
56{
57
58#define IMAGE_BUFF_SIZE 6136
59#define PRIORITY_SEEN_FACTOR 2000000000
60#define DEFAULT_NUM_EVENTS 1000
61#define MAX_SEQ_LEN_IN_PREDICT 1000
62#define PREDICT_MAX_TIME (100*365.25*24*3600)
63#define WEIGHT_PRECISION 10000
64
65typedef uint64_t ElementHash;
66typedef std::string String;
67typedef const char * pChar;
68
69typedef time_t TimePoint;
70typedef struct tm TimeStruct;
71typedef double ExtFloat;
72
73
78struct ImageBlock {
79 int size;
81
83};
84
85
86typedef std::vector<ImageBlock> BinaryImage;
88
89
94struct BinEventPt {
97 double w;
98
105 bool operator==(const BinEventPt &o) const {
106 return e == o.e && d == o.d && round(WEIGHT_PRECISION*w) == round(WEIGHT_PRECISION*o.w);
107 }
108
115 bool operator<(const BinEventPt &o) const {
116 return e < o.e || (e == o.e && d < o.d) || (e == o.e && d == o.d && round(WEIGHT_PRECISION*w) < round(WEIGHT_PRECISION*o.w));
117 }
118};
119
120
129
130
/** \brief The metrics associated to an event identified by a BinEventPt.
*/
struct EventStat {
	uint64_t seen;			///< Number of times the event has been seen in the data.
	uint64_t code;			///< A code number identifying the event.
	uint64_t priority;		///< The (unique) current priority assigned in the priority queue (set) to this event.
};
140
141
146typedef std::map<BinEventPt, EventStat> EventMap;
147
148
153typedef std::map<uint64_t, BinEventPt> PriorityMap;
154
155
159typedef std::map<uint64_t, uint64_t> EventCodeMap;
160
161
166 uint64_t n_succ_seen;
167 uint64_t n_succ_target;
168 uint64_t n_incl_seen;
169 uint64_t n_incl_target;
170 uint64_t sum_dep;
171 int n_dep;
172};
173
174
178typedef std::map<uint64_t, CodeInTreeStatistics> CodeInTreeStatMap;
179
180
185 uint64_t seen;
187};
188
189
194typedef std::map<ElementHash, StringUsage> StringUsageMap;
195
196
201typedef std::vector<ElementHash> ClientIDs;
202
203
208typedef std::set<ElementHash> ClientIDSet;
209
210
213typedef std::map<TimePoint, uint64_t> Clip;
214
215
220typedef std::map<ElementHash, Clip> ClipMap;
221typedef ClipMap * pClipMap;
222
223
228typedef std::map<ElementHash, TimePoint> TargetMap;
230
231
236typedef std::vector<double> TimesToTarget;
237
238
241typedef std::set<uint64_t> CodeSet;
242typedef CodeSet * pCodeSet;
243
244
252
253
255
256
259typedef std::vector<OptimizeEvalItem> OptimizeEval;
260
261
265 uint64_t code;
266 double score;
267};
268
269
272typedef std::vector <CodeScoreItem> CodeScores;
273
274
277typedef std::map<uint64_t, int> ChildIndex;
278
279
289
290
295typedef std::vector<CodeTreeNode> CodeTree;
297
298
302
303
307
308
309// Forward declaration of utilities used in other functions.
310uint64_t MurmurHash64A (const void *key, int len);
311bool image_put(pBinaryImage p_bi, void *p_data, int size);
312bool image_get(pBinaryImage p_bi, int &c_block, int &c_ofs, void *p_data, int size);
313
314
/** \brief A minimalist logger stored as a std::string providing sprintf functionality.
*/
class Logger {

	public:

		/** \brief Logging method wrapper supporting variable arguments.

			\param fmt The printf-style format string, followed by the arguments it consumes.
		*/
		void log_printf(const char *fmt, ...) {
			va_list args;
			va_start(args, fmt);
			log_printf(fmt, args);
			va_end(args);
		}

		/** \brief Add a nicely formatted string to the logger.

			Short messages are formatted in a 256 byte stack buffer. Longer messages are no longer truncated: the length
			reported by vsnprintf() is used to format them again in a buffer of the required size.

			\param fmt	The printf-style format string. A nullptr is ignored.
			\param args The arguments consumed by fmt as a va_list.
		*/
		void log_printf(const char *fmt, va_list args) {
			if (fmt == nullptr)
				return;

			char buffer[256];

			// args is consumed by the first vsnprintf(), a copy is required for a possible second pass.
			va_list args_copy;
			va_copy(args_copy, args);

			int needed = vsnprintf(buffer, sizeof(buffer), fmt, args);

			if (needed >= 0) {
				if (static_cast<size_t>(needed) < sizeof(buffer))
					log.append(buffer, static_cast<size_t>(needed));
				else {
					std::vector<char> big(static_cast<size_t>(needed) + 1);

					vsnprintf(big.data(), big.size(), fmt, args_copy);

					log.append(big.data(), static_cast<size_t>(needed));
				}
			}	// A negative value is an encoding error: nothing is appended.

			va_end(args_copy);
		}

		std::string log;	///< The std::string storing the content of the Logger is public. (Same type as reels::String.)
};
352
353
354class Clips; // Forward declaration
355
366class Events {
367
368 public:
369
371
372 bool store_strings = true;
374
375
385 void insert_row(pChar p_e,
386 pChar p_d,
387 double w);
388
389
402 bool define_event(pChar p_e,
403 pChar p_d,
404 double w,
405 uint64_t code);
406
407
446 String optimize_events (Clips &clips, TargetMap &targets, int num_steps = 10, int codes_per_step = 5, double threshold = 0.0001,
447 pCodeSet p_force_include = nullptr, pCodeSet p_force_exclude = nullptr, Transform x_form = tr_linear,
448 Aggregate agg = ag_longest, double p = 0.5, int depth = 1000, bool as_states = true,
449 double exp_decay = 0.00693, double lower_bound_p = 0.95, bool log_lift = true);
450
451
470 bool score_model(double &score, double &targ_prop, CodeInTreeStatMap &codes_stat, bool calc_tree_stats, Clips &clips,
471 TargetMap &targets, EventCodeMap code_dict, Transform x_form, Aggregate agg, double p, int depth, bool as_states);
472
473
486 CodeScores get_top_codes(CodeInTreeStatMap &codes_stat, double targ_prop, double exp_decay, double lower_bound_p,
487 bool log_lift);
488
489
497 ExtFloat s_h = 0, s_o = 0, sho = 0, ssh = 0, sso = 0;
498 int n = 0;
499
500 for (OptimizeEval::iterator it = ev.begin(); it != ev.end(); ++it) {
501 if (it->t_obs != 0) {
502 s_h += it->t_hat;
503 s_o += it->t_obs;
504 sho += it->t_hat*it->t_obs;
505 ssh += it->t_hat*it->t_hat;
506 sso += it->t_obs*it->t_obs;
507
508 n++;
509 }
510 }
511 if (n == 0)
512 return 0;
513
514 double d2 = (n*ssh - s_h*s_h)*(n*sso - s_o*s_o);
515
516 if (d2 <= 1e-20)
517 return 0;
518
519 return (n*sho - s_h*s_o)/sqrt(d2);
520 }
521
522
529 bool load(pBinaryImage &p_bi);
530
531
540 bool load(pBinaryImage &p_bi, int &c_block, int &c_ofs);
541
542
549 bool save(pBinaryImage &p_bi);
550
551
556 inline void set_max_num_events(int max_events) {
557 max_num_events = max_events;
558 }
559
560
565 inline void set_store_strings(bool store) {
566 store_strings = store;
567 }
568
569
576 inline ElementHash add_str(pChar p_str) {
577 int ll = strlen(p_str);
578
579 if (!ll)
580 return 0;
581
582 ElementHash hash = MurmurHash64A(p_str, ll);
583
584 if (!store_strings)
585 return hash;
586
587 StringUsageMap::iterator it = names_map.find(hash);
588
589 if (it != names_map.end())
590 it->second.seen++;
591 else {
592 StringUsage su = {1, p_str};
593
594 names_map[hash] = su;
595 }
596 return hash;
597 }
598
599
605 inline void erase_str(ElementHash hash) {
606
607 if (store_strings) {
608 StringUsageMap::iterator it = names_map.find(hash);
609
610 if (it != names_map.end()) {
611 if (--it->second.seen == 0)
612 names_map.erase(it);
613 }
614 }
615 }
616
617
625
626 if (store_strings) {
627 if (!hash)
628 return "";
629
630 StringUsageMap::iterator it = names_map.find(hash);
631
632 if (it != names_map.end())
633 return it->second.str;
634 }
635 return "\x04";
636 }
637
638
645 inline uint64_t event_code(BinEventPt &ept) {
646 EventMap::iterator it = event.find(ept);
647
648 if (it == event.end())
649 return 0;
650
651 return it->second.code;
652 }
653
654
659 inline int num_events() {
660 return event.size();
661 }
662
663
668 inline EventMap::iterator events_begin() {
669 return event.begin();
670 }
671
672
677 inline EventMap::iterator events_end() {
678 return event.end();
679 }
680
681
688 inline EventMap::iterator events_next_after_find(BinEventPt &ept) {
689 EventMap::iterator it = event.find(ept);
690
691 if (it != event.end())
692 ++it;
693
694 return it;
695 }
696
697#ifndef TEST
698 private:
699#endif
700
701 uint64_t priority_low = 0;
702 uint64_t next_code = 0;
703
704 StringUsageMap names_map = {};
705 EventMap event = {};
706 PriorityMap priority = {};
707};
708
709
713class Clients {
714
715 public:
716
718
719
727 int ll = strlen(p_cli);
728
729 return ll == 0 ? 0 : MurmurHash64A(p_cli, ll);
730 }
731
732
737 void add_client_id(pChar p_cli);
738
739
746 bool load(pBinaryImage &p_bi);
747
748
757 bool load(pBinaryImage &p_bi, int &c_block, int &c_ofs);
758
759
766 bool save(pBinaryImage &p_bi);
767
768 ClientIDs id = {};
770};
771
772
775class TimeUtil {
776
777 public:
778
780
781 char time_format[128] = "%Y-%m-%d %H:%M:%S";
782
783
791
792 TimeStruct ts = {0};
793
794 if (strptime(p_t, time_format, &ts) == nullptr)
795 return -1;
796
797 return timegm(&ts);
798 }
799
800
806 inline void set_time_format(pChar fmt) {
807 strncpy(time_format, fmt, sizeof(time_format) - 1);
808 }
809};
810
811
816class Clips : public TimeUtil {
817
818 public:
819
823 Clips() {}
824
825
831 Clips(Clients clients, Events events) : clients(clients), events(events) {}
832
833
839 clients = Clients();
840 events = Events();
841
842 clips = clip_map;
843 }
844
845
850 Clips(Clips &o_clips) {
851 pBinaryImage p_bi = new BinaryImage;
852
853 o_clips.save(p_bi);
854
855 load(p_bi);
856
857 delete p_bi;
858 }
859
860
872 bool scan_event(pChar p_e,
873 pChar p_d,
874 double w,
875 pChar p_c,
876 pChar p_t);
877
878
886 inline void insert_event(ElementHash client_hash,
887 uint64_t code,
888 TimePoint time_pt) {
889
890 ClipMap::iterator it = clips.find(client_hash);
891
892 if (it == clips.end()) {
893 Clip clip = {};
894
895 clip[time_pt] = code;
896
897 clips[client_hash] = clip;
898
899 } else
900 it->second[time_pt] = code;
901 };
902
903
910 bool load(pBinaryImage &p_bi);
911
912
919 bool save(pBinaryImage &p_bi);
920
921
927 return &clips;
928 }
929
930
935 inline uint64_t num_events() {
936
937 uint64_t ret = 0;
938
939 for (ClipMap::iterator it = clips.begin(); it != clips.end(); ++it)
940 ret += it->second.size();
941
942 return ret;
943 }
944
949 inline void collapse_to_states() {
950 for (ClipMap::iterator it_client = clips.begin(); it_client != clips.end(); ++it_client) {
951 uint64_t last_code = 0xA30BdefacedCabal;
952 for (Clip::const_iterator it = it_client->second.cbegin(); it != it_client->second.cend();) {
953 uint64_t code = it->second;
954 if (code == last_code)
955 it_client->second.erase(it++);
956 else
957 ++it;
958 last_code = code;
959 }
960 }
961 }
962
963#ifndef TEST
964 private:
965#endif
966
967 Clients clients;
968 Events events;
969 ClipMap clips = {};
970};
971
972
976class Targets : public TimeUtil {
977
978 public:
979
985 Targets(pClipMap p_clips, TargetMap target) : p_clips(p_clips), target(target) {
986 CodeTreeNode root = {0, 0, 0, {}};
987 tree.push_back(root);
988 }
989
990
1001 bool insert_target(pChar p_c, pChar p_t);
1002
1003
1018 bool fit(Transform x_form, Aggregate agg, double p, int depth, bool as_states);
1019
1020
1028
1029
1041
1042
1052
1053
1065 void verbose_predict_clip(const ElementHash &client,
1066 Clip &clip,
1067 TimePoint &obs_time,
1068 bool &target_yn,
1069 int &longest_seq,
1070 uint64_t &n_visits,
1071 uint64_t &n_targets,
1072 double &targ_mean_t);
1073
1074
1081 bool load(pBinaryImage &p_bi);
1082
1083
1090 bool save(pBinaryImage &p_bi);
1091
1092
1102 inline int update_node(int idx_parent, uint64_t code, bool target, ExtFloat time_d) {
1103
1104 if (idx_parent == 0) { // The root node contains the prediction of the zero-length clip.
1105 tree[0].n_seen++;
1106 if (target) {
1107 tree[0].n_target++;
1108 tree[0].sum_time_d += time_d;
1109 }
1110 }
1111
1112 ChildIndex::iterator it = tree[idx_parent].child.find(code);
1113
1114 if (it != tree[idx_parent].child.end()) {
1115 int idx = it->second;
1116
1117 tree[idx].n_seen++;
1118 if (target) {
1119 tree[idx].n_target++;
1120 tree[idx].sum_time_d += time_d;
1121 }
1122
1123 return idx;
1124 }
1125
1126 CodeTreeNode node = {1, target, time_d, {}};
1127
1128 tree.push_back(node);
1129
1130 int idx = tree.size() - 1;
1131
1132 tree[idx_parent].child[code] = idx;
1133
1134 return idx;
1135 }
1136
1137
/** \brief Density (pdf) for the normal distribution with mean 0 and standard deviation 1.

	\param x The point at which the density is evaluated.

	\return	 exp(-x^2/2)/sqrt(2*pi)
*/
inline double normal_pdf(double x) {
	// https://stackoverflow.com/questions/10847007/using-the-gaussian-probability-density-function-in-c

	constexpr double inv_sqrt_2pi = 0.3989422804014327;

	return inv_sqrt_2pi*exp(-0.5*x*x);
}
1151
1152
/** \brief Cumulative distribution (cdf) for the normal distribution with mean 0 and standard deviation 1.

	\param x The point at which the cumulative distribution is evaluated.

	\return	 erfc(-x/sqrt(2))/2
*/
inline double normal_cdf(double x) {
	// https://stackoverflow.com/questions/2328258/cumulative-normal-distribution-function-in-c-c
	// https://cplusplus.com/reference/cmath/erfc/

	constexpr double m_sqrt_dot_5 = 0.7071067811865476;

	return 0.5*erfc(-x*m_sqrt_dot_5);
}
1167
1168
1179 inline double agresti_coull_upper_bound(uint64_t n_hits, uint64_t n_total) {
1180 // https://github.com/msn0/agresti-coull-interval/blob/master/src/agresti.js
1181
1182 double n_tilde = n_total + binomial_z_sqr;
1183 double p_tilde = (n_hits + binomial_z_sqr_div_2)/n_tilde;
1184 double a = binomial_z*sqrt(p_tilde*(1 - p_tilde)/n_tilde);
1185
1186 return p_tilde + a;
1187 }
1188
1189
1200 inline double agresti_coull_lower_bound(uint64_t n_hits, uint64_t n_total) {
1201 // https://github.com/msn0/agresti-coull-interval/blob/master/src/agresti.js
1202
1203 double n_tilde = n_total + binomial_z_sqr;
1204 double p_tilde = (n_hits + binomial_z_sqr_div_2)/n_tilde;
1205 double a = binomial_z*sqrt(p_tilde*(1 - p_tilde)/n_tilde);
1206
1207 return p_tilde - a;
1208 }
1209
1210
1234 inline double predict_time(CodeTreeNode &node) {
1235
1236 if (node.n_target <= 0)
1237 return PREDICT_MAX_TIME;
1238
1239 double lb = std::max(1e-4, agresti_coull_lower_bound(node.n_target, node.n_seen));
1240 double mu_hat = transform == tr_linear ? ((double) node.sum_time_d)/node.n_target : exp(((double) node.sum_time_d)/node.n_target);
1241
1242 return mu_hat/lb;
1243 }
1244
1245
1252 inline double predict_clip(Clip clip) {
1253
1254 int idx = 0, n = 0;
1255
1256 double t[MAX_SEQ_LEN_IN_PREDICT];
1257
1258 for (Clip::reverse_iterator it = clip.rbegin(); it != clip.rend(); it++) {
1259 ChildIndex::iterator jt = tree[idx].child.find(it->second);
1260
1261 if (jt == tree[idx].child.end())
1262 break;
1263
1264 idx = jt->second;
1265
1266 t[n++] = predict_time(tree[idx]);
1267 }
1268
1269 if (n == 0)
1270 return predict_time(tree[0]);
1271
1272 if (aggregate == ag_longest)
1273 return t[n - 1];
1274
1275 double ret = t[0];
1276
1277 if (aggregate == ag_mean) {
1278 for (int i = 1; i < n; i++)
1279 ret += t[i];
1280
1281 return ret/n;
1282 }
1283
1284 for (int i = 1; i < n; i++)
1285 ret = std::min(ret, t[i]);
1286
1287 return ret;
1288 }
1289
1290
1301 bool recurse_tree_stats(int depth, int idx, int parent_idx, uint64_t code, CodeInTreeStatMap &codes_stat);
1302
1303
1308 inline int num_targets() {
1309 return target.size();
1310 }
1311
1312
1317 inline int tree_size() {
1318 return tree.size();
1319 }
1320
1321
1327 return p_clips;
1328 }
1329
1330
1336 return &tree;
1337 }
1338
1339
1345 return &target;
1346 }
1347
1348#ifndef TEST
1349 private:
1350#endif
1351
1352 pClipMap p_clips;
1353 TargetMap target;
1354 CodeTree tree = {};
1355 Transform transform = tr_undefined;
1356 Aggregate aggregate = ag_undefined;
1357 double binomial_z = 0;
1358 double binomial_z_sqr = 0;
1359 double binomial_z_sqr_div_2 = 0;
1360 int tree_depth = 0;
1361};
1362
1363} // namespace reels
1364
1365#endif // ifndef INCLUDED_REELS_TYPES
A container class to hold client ids.
Definition reels.h:713
void add_client_id(pChar p_cli)
Add a client ID to this container.
Definition reels.cpp:800
ElementHash hash_client_id(pChar p_cli)
Return the hash of a client ID as an ElementHash.
Definition reels.h:726
Clients()
Definition reels.h:717
bool save(pBinaryImage &p_bi)
Save the state of an object into a base64 mercury-dynamics serialization using image_put()
Definition reels.cpp:849
ClientIDSet id_set
The set of the same hashes for fast search.
Definition reels.h:769
bool load(pBinaryImage &p_bi)
Load the state of an object from a base64 mercury-dynamics serialization using image_get()
Definition reels.cpp:809
A container class to hold clips (sequences of events).
Definition reels.h:816
pClipMap clip_map()
The address of the internal ClipMap to be accessed from a Targets object.
Definition reels.h:926
void insert_event(ElementHash client_hash, uint64_t code, TimePoint time_pt)
The kernel of a scan_event() made inline, when all checks and conversion to binary are successful.
Definition reels.h:886
Clips()
Default construct a Clips object as an abstract method. This is required for declaring a Clips object...
Definition reels.h:823
void collapse_to_states()
Collapse the ClipMap to states.
Definition reels.h:949
Clips(Clients clients, Events events)
Construct a Clips object from a Clients and an Events objects.
Definition reels.h:831
bool load(pBinaryImage &p_bi)
Load the state of an object from a base64 mercury-dynamics serialization using image_get()
Definition reels.cpp:922
Clips(Clips &o_clips)
Copy-construct a Clips object.
Definition reels.h:850
bool scan_event(pChar p_e, pChar p_d, double w, pChar p_c, pChar p_t)
Process a row from a transaction file, to add the event to the client's timeline (clip).
Definition reels.cpp:877
Clips(const ClipMap &clip_map)
Construct a Clips object from a ClipMap to be copied.
Definition reels.h:838
uint64_t num_events()
Return the number of events stored in the internal ClipMap.
Definition reels.h:935
bool save(pBinaryImage &p_bi)
Save the state of an object into a base64 mercury-dynamics serialization using image_put()
Definition reels.cpp:979
A container class to hold events.
Definition reels.h:366
String optimize_events(Clips &clips, TargetMap &targets, int num_steps=10, int codes_per_step=5, double threshold=0.0001, pCodeSet p_force_include=nullptr, pCodeSet p_force_exclude=nullptr, Transform x_form=tr_linear, Aggregate agg=ag_longest, double p=0.5, int depth=1000, bool as_states=true, double exp_decay=0.00693, double lower_bound_p=0.95, bool log_lift=true)
Events optimizer.
Definition reels.cpp:275
uint64_t event_code(BinEventPt &ept)
Return the code associated with a BinEventPt if found in the object.
Definition reels.h:645
EventMap::iterator events_next_after_find(BinEventPt &ept)
Return the EventMap::iterator to the next BinEventPt after matching ev or nullptr if not found or is ...
Definition reels.h:688
void insert_row(pChar p_e, pChar p_d, double w)
Process a row from a transaction file.
Definition reels.cpp:202
bool load(pBinaryImage &p_bi)
Load the state of an object from a base64 mercury-dynamics serialization using image_get()
Definition reels.cpp:605
EventMap::iterator events_begin()
Return the EventMap::iterator to the first element in the private variable .events.
Definition reels.h:668
EventMap::iterator events_end()
Return the EventMap::iterator to past-the-end in the private variable .events.
Definition reels.h:677
int num_events()
Return the number of events stored in the object.
Definition reels.h:659
void erase_str(ElementHash hash)
Remove a string from the StringUsageMap by decreasing its use count and destroying it if not used any...
Definition reels.h:605
bool define_event(pChar p_e, pChar p_d, double w, uint64_t code)
Define events explicitly.
Definition reels.cpp:250
ElementHash add_str(pChar p_str)
Define a new string and push it into the StringUsageMap.
Definition reels.h:576
bool save(pBinaryImage &p_bi)
Save the state of an object into a base64 mercury-dynamics serialization using image_put()
Definition reels.cpp:722
double linear_correlation(OptimizeEval &ev)
Compute Pearson linear correlation between predicted and observed in an OptimizeEval.
Definition reels.h:496
bool store_strings
If true, the object stores the string values.
Definition reels.h:372
bool score_model(double &score, double &targ_prop, CodeInTreeStatMap &codes_stat, bool calc_tree_stats, Clips &clips, TargetMap &targets, EventCodeMap code_dict, Transform x_form, Aggregate agg, double p, int depth, bool as_states)
Internal: Do one step of the optimize_events() method.
Definition reels.cpp:439
CodeScores get_top_codes(CodeInTreeStatMap &codes_stat, double targ_prop, double exp_decay, double lower_bound_p, bool log_lift)
Internal: Extract the top top_n codes by lift from a CodeInTreeStatMap map.
Definition reels.cpp:535
String get_str(ElementHash hash)
Get a string content from its hash value.
Definition reels.h:624
void set_store_strings(bool store)
Sets the public property store_strings to simplify the python interface.
Definition reels.h:565
void set_max_num_events(int max_events)
Sets the public property max_num_events to simplify the python interface.
Definition reels.h:556
Events()
Definition reels.h:370
int max_num_events
The maximum number of recurrent events stored via insert_row()
Definition reels.h:373
A minimalist logger stored as a std::string providing sprintf functionality.
Definition reels.h:317
String log
The std::string storing the content of the Logger is public.
Definition reels.h:350
void log_printf(const char *fmt, va_list args)
Add a nicely formatted string smaller than 256 chars to the logger.
Definition reels.h:340
void log_printf(const char *fmt,...)
Logging method wrapper supporting variable arguments.
Definition reels.h:328
A container class to hold target events and do predictions based on clips.
Definition reels.h:976
bool fit(Transform x_form, Aggregate agg, double p, int depth, bool as_states)
Fit the prediction model.
Definition reels.cpp:1054
pTargetMap p_target()
The address of the internal TargetMap.
Definition reels.h:1344
int num_targets()
Return the size of the internal TargetMap.
Definition reels.h:1308
double agresti_coull_upper_bound(uint64_t n_hits, uint64_t n_total)
Upper bound of the Agresti-Coull confidence interval for a binomial proportion.
Definition reels.h:1179
pClipMap clip_map()
The address of the internal ClipMap.
Definition reels.h:1326
void verbose_predict_clip(const ElementHash &client, Clip &clip, TimePoint &obs_time, bool &target_yn, int &longest_seq, uint64_t &n_visits, uint64_t &n_targets, double &targ_mean_t)
Predict time for a single Clip returning all kind of prediction related information.
Definition reels.cpp:1199
int tree_size()
Return the size of the internal CodeTree.
Definition reels.h:1317
double normal_pdf(double x)
Density (pdf) for the normal distribution with mean 0 and standard deviation 1.
Definition reels.h:1144
Targets(pClipMap p_clips, TargetMap target)
Construct a Targets object from a Clips object and a TargetMap.
Definition reels.h:985
double predict_time(CodeTreeNode &node)
Predict the time to target for a sub-clip that starts at a node.
Definition reels.h:1234
pCodeTree p_tree()
The address of the internal CodeTree.
Definition reels.h:1335
bool load(pBinaryImage &p_bi)
Load the state of an object from a base64 mercury-dynamics serialization using image_get()
Definition reels.cpp:1275
double predict_clip(Clip clip)
Predict the time to target for a clip.
Definition reels.h:1252
bool insert_target(pChar p_c, pChar p_t)
Utility to fill the internal TargetMap target.
Definition reels.cpp:1031
double agresti_coull_lower_bound(uint64_t n_hits, uint64_t n_total)
Lower bound of the Agresti-Coull confidence interval for a binomial proportion.
Definition reels.h:1200
bool save(pBinaryImage &p_bi)
Save the state of an object into a base64 mercury-dynamics serialization using image_put()
Definition reels.cpp:1411
int update_node(int idx_parent, uint64_t code, bool target, ExtFloat time_d)
Update (fit) the CodeTree inserting new nodes as necessary.
Definition reels.h:1102
TimesToTarget predict()
Predict time to target for all the clients in the Clips object used to fit the model.
Definition reels.cpp:1147
bool recurse_tree_stats(int depth, int idx, int parent_idx, uint64_t code, CodeInTreeStatMap &codes_stat)
Recursive tree exploration updating a CodeInTreeStatMap map.
Definition reels.cpp:1247
double normal_cdf(double x)
Cumulative distribution (cdf) for the normal distribution with mean 0 and standard deviation 1.
Definition reels.h:1159
A common ancestor of Clips and Targets to avoid duplicating time management.
Definition reels.h:775
TimeUtil()
Definition reels.h:779
char time_format[128]
Date and time format for insert_row() and define_event()
Definition reels.h:781
void set_time_format(pChar fmt)
Sets the public property time_format to simplify the python interface.
Definition reels.h:806
TimePoint get_time(pChar p_t)
Convert time as a string to a TimePoint (using the object's time_format).
Definition reels.h:790
The namespace including everything, to simplify using Reels in a C++ application.
Definition reels.cpp:35
std::vector< CodeScoreItem > CodeScores
CodeScores: A vector of CodeScoreItem.
Definition reels.h:272
std::vector< OptimizeEvalItem > OptimizeEval
OptimizeEval: A vector of OptimizeEvalItem.
Definition reels.h:259
double ExtFloat
Accumulator type: Was a 128 bit float, changed to 64 for macos compatibility.
Definition reels.h:71
TargetMap * pTargetMap
Pointer to a TargetMap.
Definition reels.h:229
std::set< uint64_t > CodeSet
CodeSet: A set of event codes.
Definition reels.h:241
CodeTree * pCodeTree
Pointer to a CodeTree.
Definition reels.h:296
std::vector< ElementHash > ClientIDs
ClientIDs: A vector of client ID hashes.
Definition reels.h:201
std::map< TimePoint, uint64_t > Clip
Clip: The clip (timeline) of a client is just a map of time points and codes.
Definition reels.h:213
std::map< uint64_t, BinEventPt > PriorityMap
PriorityMap: A map with all the acceptable priority values in the EventMap as keys.
Definition reels.h:153
time_t TimePoint
A c 8 byte integer time point.
Definition reels.h:69
const char * pChar
A c string.
Definition reels.h:67
CodeTreeNode * pCodeTreeNode
Pointer to a CodeTreeNode.
Definition reels.h:288
std::map< ElementHash, Clip > ClipMap
ClipMap: A map from clients to clips.
Definition reels.h:220
uint64_t ElementHash
A binary hash of a string.
Definition reels.h:65
bool compare_optimize_eval(const OptimizeEvalItem a, const OptimizeEvalItem b)
Compare two OptimizeEvalItem structures for sorting.
Definition reels.cpp:431
std::set< ElementHash > ClientIDSet
ClientIDSet: A set of client ID hashes.
Definition reels.h:208
std::map< BinEventPt, EventStat > EventMap
EventMap: A map from the hashes in a BinEventPt to the usage data that defines the info about an event.
Definition reels.h:146
std::string String
A dynamically allocated c++ string.
Definition reels.h:66
ClipMap * pClipMap
Pointer to a ClipMap.
Definition reels.h:221
std::map< uint64_t, int > ChildIndex
ChildIndex: A map to find the next child in a CodeTree.
Definition reels.h:277
bool image_put(pBinaryImage p_bi, void *p_data, int size)
A function to push arbitrary raw data into a BinaryImage.
Definition reels.cpp:106
struct tm TimeStruct
A c structure of integer fields.
Definition reels.h:70
BinaryImage * pBinaryImage
A pointer to BinaryImage.
Definition reels.h:87
std::vector< ImageBlock > BinaryImage
An array of generic blocks to serialize anything.
Definition reels.h:86
std::map< uint64_t, uint64_t > EventCodeMap
EventCodeMap: A map converting the space of Event codes into a lower cardinality set for Event optimi...
Definition reels.h:159
std::map< ElementHash, TimePoint > TargetMap
TargetMap: A map from clients to target event TimePoints.
Definition reels.h:228
Transform
Transform: The transformation applied to time differences. (And inverted again in predict()....
Definition reels.h:301
@ tr_linear
Definition reels.h:301
@ tr_undefined
Definition reels.h:301
@ tr_log
Definition reels.h:301
std::map< uint64_t, CodeInTreeStatistics > CodeInTreeStatMap
CodeInTreeStatMap: A map to store all the CodeInTreeStatistics by code.
Definition reels.h:178
std::vector< CodeTreeNode > CodeTree
CodeTree: A tree of fitted targets.
Definition reels.h:295
CodeSet * pCodeSet
Pointer to a CodeSet.
Definition reels.h:242
uint64_t MurmurHash64A(const void *key, int len)
MurmurHash2, 64-bit versions, by Austin Appleby.
Definition reels.cpp:56
std::map< ElementHash, StringUsage > StringUsageMap
StringUsageMap: A map from hashes to string and number of times the string is used.
Definition reels.h:194
bool image_get(pBinaryImage p_bi, int &c_block, int &c_ofs, void *p_data, int size)
A function to get an arbitrary raw data block from a BinaryImage.
Definition reels.cpp:168
Aggregate
Aggregate: The method used to aggregate predictions for different sequence lengths.
Definition reels.h:306
@ ag_undefined
Definition reels.h:306
@ ag_longest
Definition reels.h:306
@ ag_mean
Definition reels.h:306
@ ag_minimax
Definition reels.h:306
std::vector< double > TimesToTarget
TimesToTarget: A vector of predictions.
Definition reels.h:236
ClientsServer clients
Definition reels.cpp:1534
ClipsServer clips
Definition reels.cpp:1535
TargetsServer targets
Definition reels.cpp:1536
EventsServer events
Definition reels.cpp:1533
#define PREDICT_MAX_TIME
Hundred years when the target was never seen.
Definition reels.h:62
#define MAX_SEQ_LEN_IN_PREDICT
The maximum sequence length used in prediction.
Definition reels.h:61
#define IMAGE_BUFF_SIZE
Makes sizeof(ImageBlock) == 6K.
Definition reels.h:58
#define DEFAULT_NUM_EVENTS
A size to store events in an Events object by default.
Definition reels.h:60
#define WEIGHT_PRECISION
10^ the number of digits at which weight is rounded
Definition reels.h:63
The binary representation of an event as stored in a transaction file.
Definition reels.h:94
double w
The "weight". A double representing a weight of the event.
Definition reels.h:97
bool operator<(const BinEventPt &o) const
Compare to another BinEventPt for strict order to support use as a key in a map.
Definition reels.h:115
ElementHash d
The "description". A binary hash of a string representing "the event".
Definition reels.h:96
bool operator==(const BinEventPt &o) const
Compare to another BinEventPt for identity to support use as a key in a map.
Definition reels.h:105
ElementHash e
The "emitter". A binary hash of a string representing "owner of event".
Definition reels.h:95
The binary representation of a transaction in a transaction file.
Definition reels.h:125
ElementHash c
The "client". A binary hash of a string representing "the actor".
Definition reels.h:126
TimePoint t
The "time". A timestamp of the event.
Definition reels.h:127
CodeInTreeStatistics: A structure to compute aggregated statistics for each code.
Definition reels.h:165
uint64_t n_succ_target
Definition reels.h:167
int n_dep
Number of elements sum_dep has.
Definition reels.h:171
uint64_t n_incl_target
Definition reels.h:169
uint64_t n_succ_seen
Definition reels.h:166
uint64_t sum_dep
Sum of tree depth to estimate mean depth.
Definition reels.h:170
uint64_t n_incl_seen
Definition reels.h:168
CodeScoreItem: A structure to sort codes by lift.
Definition reels.h:264
double score
The score.
Definition reels.h:266
uint64_t code
The code.
Definition reels.h:265
CodeTreeNode: Each node in a fitted CodeTree.
Definition reels.h:282
uint64_t n_target
The number of clips that visited the node with the target.
Definition reels.h:284
uint64_t n_seen
The number of clips that visited the node (target and no target).
Definition reels.h:283
ExtFloat sum_time_d
Sum of time differences for the elements with a defined target.
Definition reels.h:285
ChildIndex child
A map of children by code (key) to index in the CodeTree.
Definition reels.h:286
The metrics associated to an event identified by a BinEventPt.
Definition reels.h:135
uint64_t seen
Number of times the event has been seen in the data.
Definition reels.h:136
uint64_t priority
The (unique) current priority assigned in the priority queue (set) to this event.
Definition reels.h:138
uint64_t code
A code number identifying the event.
Definition reels.h:137
A generic block structure to store object state in a Python-friendly way.
Definition reels.h:78
int block_num
The current block number in the BinaryImage.
Definition reels.h:80
uint8_t buffer[IMAGE_BUFF_SIZE]
The buffer.
Definition reels.h:82
int size
The number of already allocated bytes inside the current block.
Definition reels.h:79
OptimizeEvalItem: A structure to compare predicted and observed.
Definition reels.h:247
TimePoint t_obs
The observed result: Zero for not a target or elapsed time since the previous event in clip to target...
Definition reels.h:249
int seq_len
The length of the predicting clip.
Definition reels.h:250
double t_hat
The prediction (elapsed time since the last event in clip to predicted target).
Definition reels.h:248
StringUsage: A pair of String and number of times it is used.
Definition reels.h:184
String str
The string as plain text.
Definition reels.h:186
uint64_t seen
Number of times string is used. Increase by add_str() calls to the same string, decreased/destroyed b...
Definition reels.h:185