Reels
Loading...
Searching...
No Matches
reels.h
Go to the documentation of this file.
1/* Mercury-Reels
2
3 Copyright 2023 Banco Bilbao Vizcaya Argentaria, S.A.
4
5 This product includes software developed at
6
7 BBVA (https://www.bbva.com/)
8
9 Licensed under the Apache License, Version 2.0 (the "License");
10 you may not use this file except in compliance with the License.
11 You may obtain a copy of the License at
12
13 http://www.apache.org/licenses/LICENSE-2.0
14
15 Unless required by applicable law or agreed to in writing, software
16 distributed under the License is distributed on an "AS IS" BASIS,
17 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 See the License for the specific language governing permissions and
19 limitations under the License.
20*/
21#include <algorithm>
22#include <map>
23#include <math.h>
24#include <set>
25#include <string>
26#include <string.h>
27#include <time.h>
28#include <vector>
29#include <stdarg.h>
30#include <cstdint>
31
32#ifdef DEBUG
33#include <iostream>
34#include <fstream>
35#include <sstream>
36#endif
37
38#ifdef TEST
39#ifndef INCLUDED_CATCH2
40#define INCLUDED_CATCH2
41
42#include "catch.hpp"
43
44#endif
45#endif
46
47
48#ifndef INCLUDED_REELS_TYPES
49#define INCLUDED_REELS_TYPES
50
51
55namespace reels
56{
57
58#define IMAGE_BUFF_SIZE 6136
59#define PRIORITY_SEEN_FACTOR 2000000000
60#define DEFAULT_NUM_EVENTS 1000
61#define MAX_SEQ_LEN_IN_PREDICT 1000
62#define PREDICT_MAX_TIME (100*365.25*24*3600)
63#define WEIGHT_PRECISION 10000
64
65typedef uint64_t ElementHash;
66typedef std::string String;
67typedef const char * pChar;
68
69typedef time_t TimePoint;
70typedef struct tm TimeStruct;
71typedef double ExtFloat;
72
73
78struct ImageBlock {
79 int size;
81
83};
84
85
86typedef std::vector<ImageBlock> BinaryImage;
88
89
94struct BinEventPt {
97 double w;
98
105 bool operator==(const BinEventPt &o) const {
106 return e == o.e && d == o.d && round(WEIGHT_PRECISION*w) == round(WEIGHT_PRECISION*o.w);
107 }
108
115 bool operator<(const BinEventPt &o) const {
116 return e < o.e || (e == o.e && d < o.d) || (e == o.e && d == o.d && round(WEIGHT_PRECISION*w) < round(WEIGHT_PRECISION*o.w));
117 }
118};
119
120
128};
129
130
/** The metrics associated to an event identified by a BinEventPt.
*/
struct EventStat {
    uint64_t seen;      ///< Number of times the event has been seen in the data.
    uint64_t code;      ///< A code number identifying the event.
    uint64_t priority;  ///< The (unique) current priority assigned to this event in the priority queue.
};
140
141
146typedef std::map<BinEventPt, EventStat> EventMap;
147
148
153typedef std::map<uint64_t, BinEventPt> PriorityMap;
154
155
159typedef std::map<uint64_t, uint64_t> EventCodeMap;
160
161
166 uint64_t n_succ_seen;
167 uint64_t n_succ_target;
168 uint64_t n_incl_seen;
169 uint64_t n_incl_target;
170 uint64_t sum_dep;
171 int n_dep;
172};
173
174
178typedef std::map<uint64_t, CodeInTreeStatistics> CodeInTreeStatMap;
179
180
185 uint64_t seen;
187};
188
189
194typedef std::map<ElementHash, StringUsage> StringUsageMap;
195
196
201typedef std::vector<ElementHash> ClientIDs;
202
203
208typedef std::set<ElementHash> ClientIDSet;
209
210
213typedef std::map<TimePoint, uint64_t> Clip;
214
215
220typedef std::map<ElementHash, Clip> ClipMap;
221typedef ClipMap * pClipMap;
222
223
228typedef std::map<ElementHash, TimePoint> TargetMap;
230
231
236typedef std::vector<double> TimesToTarget;
237
238
241typedef std::set<uint64_t> CodeSet;
242typedef CodeSet * pCodeSet;
243
244
248 double t_hat;
251};
252
253
256typedef std::vector<OptimizeEvalItem> OptimizeEval;
257
258
262 uint64_t code;
263 double score;
264};
265
266
269typedef std::vector <CodeScoreItem> CodeScores;
270
271
274typedef std::map<uint64_t, int> ChildIndex;
275
276
280 uint64_t n_seen;
281 uint64_t n_target;
284};
286
287
292typedef std::vector<CodeTreeNode> CodeTree;
294
295
299
300
304
305
306// Forward declaration of utilities used in other functions.
307uint64_t MurmurHash64A (const void *key, int len);
308bool image_put(pBinaryImage p_bi, void *p_data, int size);
309bool image_get(pBinaryImage p_bi, int &c_block, int &c_ofs, void *p_data, int size);
310
311
314class Logger {
315
316 public:
317
/** Logging wrapper supporting variable arguments.

    Packs the variadic arguments into a va_list and forwards them to the va_list overload.

    \param fmt A printf-style format string.
    \param ... The arguments consumed by fmt.
*/
void log_printf(const char *fmt, ...) {
    va_list arg_list;

    va_start(arg_list, fmt);
    log_printf(fmt, arg_list);
    va_end(arg_list);
}
331
337 void log_printf(const char *fmt, va_list args) {
338 char buffer[256];
339
340 vsnprintf(buffer, sizeof(buffer), fmt, args);
341
342 log = log + buffer;
343 }
344
348};
349
350
351class Clips; // Forward declaration
352
363class Events {
364
365 public:
366
368
369 bool store_strings = true;
371
372
382 void insert_row(pChar p_e,
383 pChar p_d,
384 double w);
385
386
399 bool define_event(pChar p_e,
400 pChar p_d,
401 double w,
402 uint64_t code);
403
404
443 String optimize_events (Clips &clips, TargetMap &targets, int num_steps = 10, int codes_per_step = 5, double threshold = 0.0001,
444 pCodeSet p_force_include = nullptr, pCodeSet p_force_exclude = nullptr, Transform x_form = tr_linear,
445 Aggregate agg = ag_longest, double p = 0.5, int depth = 1000, bool as_states = true,
446 double exp_decay = 0.00693, double lower_bound_p = 0.95, bool log_lift = true);
447
448
467 bool score_model(double &score, double &targ_prop, CodeInTreeStatMap &codes_stat, bool calc_tree_stats, Clips &clips,
468 TargetMap &targets, EventCodeMap code_dict, Transform x_form, Aggregate agg, double p, int depth, bool as_states);
469
470
483 CodeScores get_top_codes(CodeInTreeStatMap &codes_stat, double targ_prop, double exp_decay, double lower_bound_p,
484 bool log_lift);
485
486
494 ExtFloat s_h = 0, s_o = 0, sho = 0, ssh = 0, sso = 0;
495 int n = 0;
496
497 for (OptimizeEval::iterator it = ev.begin(); it != ev.end(); ++it) {
498 if (it->t_obs != 0) {
499 s_h += it->t_hat;
500 s_o += it->t_obs;
501 sho += it->t_hat*it->t_obs;
502 ssh += it->t_hat*it->t_hat;
503 sso += it->t_obs*it->t_obs;
504
505 n++;
506 }
507 }
508 if (n == 0)
509 return 0;
510
511 double d2 = (n*ssh - s_h*s_h)*(n*sso - s_o*s_o);
512
513 if (d2 <= 1e-20)
514 return 0;
515
516 return (n*sho - s_h*s_o)/sqrt(d2);
517 }
518
519
526 bool load(pBinaryImage &p_bi);
527
528
537 bool load(pBinaryImage &p_bi, int &c_block, int &c_ofs);
538
539
546 bool save(pBinaryImage &p_bi);
547
548
553 inline void set_max_num_events(int max_events) {
554 max_num_events = max_events;
555 }
556
557
562 inline void set_store_strings(bool store) {
563 store_strings = store;
564 }
565
566
573 inline ElementHash add_str(pChar p_str) {
574 int ll = strlen(p_str);
575
576 if (!ll)
577 return 0;
578
579 ElementHash hash = MurmurHash64A(p_str, ll);
580
581 if (!store_strings)
582 return hash;
583
584 StringUsageMap::iterator it = names_map.find(hash);
585
586 if (it != names_map.end())
587 it->second.seen++;
588 else {
589 StringUsage su = {1, p_str};
590
591 names_map[hash] = su;
592 }
593 return hash;
594 }
595
596
602 inline void erase_str(ElementHash hash) {
603
604 if (store_strings) {
605 StringUsageMap::iterator it = names_map.find(hash);
606
607 if (it != names_map.end()) {
608 if (--it->second.seen == 0)
609 names_map.erase(it);
610 }
611 }
612 }
613
614
622
623 if (store_strings) {
624 if (!hash)
625 return "";
626
627 StringUsageMap::iterator it = names_map.find(hash);
628
629 if (it != names_map.end())
630 return it->second.str;
631 }
632 return "\x04";
633 }
634
635
642 inline uint64_t event_code(BinEventPt &ept) {
643 EventMap::iterator it = event.find(ept);
644
645 if (it == event.end())
646 return 0;
647
648 return it->second.code;
649 }
650
651
656 inline int num_events() {
657 return event.size();
658 }
659
660
665 inline EventMap::iterator events_begin() {
666 return event.begin();
667 }
668
669
674 inline EventMap::iterator events_end() {
675 return event.end();
676 }
677
678
685 inline EventMap::iterator events_next_after_find(BinEventPt &ept) {
686 EventMap::iterator it = event.find(ept);
687
688 if (it != event.end())
689 ++it;
690
691 return it;
692 }
693
694#ifndef TEST
695 private:
696#endif
697
698 uint64_t priority_low = 0;
699 uint64_t next_code = 0;
700
701 StringUsageMap names_map = {};
702 EventMap event = {};
703 PriorityMap priority = {};
704};
705
706
710class Clients {
711
712 public:
713
715
716
724 int ll = strlen(p_cli);
725
726 return ll == 0 ? 0 : MurmurHash64A(p_cli, ll);
727 }
728
729
734 void add_client_id(pChar p_cli);
735
736
743 bool load(pBinaryImage &p_bi);
744
745
754 bool load(pBinaryImage &p_bi, int &c_block, int &c_ofs);
755
756
763 bool save(pBinaryImage &p_bi);
764
765 ClientIDs id = {};
767};
768
769
772class TimeUtil {
773
774 public:
775
777
778 char time_format[128] = "%Y-%m-%d %H:%M:%S";
779
780
788
789 TimeStruct ts = {0};
790
791 if (strptime(p_t, time_format, &ts) == nullptr)
792 return -1;
793
794 return timegm(&ts);
795 }
796
797
803 inline void set_time_format(pChar fmt) {
804 strncpy(time_format, fmt, sizeof(time_format) - 1);
805 }
806};
807
808
813class Clips : public TimeUtil {
814
815 public:
816
820 Clips() {}
821
822
828 Clips(Clients clients, Events events) : clients(clients), events(events) {}
829
830
836 clients = Clients();
837 events = Events();
838
839 clips = clip_map;
840 }
841
842
847 Clips(Clips &o_clips) {
848 pBinaryImage p_bi = new BinaryImage;
849
850 o_clips.save(p_bi);
851
852 load(p_bi);
853
854 delete p_bi;
855 }
856
857
869 bool scan_event(pChar p_e,
870 pChar p_d,
871 double w,
872 pChar p_c,
873 pChar p_t);
874
875
883 inline void insert_event(ElementHash client_hash,
884 uint64_t code,
885 TimePoint time_pt) {
886
887 ClipMap::iterator it = clips.find(client_hash);
888
889 if (it == clips.end()) {
890 Clip clip = {};
891
892 clip[time_pt] = code;
893
894 clips[client_hash] = clip;
895
896 } else
897 it->second[time_pt] = code;
898 };
899
900
907 bool load(pBinaryImage &p_bi);
908
909
916 bool save(pBinaryImage &p_bi);
917
918
924 return &clips;
925 }
926
927
932 inline uint64_t num_events() {
933
934 uint64_t ret = 0;
935
936 for (ClipMap::iterator it = clips.begin(); it != clips.end(); ++it)
937 ret += it->second.size();
938
939 return ret;
940 }
941
946 inline void collapse_to_states() {
947 for (ClipMap::iterator it_client = clips.begin(); it_client != clips.end(); ++it_client) {
948 uint64_t last_code = 0xA30BdefacedCabal;
949 for (Clip::const_iterator it = it_client->second.cbegin(); it != it_client->second.cend();) {
950 uint64_t code = it->second;
951 if (code == last_code)
952 it_client->second.erase(it++);
953 else
954 ++it;
955 last_code = code;
956 }
957 }
958 }
959
960#ifndef TEST
961 private:
962#endif
963
964 Clients clients;
965 Events events;
966 ClipMap clips = {};
967};
968
969
973class Targets : public TimeUtil {
974
975 public:
976
982 Targets(pClipMap p_clips, TargetMap target) : p_clips(p_clips), target(target) {
983 CodeTreeNode root = {0, 0, 0, {}};
984 tree.push_back(root);
985 }
986
987
998 bool insert_target(pChar p_c, pChar p_t);
999
1000
1015 bool fit(Transform x_form, Aggregate agg, double p, int depth, bool as_states);
1016
1017
1025
1026
1038
1039
1049
1050
1062 void verbose_predict_clip(const ElementHash &client,
1063 Clip &clip,
1064 TimePoint &obs_time,
1065 bool &target_yn,
1066 int &longest_seq,
1067 uint64_t &n_visits,
1068 uint64_t &n_targets,
1069 double &targ_mean_t);
1070
1071
1078 bool load(pBinaryImage &p_bi);
1079
1080
1087 bool save(pBinaryImage &p_bi);
1088
1089
1099 inline int update_node(int idx_parent, uint64_t code, bool target, ExtFloat time_d) {
1100
1101 if (idx_parent == 0) { // The root node contains the prediction of the zero-length clip.
1102 tree[0].n_seen++;
1103 if (target) {
1104 tree[0].n_target++;
1105 tree[0].sum_time_d += time_d;
1106 }
1107 }
1108
1109 ChildIndex::iterator it = tree[idx_parent].child.find(code);
1110
1111 if (it != tree[idx_parent].child.end()) {
1112 int idx = it->second;
1113
1114 tree[idx].n_seen++;
1115 if (target) {
1116 tree[idx].n_target++;
1117 tree[idx].sum_time_d += time_d;
1118 }
1119
1120 return idx;
1121 }
1122
1123 CodeTreeNode node = {1, target, time_d, {}};
1124
1125 tree.push_back(node);
1126
1127 int idx = tree.size() - 1;
1128
1129 tree[idx_parent].child[code] = idx;
1130
1131 return idx;
1132 }
1133
1134
/** Density (pdf) of the normal distribution with mean 0 and standard deviation 1.

    \param x The point at which the density is evaluated.

    \return  exp(-x^2/2)/sqrt(2*pi)
*/
inline double normal_pdf(double x) {
    // https://stackoverflow.com/questions/10847007/using-the-gaussian-probability-density-function-in-c

    static const double inv_sqrt_2pi = 0.3989422804014327;

    const double kernel = exp(-0.5*x*x);

    return kernel*inv_sqrt_2pi;
}
1148
1149
/** Cumulative distribution (cdf) of the normal distribution with mean 0 and standard deviation 1.

    \param x The point at which the cdf is evaluated.

    \return  erfc(-x/sqrt(2))/2
*/
inline double normal_cdf(double x) {
    // https://stackoverflow.com/questions/2328258/cumulative-normal-distribution-function-in-c-c
    // https://cplusplus.com/reference/cmath/erfc/

    static const double m_sqrt_dot_5 = 0.7071067811865476;

    const double z = -x*m_sqrt_dot_5;

    return 0.5*erfc(z);
}
1164
1165
1176 inline double agresti_coull_upper_bound(uint64_t n_hits, uint64_t n_total) {
1177 // https://github.com/msn0/agresti-coull-interval/blob/master/src/agresti.js
1178
1179 double n_tilde = n_total + binomial_z_sqr;
1180 double p_tilde = (n_hits + binomial_z_sqr_div_2)/n_tilde;
1181 double a = binomial_z*sqrt(p_tilde*(1 - p_tilde)/n_tilde);
1182
1183 return p_tilde + a;
1184 }
1185
1186
1197 inline double agresti_coull_lower_bound(uint64_t n_hits, uint64_t n_total) {
1198 // https://github.com/msn0/agresti-coull-interval/blob/master/src/agresti.js
1199
1200 double n_tilde = n_total + binomial_z_sqr;
1201 double p_tilde = (n_hits + binomial_z_sqr_div_2)/n_tilde;
1202 double a = binomial_z*sqrt(p_tilde*(1 - p_tilde)/n_tilde);
1203
1204 return p_tilde - a;
1205 }
1206
1207
1231 inline double predict_time(CodeTreeNode &node) {
1232
1233 if (node.n_target <= 0)
1234 return PREDICT_MAX_TIME;
1235
1236 double lb = std::max(1e-4, agresti_coull_lower_bound(node.n_target, node.n_seen));
1237 double mu_hat = transform == tr_linear ? ((double) node.sum_time_d)/node.n_target : exp(((double) node.sum_time_d)/node.n_target);
1238
1239 return mu_hat/lb;
1240 }
1241
1242
1249 inline double predict_clip(Clip clip) {
1250
1251 int idx = 0, n = 0;
1252
1253 double t[MAX_SEQ_LEN_IN_PREDICT];
1254
1255 for (Clip::reverse_iterator it = clip.rbegin(); it != clip.rend(); it++) {
1256 ChildIndex::iterator jt = tree[idx].child.find(it->second);
1257
1258 if (jt == tree[idx].child.end())
1259 break;
1260
1261 idx = jt->second;
1262
1263 t[n++] = predict_time(tree[idx]);
1264 }
1265
1266 if (n == 0)
1267 return predict_time(tree[0]);
1268
1269 if (aggregate == ag_longest)
1270 return t[n - 1];
1271
1272 double ret = t[0];
1273
1274 if (aggregate == ag_mean) {
1275 for (int i = 1; i < n; i++)
1276 ret += t[i];
1277
1278 return ret/n;
1279 }
1280
1281 for (int i = 1; i < n; i++)
1282 ret = std::min(ret, t[i]);
1283
1284 return ret;
1285 }
1286
1287
1298 bool recurse_tree_stats(int depth, int idx, int parent_idx, uint64_t code, CodeInTreeStatMap &codes_stat);
1299
1300
1305 inline int num_targets() {
1306 return target.size();
1307 }
1308
1309
1314 inline int tree_size() {
1315 return tree.size();
1316 }
1317
1318
1324 return p_clips;
1325 }
1326
1327
1333 return &tree;
1334 }
1335
1336
1342 return &target;
1343 }
1344
1345#ifndef TEST
1346 private:
1347#endif
1348
1349 pClipMap p_clips;
1350 TargetMap target;
1351 CodeTree tree = {};
1352 Transform transform = tr_undefined;
1353 Aggregate aggregate = ag_undefined;
1354 double binomial_z = 0;
1355 double binomial_z_sqr = 0;
1356 double binomial_z_sqr_div_2 = 0;
1357 int tree_depth = 0;
1358};
1359
1360} // namespace reels
1361
1362#endif // ifndef INCLUDED_REELS_TYPES
A container class to hold client ids.
Definition: reels.h:710
void add_client_id(pChar p_cli)
Add a client ID to this container.
Definition: reels.cpp:813
ElementHash hash_client_id(pChar p_cli)
Return the hash of a client ID as an ElementHash.
Definition: reels.h:723
Clients()
Definition: reels.h:714
bool save(pBinaryImage &p_bi)
Save the state of an object into a base64 mercury-dynamics serialization using image_put()
Definition: reels.cpp:863
ClientIDSet id_set
The set of the same hashes for fast search.
Definition: reels.h:766
bool load(pBinaryImage &p_bi)
Load the state of an object from a base64 mercury-dynamics serialization using image_get()
Definition: reels.cpp:822
A container class to hold clips (sequences of events).
Definition: reels.h:813
pClipMap clip_map()
The address of the internal ClipMap to be accessed from a Targets object.
Definition: reels.h:923
void insert_event(ElementHash client_hash, uint64_t code, TimePoint time_pt)
The kernel of a scan_event() made inline, when all checks and conversion to binary are successful.
Definition: reels.h:883
Clips()
Default construct a Clips object as an abstract method. This is required for declaring a Clips object...
Definition: reels.h:820
void collapse_to_states()
Collapse the ClipMap to states.
Definition: reels.h:946
Clips(Clients clients, Events events)
Construct a Clips object from a Clients and an Events objects.
Definition: reels.h:828
bool load(pBinaryImage &p_bi)
Load the state of an object from a base64 mercury-dynamics serialization using image_get()
Definition: reels.cpp:936
Clips(Clips &o_clips)
Copy-construct a Clips object.
Definition: reels.h:847
bool scan_event(pChar p_e, pChar p_d, double w, pChar p_c, pChar p_t)
Process a row from a transaction file, to add the event to the client's timeline (clip).
Definition: reels.cpp:891
Clips(const ClipMap &clip_map)
Construct a Clips object from a ClipMap to be copied.
Definition: reels.h:835
uint64_t num_events()
Return the number of events stored in the internal ClipMap.
Definition: reels.h:932
bool save(pBinaryImage &p_bi)
Save the state of an object into a base64 mercury-dynamics serialization using image_put()
Definition: reels.cpp:1005
A container class to hold events.
Definition: reels.h:363
String optimize_events(Clips &clips, TargetMap &targets, int num_steps=10, int codes_per_step=5, double threshold=0.0001, pCodeSet p_force_include=nullptr, pCodeSet p_force_exclude=nullptr, Transform x_form=tr_linear, Aggregate agg=ag_longest, double p=0.5, int depth=1000, bool as_states=true, double exp_decay=0.00693, double lower_bound_p=0.95, bool log_lift=true)
Events optimizer.
Definition: reels.cpp:275
uint64_t event_code(BinEventPt &ept)
Return the code associated with a BinEventPt if found in the object.
Definition: reels.h:642
EventMap::iterator events_next_after_find(BinEventPt &ept)
Return the EventMap::iterator to the next BinEventPt after matching ev or nullptr if not found or is ...
Definition: reels.h:685
void insert_row(pChar p_e, pChar p_d, double w)
Process a row from a transaction file.
Definition: reels.cpp:202
bool load(pBinaryImage &p_bi)
Load the state of an object from a base64 mercury-dynamics serialization using image_get()
Definition: reels.cpp:605
EventMap::iterator events_begin()
Return the EventMap::iterator to the first elements in the private variable .events.
Definition: reels.h:665
EventMap::iterator events_end()
Return the EventMap::iterator to past-the-end in the private variable .events.
Definition: reels.h:674
int num_events()
Return the number of events stored in the object.
Definition: reels.h:656
void erase_str(ElementHash hash)
Remove a string from the StringUsageMap by decreasing its use count and destroying it if not used any...
Definition: reels.h:602
bool define_event(pChar p_e, pChar p_d, double w, uint64_t code)
Define events explicitly.
Definition: reels.cpp:250
ElementHash add_str(pChar p_str)
Define a new string and push it into the StringUsageMap.
Definition: reels.h:573
bool save(pBinaryImage &p_bi)
Save the state of an object into a base64 mercury-dynamics serialization using image_put()
Definition: reels.cpp:735
double linear_correlation(OptimizeEval &ev)
Compute Pearson linear correlation between predicted and observed in an OptimizeEval.
Definition: reels.h:493
bool store_strings
If true, the object stores the string values.
Definition: reels.h:369
bool score_model(double &score, double &targ_prop, CodeInTreeStatMap &codes_stat, bool calc_tree_stats, Clips &clips, TargetMap &targets, EventCodeMap code_dict, Transform x_form, Aggregate agg, double p, int depth, bool as_states)
Internal: Do one step of the optimize_events() method.
Definition: reels.cpp:439
CodeScores get_top_codes(CodeInTreeStatMap &codes_stat, double targ_prop, double exp_decay, double lower_bound_p, bool log_lift)
Internal: Extract the top top_n codes by lift from a CodeInTreeStatMap map.
Definition: reels.cpp:535
String get_str(ElementHash hash)
Get a string content from its hash value.
Definition: reels.h:621
void set_store_strings(bool store)
Sets the public property store_strings to simplify the python interface.
Definition: reels.h:562
void set_max_num_events(int max_events)
Sets the public property max_num_events to simplify the python interface.
Definition: reels.h:553
Events()
Definition: reels.h:367
int max_num_events
The maximum number of recurrent events stored via insert_row()
Definition: reels.h:370
A minimalist logger stored as a std::string providing sprintf functionality.
Definition: reels.h:314
String log
The std::string storing the content of the Logger is public.
Definition: reels.h:347
void log_printf(const char *fmt, va_list args)
Add a nicely formatted string smaller than 256 chars to the logger.
Definition: reels.h:337
void log_printf(const char *fmt,...)
Logging method wrapper supporting variable arguments.
Definition: reels.h:325
A container class to hold target events and do predictions based on clips.
Definition: reels.h:973
bool fit(Transform x_form, Aggregate agg, double p, int depth, bool as_states)
Fit the prediction model.
Definition: reels.cpp:1080
pTargetMap p_target()
The address of the internal TargetMap.
Definition: reels.h:1341
int num_targets()
Return the size of the internal TargetMap.
Definition: reels.h:1305
double agresti_coull_upper_bound(uint64_t n_hits, uint64_t n_total)
Upper bound of the Agresti-Coull confidence interval for a binomial proportion.
Definition: reels.h:1176
pClipMap clip_map()
The address of the internal ClipMap.
Definition: reels.h:1323
void verbose_predict_clip(const ElementHash &client, Clip &clip, TimePoint &obs_time, bool &target_yn, int &longest_seq, uint64_t &n_visits, uint64_t &n_targets, double &targ_mean_t)
Predict time for a single Clip returning all kind of prediction related information.
Definition: reels.cpp:1225
int tree_size()
Return the size of the internal CodeTree.
Definition: reels.h:1314
double normal_pdf(double x)
Density (pdf) for the normal distribution with mean 0 and standard deviation 1.
Definition: reels.h:1141
Targets(pClipMap p_clips, TargetMap target)
Construct a Targets object from a Clips object and a TargetMap.
Definition: reels.h:982
double predict_time(CodeTreeNode &node)
Predict the time to target for a sub-clip that starts at a node.
Definition: reels.h:1231
pCodeTree p_tree()
The address of the internal CodeTree.
Definition: reels.h:1332
bool load(pBinaryImage &p_bi)
Load the state of an object from a base64 mercury-dynamics serialization using image_get()
Definition: reels.cpp:1301
double predict_clip(Clip clip)
Predict the time to target for a clip.
Definition: reels.h:1249
bool insert_target(pChar p_c, pChar p_t)
Utility to fill the internal TargetMap target.
Definition: reels.cpp:1057
double agresti_coull_lower_bound(uint64_t n_hits, uint64_t n_total)
Lower bound of the Agresti-Coull confidence interval for a binomial proportion.
Definition: reels.h:1197
bool save(pBinaryImage &p_bi)
Save the state of an object into a base64 mercury-dynamics serialization using image_put()
Definition: reels.cpp:1457
int update_node(int idx_parent, uint64_t code, bool target, ExtFloat time_d)
Update (fit) the CodeTree inserting new nodes as necessary.
Definition: reels.h:1099
TimesToTarget predict()
Predict time to target for all the clients in the Clips object used to fit the model.
Definition: reels.cpp:1173
bool recurse_tree_stats(int depth, int idx, int parent_idx, uint64_t code, CodeInTreeStatMap &codes_stat)
Recursive tree exploration updating a CodeInTreeStatMap map.
Definition: reels.cpp:1273
double normal_cdf(double x)
Cumulative distribution (cdf) for the normal distribution with mean 0 and standard deviation 1.
Definition: reels.h:1156
A common ancestor of Clips and Targets to avoid duplicating time management.
Definition: reels.h:772
TimeUtil()
Definition: reels.h:776
char time_format[128]
Date and time format for insert_row() and define_event()
Definition: reels.h:778
void set_time_format(pChar fmt)
Sets the public property time_format to simplify the python interface.
Definition: reels.h:803
TimePoint get_time(pChar p_t)
Convert time as a string to a TimePoint (using the object's time_format).
Definition: reels.h:787
The namespace including everything to simplify using Reels in a C++ application.
Definition: reels.cpp:35
std::vector< CodeScoreItem > CodeScores
CodeScores: A vector of CodeScoreItem.
Definition: reels.h:269
std::vector< OptimizeEvalItem > OptimizeEval
OptimizeEval: A vector of OptimizeEvalItem.
Definition: reels.h:256
double ExtFloat
Accumulator type: Was a 128 bit float, changed to 64 for macos compatibility.
Definition: reels.h:71
TargetMap * pTargetMap
Pointer to a TargetMap.
Definition: reels.h:229
std::set< uint64_t > CodeSet
CodeSet: A set of event codes.
Definition: reels.h:241
CodeTree * pCodeTree
Pointer to a CodeTree.
Definition: reels.h:293
std::vector< ElementHash > ClientIDs
ClientIDs: A vector of client ID hashes.
Definition: reels.h:201
std::map< TimePoint, uint64_t > Clip
Clip: The clip (timeline) of a client is just a map of time points and codes.
Definition: reels.h:213
std::map< uint64_t, BinEventPt > PriorityMap
PriorityMap: A map with all the acceptable priority values in the EventMap as keys.
Definition: reels.h:153
time_t TimePoint
A c 8 byte integer time point.
Definition: reels.h:69
const char * pChar
A c string.
Definition: reels.h:67
CodeTreeNode * pCodeTreeNode
Pointer to a CodeTreeNode.
Definition: reels.h:285
std::map< ElementHash, Clip > ClipMap
ClipMap: A map from clients to clips.
Definition: reels.h:220
uint64_t ElementHash
A binary hash of a string.
Definition: reels.h:65
std::set< ElementHash > ClientIDSet
ClientIDSet: A set of client ID hashes.
Definition: reels.h:208
std::map< BinEventPt, EventStat > EventMap
EventMap: A map from the hashes in a BinEventPt to the usage data that defines the info about an event.
Definition: reels.h:146
std::string String
A dynamically allocated c++ string.
Definition: reels.h:66
ClipMap * pClipMap
Pointer to a ClipMap.
Definition: reels.h:221
std::map< uint64_t, int > ChildIndex
ChildIndex: A map to find the next child in a CodeTree.
Definition: reels.h:274
bool image_put(pBinaryImage p_bi, void *p_data, int size)
A function to push arbitrary raw data into a BinaryImage.
Definition: reels.cpp:106
struct tm TimeStruct
A c structure of integer fields.
Definition: reels.h:70
BinaryImage * pBinaryImage
A pointer to BinaryImage.
Definition: reels.h:87
std::vector< ImageBlock > BinaryImage
An array of generic blocks to serialize anything.
Definition: reels.h:86
std::map< uint64_t, uint64_t > EventCodeMap
EventCodeMap: A map converting the space of Event codes into a lower cardinality set for Event optimi...
Definition: reels.h:159
std::map< ElementHash, TimePoint > TargetMap
TargetMap: A map from clients to target event TimePoints.
Definition: reels.h:228
Transform
Transform: The transformation applied to time differences. (And inverted again in predict()....
Definition: reels.h:298
@ tr_linear
Definition: reels.h:298
@ tr_undefined
Definition: reels.h:298
@ tr_log
Definition: reels.h:298
std::map< uint64_t, CodeInTreeStatistics > CodeInTreeStatMap
CodeInTreeStatMap: A map to store all the CodeInTreeStatistics by code.
Definition: reels.h:178
std::vector< CodeTreeNode > CodeTree
CodeTree: A tree of fitted targets.
Definition: reels.h:292
CodeSet * pCodeSet
Pointer to a CodeSet.
Definition: reels.h:242
uint64_t MurmurHash64A(const void *key, int len)
MurmurHash2, 64-bit versions, by Austin Appleby.
Definition: reels.cpp:56
std::map< ElementHash, StringUsage > StringUsageMap
StringUsageMap: A map from hashes to string and number of times the string is used.
Definition: reels.h:194
bool image_get(pBinaryImage p_bi, int &c_block, int &c_ofs, void *p_data, int size)
A function to get an arbitrary raw data block from a BinaryImage.
Definition: reels.cpp:168
Aggregate
Aggregate: The method used to aggregate predictions for different sequence lengths.
Definition: reels.h:303
@ ag_undefined
Definition: reels.h:303
@ ag_longest
Definition: reels.h:303
@ ag_mean
Definition: reels.h:303
@ ag_minimax
Definition: reels.h:303
std::vector< double > TimesToTarget
TimesToTarget: A vector of predictions.
Definition: reels.h:236
ClientsServer clients
Definition: reels.cpp:1580
ClipsServer clips
Definition: reels.cpp:1581
TargetsServer targets
Definition: reels.cpp:1582
EventsServer events
Definition: reels.cpp:1579
#define PREDICT_MAX_TIME
Hundred years when the target was never seen.
Definition: reels.h:62
#define MAX_SEQ_LEN_IN_PREDICT
The maximum sequence length used in prediction.
Definition: reels.h:61
#define IMAGE_BUFF_SIZE
Makes sizeof(ImageBlock) == 6K.
Definition: reels.h:58
#define DEFAULT_NUM_EVENTS
A size to store events in an Events object by default.
Definition: reels.h:60
#define WEIGHT_PRECISION
10^ the number of digits at which weight is rounded
Definition: reels.h:63
The binary representation of an event as stored in a transaction file.
Definition: reels.h:94
double w
The "weight". A double representing a weight of the event.
Definition: reels.h:97
bool operator<(const BinEventPt &o) const
Compare to another BinEventPt for strict order to support use as a key in a map.
Definition: reels.h:115
ElementHash d
The "description". A binary hash of a string representing "the event".
Definition: reels.h:96
bool operator==(const BinEventPt &o) const
Compare to another BinEventPt for identity to support use as a key in a map.
Definition: reels.h:105
ElementHash e
The "emitter". A binary hash of a string representing "owner of event".
Definition: reels.h:95
The binary representation of a transaction in a transaction file.
Definition: reels.h:125
ElementHash c
The "client". A binary hash of a string representing "the actor".
Definition: reels.h:126
TimePoint t
The "time". A timestamp of the event.
Definition: reels.h:127
CodeInTreeStatistics: A structure to compute aggregated statistics for each code.
Definition: reels.h:165
uint64_t n_succ_target
Definition: reels.h:167
int n_dep
Number of elements sum_dep has.
Definition: reels.h:171
uint64_t n_incl_target
Definition: reels.h:169
uint64_t n_succ_seen
Definition: reels.h:166
uint64_t sum_dep
Sum of tree depth to estimate mean depth.
Definition: reels.h:170
uint64_t n_incl_seen
Definition: reels.h:168
CodeScoreItem: A structure to sort codes by lift.
Definition: reels.h:261
double score
The score.
Definition: reels.h:263
uint64_t code
The code.
Definition: reels.h:262
CodeTreeNode: Each node in a fitted CodeTree.
Definition: reels.h:279
uint64_t n_target
The number of clips that visited the node with the target.
Definition: reels.h:281
uint64_t n_seen
The number of clips that visited the node (target and no target).
Definition: reels.h:280
ExtFloat sum_time_d
Sum of time differences for the elements with a defined target.
Definition: reels.h:282
ChildIndex child
A map of children by code (key) to index in the CodeTree.
Definition: reels.h:283
The metrics associated to an event identified by a BinEventPt.
Definition: reels.h:135
uint64_t seen
Number of times the event has been seen in the data.
Definition: reels.h:136
uint64_t priority
The (unique) current priority assigned in the priority queue (set) to this event.
Definition: reels.h:138
uint64_t code
A code number identifying the event.
Definition: reels.h:137
A generic block structure to store object state in a Python-friendly way.
Definition: reels.h:78
int block_num
The current block number in the BinaryImage.
Definition: reels.h:80
uint8_t buffer[IMAGE_BUFF_SIZE]
The buffer.
Definition: reels.h:82
int size
The number of already allocated bytes inside the current block.
Definition: reels.h:79
OptimizeEvalItem: A structure to compare predicted and observed.
Definition: reels.h:247
TimePoint t_obs
The observed result: Zero for not a target or elapsed time since the previous event in clip to target...
Definition: reels.h:249
int seq_len
The length of the predicting clip.
Definition: reels.h:250
double t_hat
The prediction (elapsed time since the last event in clip to predicted target).
Definition: reels.h:248
StringUsage: A pair of String and number of times it is used.
Definition: reels.h:184
String str
The string as plain text.
Definition: reels.h:186
uint64_t seen
Number of times string is used. Increase by add_str() calls to the same string, decreased/destroyed b...
Definition: reels.h:185