39#ifndef INCLUDED_CATCH2
40#define INCLUDED_CATCH2
48#ifndef INCLUDED_REELS_TYPES
49#define INCLUDED_REELS_TYPES
58#define IMAGE_BUFF_SIZE 6136
59#define PRIORITY_SEEN_FACTOR 2000000000
60#define DEFAULT_NUM_EVENTS 1000
61#define MAX_SEQ_LEN_IN_PREDICT 1000
62#define PREDICT_MAX_TIME (100*365.25*24*3600)
63#define WEIGHT_PRECISION 10000
213typedef std::map<TimePoint, uint64_t>
Clip;
343 vsnprintf(buffer,
sizeof(buffer), fmt, args);
449 double exp_decay = 0.00693,
double lower_bound_p = 0.95,
bool log_lift =
true);
497 ExtFloat s_h = 0, s_o = 0, sho = 0, ssh = 0, sso = 0;
500 for (OptimizeEval::iterator it = ev.begin(); it != ev.end(); ++it) {
501 if (it->t_obs != 0) {
504 sho += it->t_hat*it->t_obs;
505 ssh += it->t_hat*it->t_hat;
506 sso += it->t_obs*it->t_obs;
514 double d2 = (n*ssh - s_h*s_h)*(n*sso - s_o*s_o);
519 return (n*sho - s_h*s_o)/sqrt(d2);
577 int ll = strlen(p_str);
587 StringUsageMap::iterator it = names_map.find(hash);
589 if (it != names_map.end())
594 names_map[hash] = su;
608 StringUsageMap::iterator it = names_map.find(hash);
610 if (it != names_map.end()) {
611 if (--it->second.seen == 0)
630 StringUsageMap::iterator it = names_map.find(hash);
632 if (it != names_map.end())
633 return it->second.str;
646 EventMap::iterator it =
event.find(ept);
648 if (it == event.end())
651 return it->second.code;
669 return event.begin();
689 EventMap::iterator it =
event.find(ept);
691 if (it != event.end())
701 uint64_t priority_low = 0;
702 uint64_t next_code = 0;
727 int ll = strlen(p_cli);
890 ClipMap::iterator it = clips.find(client_hash);
892 if (it == clips.end()) {
895 clip[time_pt] = code;
897 clips[client_hash] = clip;
900 it->second[time_pt] = code;
939 for (ClipMap::iterator it = clips.begin(); it != clips.end(); ++it)
940 ret += it->second.size();
950 for (ClipMap::iterator it_client = clips.begin(); it_client != clips.end(); ++it_client) {
951 uint64_t last_code = 0xA30BdefacedCabal;
952 for (Clip::const_iterator it = it_client->second.cbegin(); it != it_client->second.cend();) {
953 uint64_t code = it->second;
954 if (code == last_code)
955 it_client->second.erase(it++);
987 tree.push_back(root);
1071 uint64_t &n_targets,
1072 double &targ_mean_t);
1104 if (idx_parent == 0) {
1108 tree[0].sum_time_d += time_d;
1112 ChildIndex::iterator it = tree[idx_parent].child.find(code);
1114 if (it != tree[idx_parent].child.end()) {
1115 int idx = it->second;
1119 tree[idx].n_target++;
1120 tree[idx].sum_time_d += time_d;
1128 tree.push_back(node);
1130 int idx = tree.size() - 1;
1132 tree[idx_parent].child[code] = idx;
1147 static const double inv_sqrt_2pi = 0.3989422804014327;
1149 return exp(-0.5*x*x)*inv_sqrt_2pi;
1163 static const double m_sqrt_dot_5 = 0.7071067811865476;
1165 return 0.5*erfc(-x*m_sqrt_dot_5);
1182 double n_tilde = n_total + binomial_z_sqr;
1183 double p_tilde = (n_hits + binomial_z_sqr_div_2)/n_tilde;
1184 double a = binomial_z*sqrt(p_tilde*(1 - p_tilde)/n_tilde);
1203 double n_tilde = n_total + binomial_z_sqr;
1204 double p_tilde = (n_hits + binomial_z_sqr_div_2)/n_tilde;
1205 double a = binomial_z*sqrt(p_tilde*(1 - p_tilde)/n_tilde);
1258 for (Clip::reverse_iterator it = clip.rbegin(); it != clip.rend(); it++) {
1259 ChildIndex::iterator jt = tree[idx].child.find(it->second);
1261 if (jt == tree[idx].child.end())
1278 for (
int i = 1; i < n; i++)
1284 for (
int i = 1; i < n; i++)
1285 ret = std::min(ret, t[i]);
1309 return target.size();
1357 double binomial_z = 0;
1358 double binomial_z_sqr = 0;
1359 double binomial_z_sqr_div_2 = 0;
A container class to hold client ids.
Definition reels.h:713
void add_client_id(pChar p_cli)
Add a client ID to this container.
Definition reels.cpp:800
ElementHash hash_client_id(pChar p_cli)
Return the hash of a client ID as an ElementHash.
Definition reels.h:726
Clients()
Definition reels.h:717
bool save(pBinaryImage &p_bi)
Save the state of an object into a base64 mercury-dynamics serialization using image_put()
Definition reels.cpp:849
ClientIDSet id_set
The set of the same hashes for fast search.
Definition reels.h:769
bool load(pBinaryImage &p_bi)
Load the state of an object from a base64 mercury-dynamics serialization using image_get()
Definition reels.cpp:809
A container class to hold clips (sequences of events).
Definition reels.h:816
pClipMap clip_map()
The address of the internal ClipMap to be accessed from a Targets object.
Definition reels.h:926
void insert_event(ElementHash client_hash, uint64_t code, TimePoint time_pt)
The kernel of a scan_event() made inline, when all checks and conversion to binary are successful.
Definition reels.h:886
Clips()
Default construct a Clips object as an abstract method. This is required for declaring a Clips object...
Definition reels.h:823
void collapse_to_states()
Collapse the ClipMap to states.
Definition reels.h:949
Clips(Clients clients, Events events)
Construct a Clips object from a Clients and an Events objects.
Definition reels.h:831
bool load(pBinaryImage &p_bi)
Load the state of an object from a base64 mercury-dynamics serialization using image_get()
Definition reels.cpp:922
Clips(Clips &o_clips)
Copy-construct a Clips object.
Definition reels.h:850
bool scan_event(pChar p_e, pChar p_d, double w, pChar p_c, pChar p_t)
Process a row from a transaction file, to add the event to the client's timeline (clip).
Definition reels.cpp:877
Clips(const ClipMap &clip_map)
Construct a Clips object from a ClipMap to be copied.
Definition reels.h:838
uint64_t num_events()
Return the number of events stored in the internal ClipMap.
Definition reels.h:935
bool save(pBinaryImage &p_bi)
Save the state of an object into a base64 mercury-dynamics serialization using image_put()
Definition reels.cpp:979
A container class to hold events.
Definition reels.h:366
String optimize_events(Clips &clips, TargetMap &targets, int num_steps=10, int codes_per_step=5, double threshold=0.0001, pCodeSet p_force_include=nullptr, pCodeSet p_force_exclude=nullptr, Transform x_form=tr_linear, Aggregate agg=ag_longest, double p=0.5, int depth=1000, bool as_states=true, double exp_decay=0.00693, double lower_bound_p=0.95, bool log_lift=true)
Events optimizer.
Definition reels.cpp:275
uint64_t event_code(BinEventPt &ept)
Return the code associated to a BinEventPt if found in the object.
Definition reels.h:645
EventMap::iterator events_next_after_find(BinEventPt &ept)
Return the EventMap::iterator to the next BinEventPt after matching ev or nullptr if not found or is ...
Definition reels.h:688
void insert_row(pChar p_e, pChar p_d, double w)
Process a row from a transaction file.
Definition reels.cpp:202
bool load(pBinaryImage &p_bi)
Load the state of an object from a base64 mercury-dynamics serialization using image_get()
Definition reels.cpp:605
EventMap::iterator events_begin()
Return the EventMap::iterator to the first element in the private variable .events.
Definition reels.h:668
EventMap::iterator events_end()
Return the EventMap::iterator to past-the-end in the private variable .events.
Definition reels.h:677
int num_events()
Return the number of events stored in the object.
Definition reels.h:659
void erase_str(ElementHash hash)
Remove a string from the StringUsageMap by decreasing its use count and destroying it if not used any...
Definition reels.h:605
bool define_event(pChar p_e, pChar p_d, double w, uint64_t code)
Define events explicitly.
Definition reels.cpp:250
ElementHash add_str(pChar p_str)
Define a new string and push it into the StringUsageMap.
Definition reels.h:576
bool save(pBinaryImage &p_bi)
Save the state of an object into a base64 mercury-dynamics serialization using image_put()
Definition reels.cpp:722
double linear_correlation(OptimizeEval &ev)
Compute Pearson linear correlation between predicted and observed in an OptimizeEval.
Definition reels.h:496
bool store_strings
If true, the object stores the string values.
Definition reels.h:372
bool score_model(double &score, double &targ_prop, CodeInTreeStatMap &codes_stat, bool calc_tree_stats, Clips &clips, TargetMap &targets, EventCodeMap code_dict, Transform x_form, Aggregate agg, double p, int depth, bool as_states)
Internal: Do one step of the optimize_events() method.
Definition reels.cpp:439
CodeScores get_top_codes(CodeInTreeStatMap &codes_stat, double targ_prop, double exp_decay, double lower_bound_p, bool log_lift)
Internal: Extract the top top_n codes by lift from a CodeInTreeStatMap map.
Definition reels.cpp:535
String get_str(ElementHash hash)
Get a string content from its hash value.
Definition reels.h:624
void set_store_strings(bool store)
Sets the public property store_strings to simplify the python interface.
Definition reels.h:565
void set_max_num_events(int max_events)
Sets the public property max_num_events to simplify the python interface.
Definition reels.h:556
Events()
Definition reels.h:370
int max_num_events
The maximum number of recurrent events stored via insert_row()
Definition reels.h:373
A minimalist logger stored as a std::string providing sprintf functionality.
Definition reels.h:317
String log
The std::string storing the content of the Logger is public.
Definition reels.h:350
void log_printf(const char *fmt, va_list args)
Add a nicely formatted string smaller than 256 chars to the logger.
Definition reels.h:340
void log_printf(const char *fmt,...)
Logging method wrapper supporting variable arguments.
Definition reels.h:328
A container class to hold target events and do predictions based on clips.
Definition reels.h:976
bool fit(Transform x_form, Aggregate agg, double p, int depth, bool as_states)
Fit the prediction model.
Definition reels.cpp:1054
pTargetMap p_target()
The address of the internal TargetMap.
Definition reels.h:1344
int num_targets()
Return the size of the internal TargetMap.
Definition reels.h:1308
double agresti_coull_upper_bound(uint64_t n_hits, uint64_t n_total)
Upper bound of the Agresti-Coull confidence interval for a binomial proportion.
Definition reels.h:1179
pClipMap clip_map()
The address of the internal ClipMap.
Definition reels.h:1326
void verbose_predict_clip(const ElementHash &client, Clip &clip, TimePoint &obs_time, bool &target_yn, int &longest_seq, uint64_t &n_visits, uint64_t &n_targets, double &targ_mean_t)
Predict time for a single Clip returning all kind of prediction related information.
Definition reels.cpp:1199
int tree_size()
Return the size of the internal CodeTree.
Definition reels.h:1317
double normal_pdf(double x)
Density (pdf) for the normal distribution with mean 0 and standard deviation 1.
Definition reels.h:1144
Targets(pClipMap p_clips, TargetMap target)
Construct a Targets object from a Clips object and a TargetMap.
Definition reels.h:985
double predict_time(CodeTreeNode &node)
Predict the time to target for a sub-clip that starts at a node.
Definition reels.h:1234
pCodeTree p_tree()
The address of the internal CodeTree.
Definition reels.h:1335
bool load(pBinaryImage &p_bi)
Load the state of an object from a base64 mercury-dynamics serialization using image_get()
Definition reels.cpp:1275
double predict_clip(Clip clip)
Predict the time to target for a clip.
Definition reels.h:1252
bool insert_target(pChar p_c, pChar p_t)
Utility to fill the internal TargetMap target.
Definition reels.cpp:1031
double agresti_coull_lower_bound(uint64_t n_hits, uint64_t n_total)
Lower bound of the Agresti-Coull confidence interval for a binomial proportion.
Definition reels.h:1200
bool save(pBinaryImage &p_bi)
Save the state of an object into a base64 mercury-dynamics serialization using image_put()
Definition reels.cpp:1411
int update_node(int idx_parent, uint64_t code, bool target, ExtFloat time_d)
Update (fit) the CodeTree inserting new nodes as necessary.
Definition reels.h:1102
TimesToTarget predict()
Predict time to target for all the clients in the Clips object used to fit the model.
Definition reels.cpp:1147
bool recurse_tree_stats(int depth, int idx, int parent_idx, uint64_t code, CodeInTreeStatMap &codes_stat)
Recursive tree exploration updating a CodeInTreeStatMap map.
Definition reels.cpp:1247
double normal_cdf(double x)
Cumulative distribution (cdf) for the normal distribution with mean 0 and standard deviation 1.
Definition reels.h:1159
A common ancestor of Clips and Targets to avoid duplicating time management.
Definition reels.h:775
TimeUtil()
Definition reels.h:779
char time_format[128]
Date and time format for insert_row() and define_event()
Definition reels.h:781
void set_time_format(pChar fmt)
Sets the public property time_format to simplify the python interface.
Definition reels.h:806
TimePoint get_time(pChar p_t)
Convert time as a string to a TimePoint (using the object's time_format).
Definition reels.h:790
The namespace including everything to simplify using Reels in a c++ application.
Definition reels.cpp:35
std::vector< CodeScoreItem > CodeScores
CodeScores: A vector of CodeScoreItem.
Definition reels.h:272
std::vector< OptimizeEvalItem > OptimizeEval
OptimizeEval: A vector of OptimizeEvalItem.
Definition reels.h:259
double ExtFloat
Accumulator type: Was a 128 bit float, changed to 64 for macos compatibility.
Definition reels.h:71
TargetMap * pTargetMap
Pointer to a TargetMap.
Definition reels.h:229
std::set< uint64_t > CodeSet
CodeSet: A set of event codes.
Definition reels.h:241
CodeTree * pCodeTree
Pointer to a CodeTree.
Definition reels.h:296
std::vector< ElementHash > ClientIDs
ClientIDs: A vector of client ID hashes.
Definition reels.h:201
std::map< TimePoint, uint64_t > Clip
Clip: The clip (timeline) of a client is just a map of time points and codes.
Definition reels.h:213
std::map< uint64_t, BinEventPt > PriorityMap
PriorityMap: A map with all the acceptable priority values in the EventMap as keys.
Definition reels.h:153
time_t TimePoint
A c 8 byte integer time point.
Definition reels.h:69
const char * pChar
A c string.
Definition reels.h:67
CodeTreeNode * pCodeTreeNode
Pointer to a CodeTreeNode.
Definition reels.h:288
std::map< ElementHash, Clip > ClipMap
ClipMap: A map from clients to clips.
Definition reels.h:220
uint64_t ElementHash
A binary hash of a string.
Definition reels.h:65
bool compare_optimize_eval(const OptimizeEvalItem a, const OptimizeEvalItem b)
Compare two OptimizeEvalItem structures for sorting.
Definition reels.cpp:431
std::set< ElementHash > ClientIDSet
ClientIDSet: A set of client ID hashes.
Definition reels.h:208
std::map< BinEventPt, EventStat > EventMap
EventMap: A map from hashes in a BinEventPt to usage data that defines the info about an event.
Definition reels.h:146
std::string String
A dynamically allocated c++ string.
Definition reels.h:66
ClipMap * pClipMap
Pointer to a ClipMap.
Definition reels.h:221
std::map< uint64_t, int > ChildIndex
ChildIndex: A map to find the next child in a CodeTree.
Definition reels.h:277
bool image_put(pBinaryImage p_bi, void *p_data, int size)
A function to push arbitrary raw data into a BinaryImage.
Definition reels.cpp:106
struct tm TimeStruct
A c structure of integer fields.
Definition reels.h:70
BinaryImage * pBinaryImage
A pointer to BinaryImage.
Definition reels.h:87
std::vector< ImageBlock > BinaryImage
An array of generic blocks to serialize anything.
Definition reels.h:86
std::map< uint64_t, uint64_t > EventCodeMap
EventCodeMap: A map converting the space of Event codes into a lower cardinality set for Event optimi...
Definition reels.h:159
std::map< ElementHash, TimePoint > TargetMap
TargetMap: A map from clients to target event TimePoints.
Definition reels.h:228
Transform
Transform: The transformation applied to time differences. (And inverted again in predict()....
Definition reels.h:301
@ tr_linear
Definition reels.h:301
@ tr_undefined
Definition reels.h:301
@ tr_log
Definition reels.h:301
std::map< uint64_t, CodeInTreeStatistics > CodeInTreeStatMap
CodeInTreeStatMap: A map to store all the CodeInTreeStatistics by code.
Definition reels.h:178
std::vector< CodeTreeNode > CodeTree
CodeTree: A tree of fitted targets.
Definition reels.h:295
CodeSet * pCodeSet
Pointer to a CodeSet.
Definition reels.h:242
uint64_t MurmurHash64A(const void *key, int len)
MurmurHash2, 64-bit versions, by Austin Appleby.
Definition reels.cpp:56
std::map< ElementHash, StringUsage > StringUsageMap
StringUsageMap: A map from hashes to string and number of times the string is used.
Definition reels.h:194
bool image_get(pBinaryImage p_bi, int &c_block, int &c_ofs, void *p_data, int size)
A function to get an arbitrary raw data block from a BinaryImage.
Definition reels.cpp:168
Aggregate
Aggregate: The method used to aggregate predictions for different sequence lengths.
Definition reels.h:306
@ ag_undefined
Definition reels.h:306
@ ag_longest
Definition reels.h:306
@ ag_mean
Definition reels.h:306
@ ag_minimax
Definition reels.h:306
std::vector< double > TimesToTarget
TimesToTarget: A vector of predictions.
Definition reels.h:236
ClientsServer clients
Definition reels.cpp:1534
ClipsServer clips
Definition reels.cpp:1535
TargetsServer targets
Definition reels.cpp:1536
EventsServer events
Definition reels.cpp:1533
#define PREDICT_MAX_TIME
Hundred years when the target was never seen.
Definition reels.h:62
#define MAX_SEQ_LEN_IN_PREDICT
The maximum sequence length used in prediction.
Definition reels.h:61
#define IMAGE_BUFF_SIZE
Makes sizeof(ImageBlock) == 6K.
Definition reels.h:58
#define DEFAULT_NUM_EVENTS
A size to store events in an Events object by default.
Definition reels.h:60
#define WEIGHT_PRECISION
10^ the number of digits at which weight is rounded
Definition reels.h:63
The binary representation of an event as stored in a transaction file.
Definition reels.h:94
double w
The "weight". A double representing a weight of the event.
Definition reels.h:97
bool operator<(const BinEventPt &o) const
Compare to another BinEventPt for strict order to support use as a key in a map.
Definition reels.h:115
ElementHash d
The "description". A binary hash of a string representing "the event".
Definition reels.h:96
bool operator==(const BinEventPt &o) const
Compare to another BinEventPt for identity to support use as a key in a map.
Definition reels.h:105
ElementHash e
The "emitter". A binary hash of a string representing "owner of event".
Definition reels.h:95
The binary representation of a transaction in a transaction file.
Definition reels.h:125
ElementHash c
The "client". A binary hash of a string representing "the actor".
Definition reels.h:126
TimePoint t
The "time". A timestamp of the event.
Definition reels.h:127
CodeInTreeStatistics: A structure to compute aggregated statistics for each code.
Definition reels.h:165
uint64_t n_succ_target
Definition reels.h:167
int n_dep
Number of elements sum_dep has.
Definition reels.h:171
uint64_t n_incl_target
Definition reels.h:169
uint64_t n_succ_seen
Definition reels.h:166
uint64_t sum_dep
Sum of tree depth to estimate mean depth.
Definition reels.h:170
uint64_t n_incl_seen
Definition reels.h:168
CodeScoreItem: A structure to sort codes by lift.
Definition reels.h:264
double score
The score.
Definition reels.h:266
uint64_t code
The code.
Definition reels.h:265
CodeTreeNode: Each node in a fitted CodeTree.
Definition reels.h:282
uint64_t n_target
The number of clips that visited the node with the target.
Definition reels.h:284
uint64_t n_seen
The number of clips that visited the node (target and no target).
Definition reels.h:283
ExtFloat sum_time_d
Sum of time differences for the elements with a defined target.
Definition reels.h:285
ChildIndex child
A map of children by code (key) to index in the CodeTree.
Definition reels.h:286
The metrics associated to an event identified by a BinEventPt.
Definition reels.h:135
uint64_t seen
Number of times the event has been seen in the data.
Definition reels.h:136
uint64_t priority
The (unique) current priority assigned in the priority queue (set) to this event.
Definition reels.h:138
uint64_t code
A code number identifying the event.
Definition reels.h:137
A generic block structure to store object state in a Python-friendly way.
Definition reels.h:78
int block_num
The current block number in the BinaryImage.
Definition reels.h:80
uint8_t buffer[IMAGE_BUFF_SIZE]
The buffer.
Definition reels.h:82
int size
The number of already allocated bytes inside the current block.
Definition reels.h:79
OptimizeEvalItem: A structure to compare predicted and observed.
Definition reels.h:247
TimePoint t_obs
The observed result: Zero for not a target or elapsed time since the previous event in clip to target...
Definition reels.h:249
int seq_len
The length of the predicting clip.
Definition reels.h:250
double t_hat
The prediction (elapsed time since the last event in clip to predicted target).
Definition reels.h:248
StringUsage: A pair of String and number of times it is used.
Definition reels.h:184
String str
The string as plain text.
Definition reels.h:186
uint64_t seen
Number of times string is used. Increase by add_str() calls to the same string, decreased/destroyed b...
Definition reels.h:185