39#ifndef INCLUDED_CATCH2
40#define INCLUDED_CATCH2
48#ifndef INCLUDED_REELS_TYPES
49#define INCLUDED_REELS_TYPES
58#define IMAGE_BUFF_SIZE 6136
59#define PRIORITY_SEEN_FACTOR 2000000000
60#define DEFAULT_NUM_EVENTS 1000
61#define MAX_SEQ_LEN_IN_PREDICT 1000
62#define PREDICT_MAX_TIME (100*365.25*24*3600)
63#define WEIGHT_PRECISION 10000
213typedef std::map<TimePoint, uint64_t>
Clip;
340 vsnprintf(buffer,
sizeof(buffer), fmt, args);
446 double exp_decay = 0.00693,
double lower_bound_p = 0.95,
bool log_lift =
true);
494 ExtFloat s_h = 0, s_o = 0, sho = 0, ssh = 0, sso = 0;
497 for (OptimizeEval::iterator it = ev.begin(); it != ev.end(); ++it) {
498 if (it->t_obs != 0) {
501 sho += it->t_hat*it->t_obs;
502 ssh += it->t_hat*it->t_hat;
503 sso += it->t_obs*it->t_obs;
511 double d2 = (n*ssh - s_h*s_h)*(n*sso - s_o*s_o);
516 return (n*sho - s_h*s_o)/sqrt(d2);
574 int ll = strlen(p_str);
584 StringUsageMap::iterator it = names_map.find(hash);
586 if (it != names_map.end())
591 names_map[hash] = su;
605 StringUsageMap::iterator it = names_map.find(hash);
607 if (it != names_map.end()) {
608 if (--it->second.seen == 0)
627 StringUsageMap::iterator it = names_map.find(hash);
629 if (it != names_map.end())
630 return it->second.str;
643 EventMap::iterator it =
event.find(ept);
645 if (it == event.end())
648 return it->second.code;
666 return event.begin();
686 EventMap::iterator it =
event.find(ept);
688 if (it != event.end())
698 uint64_t priority_low = 0;
699 uint64_t next_code = 0;
724 int ll = strlen(p_cli);
887 ClipMap::iterator it = clips.find(client_hash);
889 if (it == clips.end()) {
892 clip[time_pt] = code;
894 clips[client_hash] = clip;
897 it->second[time_pt] = code;
936 for (ClipMap::iterator it = clips.begin(); it != clips.end(); ++it)
937 ret += it->second.size();
947 for (ClipMap::iterator it_client = clips.begin(); it_client != clips.end(); ++it_client) {
948 uint64_t last_code = 0xA30BdefacedCabal;
949 for (Clip::const_iterator it = it_client->second.cbegin(); it != it_client->second.cend();) {
950 uint64_t code = it->second;
951 if (code == last_code)
952 it_client->second.erase(it++);
984 tree.push_back(root);
1068 uint64_t &n_targets,
1069 double &targ_mean_t);
1101 if (idx_parent == 0) {
1105 tree[0].sum_time_d += time_d;
1109 ChildIndex::iterator it = tree[idx_parent].child.find(code);
1111 if (it != tree[idx_parent].child.end()) {
1112 int idx = it->second;
1116 tree[idx].n_target++;
1117 tree[idx].sum_time_d += time_d;
1125 tree.push_back(node);
1127 int idx = tree.size() - 1;
1129 tree[idx_parent].child[code] = idx;
1144 static const double inv_sqrt_2pi = 0.3989422804014327;
1146 return exp(-0.5*x*x)*inv_sqrt_2pi;
1160 static const double m_sqrt_dot_5 = 0.7071067811865476;
1162 return 0.5*erfc(-x*m_sqrt_dot_5);
1179 double n_tilde = n_total + binomial_z_sqr;
1180 double p_tilde = (n_hits + binomial_z_sqr_div_2)/n_tilde;
1181 double a = binomial_z*sqrt(p_tilde*(1 - p_tilde)/n_tilde);
1200 double n_tilde = n_total + binomial_z_sqr;
1201 double p_tilde = (n_hits + binomial_z_sqr_div_2)/n_tilde;
1202 double a = binomial_z*sqrt(p_tilde*(1 - p_tilde)/n_tilde);
1255 for (Clip::reverse_iterator it = clip.rbegin(); it != clip.rend(); it++) {
1256 ChildIndex::iterator jt = tree[idx].child.find(it->second);
1258 if (jt == tree[idx].child.end())
1275 for (
int i = 1; i < n; i++)
1281 for (
int i = 1; i < n; i++)
1282 ret = std::min(ret, t[i]);
1306 return target.size();
1354 double binomial_z = 0;
1355 double binomial_z_sqr = 0;
1356 double binomial_z_sqr_div_2 = 0;
A container class to hold client ids.
Definition: reels.h:710
void add_client_id(pChar p_cli)
Add a client ID to this container.
Definition: reels.cpp:813
ElementHash hash_client_id(pChar p_cli)
Return the hash of a client ID as an ElementHash.
Definition: reels.h:723
Clients()
Definition: reels.h:714
bool save(pBinaryImage &p_bi)
Save the state of an object into a base64 mercury-dynamics serialization using image_put()
Definition: reels.cpp:863
ClientIDSet id_set
The set of the same hashes for fast search.
Definition: reels.h:766
bool load(pBinaryImage &p_bi)
Load the state of an object from a base64 mercury-dynamics serialization using image_get()
Definition: reels.cpp:822
A container class to hold clips (sequences of events).
Definition: reels.h:813
pClipMap clip_map()
The address of the internal ClipMap to be accessed from a Targets object.
Definition: reels.h:923
void insert_event(ElementHash client_hash, uint64_t code, TimePoint time_pt)
The kernel of a scan_event() made inline, when all checks and conversion to binary are successful.
Definition: reels.h:883
Clips()
Default construct a Clips object as an abstract method. This is required for declaring a Clips object...
Definition: reels.h:820
void collapse_to_states()
Collapse the ClipMap to states.
Definition: reels.h:946
Clips(Clients clients, Events events)
Construct a Clips object from a Clients object and an Events object.
Definition: reels.h:828
bool load(pBinaryImage &p_bi)
Load the state of an object from a base64 mercury-dynamics serialization using image_get()
Definition: reels.cpp:936
Clips(Clips &o_clips)
Copy-construct a Clips object.
Definition: reels.h:847
bool scan_event(pChar p_e, pChar p_d, double w, pChar p_c, pChar p_t)
Process a row from a transaction file, to add the event to the client's timeline (clip).
Definition: reels.cpp:891
Clips(const ClipMap &clip_map)
Construct a Clips object from a ClipMap to be copied.
Definition: reels.h:835
uint64_t num_events()
Return the number of events stored in the internal ClipMap.
Definition: reels.h:932
bool save(pBinaryImage &p_bi)
Save the state of an object into a base64 mercury-dynamics serialization using image_put()
Definition: reels.cpp:1005
A container class to hold events.
Definition: reels.h:363
String optimize_events(Clips &clips, TargetMap &targets, int num_steps=10, int codes_per_step=5, double threshold=0.0001, pCodeSet p_force_include=nullptr, pCodeSet p_force_exclude=nullptr, Transform x_form=tr_linear, Aggregate agg=ag_longest, double p=0.5, int depth=1000, bool as_states=true, double exp_decay=0.00693, double lower_bound_p=0.95, bool log_lift=true)
Events optimizer.
Definition: reels.cpp:275
uint64_t event_code(BinEventPt &ept)
Return the code associated with a BinEventPt if found in the object.
Definition: reels.h:642
EventMap::iterator events_next_after_find(BinEventPt &ept)
Return the EventMap::iterator to the next BinEventPt after matching ev or nullptr if not found or is ...
Definition: reels.h:685
void insert_row(pChar p_e, pChar p_d, double w)
Process a row from a transaction file.
Definition: reels.cpp:202
bool load(pBinaryImage &p_bi)
Load the state of an object from a base64 mercury-dynamics serialization using image_get()
Definition: reels.cpp:605
EventMap::iterator events_begin()
Return the EventMap::iterator to the first element in the private variable .events.
Definition: reels.h:665
EventMap::iterator events_end()
Return the EventMap::iterator to past-the-end in the private variable .events.
Definition: reels.h:674
int num_events()
Return the number of events stored in the object.
Definition: reels.h:656
void erase_str(ElementHash hash)
Remove a string from the StringUsageMap by decreasing its use count and destroying it if not used any...
Definition: reels.h:602
bool define_event(pChar p_e, pChar p_d, double w, uint64_t code)
Define events explicitly.
Definition: reels.cpp:250
ElementHash add_str(pChar p_str)
Define a new string and push it into the StringUsageMap.
Definition: reels.h:573
bool save(pBinaryImage &p_bi)
Save the state of an object into a base64 mercury-dynamics serialization using image_put()
Definition: reels.cpp:735
double linear_correlation(OptimizeEval &ev)
Compute Pearson linear correlation between predicted and observed in an OptimizeEval.
Definition: reels.h:493
bool store_strings
If true, the object stores the string values.
Definition: reels.h:369
bool score_model(double &score, double &targ_prop, CodeInTreeStatMap &codes_stat, bool calc_tree_stats, Clips &clips, TargetMap &targets, EventCodeMap code_dict, Transform x_form, Aggregate agg, double p, int depth, bool as_states)
Internal: Do one step of the optimize_events() method.
Definition: reels.cpp:439
CodeScores get_top_codes(CodeInTreeStatMap &codes_stat, double targ_prop, double exp_decay, double lower_bound_p, bool log_lift)
Internal: Extract the top top_n codes by lift from a CodeInTreeStatMap map.
Definition: reels.cpp:535
String get_str(ElementHash hash)
Get a string content from its hash value.
Definition: reels.h:621
void set_store_strings(bool store)
Sets the public property store_strings to simplify the python interface.
Definition: reels.h:562
void set_max_num_events(int max_events)
Sets the public property max_num_events to simplify the python interface.
Definition: reels.h:553
Events()
Definition: reels.h:367
int max_num_events
The maximum number of recurrent events stored via insert_row().
Definition: reels.h:370
A minimalist logger stored as a std::string providing sprintf functionality.
Definition: reels.h:314
String log
The std::string storing the content of the Logger is public.
Definition: reels.h:347
void log_printf(const char *fmt, va_list args)
Add a nicely formatted string smaller than 256 chars to the logger.
Definition: reels.h:337
void log_printf(const char *fmt,...)
Logging method wrapper supporting variable arguments.
Definition: reels.h:325
A container class to hold target events and do predictions based on clips.
Definition: reels.h:973
bool fit(Transform x_form, Aggregate agg, double p, int depth, bool as_states)
Fit the prediction model.
Definition: reels.cpp:1080
pTargetMap p_target()
The address of the internal TargetMap.
Definition: reels.h:1341
int num_targets()
Return the size of the internal TargetMap.
Definition: reels.h:1305
double agresti_coull_upper_bound(uint64_t n_hits, uint64_t n_total)
Upper bound of the Agresti-Coull confidence interval for a binomial proportion.
Definition: reels.h:1176
pClipMap clip_map()
The address of the internal ClipMap.
Definition: reels.h:1323
void verbose_predict_clip(const ElementHash &client, Clip &clip, TimePoint &obs_time, bool &target_yn, int &longest_seq, uint64_t &n_visits, uint64_t &n_targets, double &targ_mean_t)
Predict time for a single Clip, returning all kinds of prediction-related information.
Definition: reels.cpp:1225
int tree_size()
Return the size of the internal CodeTree.
Definition: reels.h:1314
double normal_pdf(double x)
Density (pdf) for the normal distribution with mean 0 and standard deviation 1.
Definition: reels.h:1141
Targets(pClipMap p_clips, TargetMap target)
Construct a Targets object from a Clips object and a TargetMap.
Definition: reels.h:982
double predict_time(CodeTreeNode &node)
Predict the time to target for a sub-clip that starts at a node.
Definition: reels.h:1231
pCodeTree p_tree()
The address of the internal CodeTree.
Definition: reels.h:1332
bool load(pBinaryImage &p_bi)
Load the state of an object from a base64 mercury-dynamics serialization using image_get()
Definition: reels.cpp:1301
double predict_clip(Clip clip)
Predict the time to target for a clip.
Definition: reels.h:1249
bool insert_target(pChar p_c, pChar p_t)
Utility to fill the internal TargetMap target.
Definition: reels.cpp:1057
double agresti_coull_lower_bound(uint64_t n_hits, uint64_t n_total)
Lower bound of the Agresti-Coull confidence interval for a binomial proportion.
Definition: reels.h:1197
bool save(pBinaryImage &p_bi)
Save the state of an object into a base64 mercury-dynamics serialization using image_put()
Definition: reels.cpp:1457
int update_node(int idx_parent, uint64_t code, bool target, ExtFloat time_d)
Update (fit) the CodeTree inserting new nodes as necessary.
Definition: reels.h:1099
TimesToTarget predict()
Predict time to target for all the clients in the Clips object used to fit the model.
Definition: reels.cpp:1173
bool recurse_tree_stats(int depth, int idx, int parent_idx, uint64_t code, CodeInTreeStatMap &codes_stat)
Recursive tree exploration updating a CodeInTreeStatMap map.
Definition: reels.cpp:1273
double normal_cdf(double x)
Cumulative distribution (cdf) for the normal distribution with mean 0 and standard deviation 1.
Definition: reels.h:1156
A common ancestor of Clips and Targets to avoid duplicating time management.
Definition: reels.h:772
TimeUtil()
Definition: reels.h:776
char time_format[128]
Date and time format for insert_row() and define_event()
Definition: reels.h:778
void set_time_format(pChar fmt)
Sets the public property time_format to simplify the python interface.
Definition: reels.h:803
TimePoint get_time(pChar p_t)
Convert time as a string to a TimePoint (using the object's time_format).
Definition: reels.h:787
The namespace including everything, to simplify using Reels in a C++ application.
Definition: reels.cpp:35
std::vector< CodeScoreItem > CodeScores
CodeScores: A vector of CodeScoreItem.
Definition: reels.h:269
std::vector< OptimizeEvalItem > OptimizeEval
OptimizeEval: A vector of OptimizeEvalItem.
Definition: reels.h:256
double ExtFloat
Accumulator type: Was a 128 bit float, changed to 64 for macos compatibility.
Definition: reels.h:71
TargetMap * pTargetMap
Pointer to a TargetMap.
Definition: reels.h:229
std::set< uint64_t > CodeSet
CodeSet: A set of event codes.
Definition: reels.h:241
CodeTree * pCodeTree
Pointer to a CodeTree.
Definition: reels.h:293
std::vector< ElementHash > ClientIDs
ClientIDs: A vector of client ID hashes.
Definition: reels.h:201
std::map< TimePoint, uint64_t > Clip
Clip: The clip (timeline) of a client is just a map of time points and codes.
Definition: reels.h:213
std::map< uint64_t, BinEventPt > PriorityMap
PriorityMap: A map with all the acceptable priority values in the EventMap as keys.
Definition: reels.h:153
time_t TimePoint
A C 8-byte integer time point.
Definition: reels.h:69
const char * pChar
A c string.
Definition: reels.h:67
CodeTreeNode * pCodeTreeNode
Pointer to a CodeTreeNode.
Definition: reels.h:285
std::map< ElementHash, Clip > ClipMap
ClipMap: A map from clients to clips.
Definition: reels.h:220
uint64_t ElementHash
A binary hash of a string.
Definition: reels.h:65
std::set< ElementHash > ClientIDSet
ClientIDSet: A set of client ID hashes.
Definition: reels.h:208
std::map< BinEventPt, EventStat > EventMap
EventMap: A map from the hashes in a BinEventPt to usage data; it defines the info about an event.
Definition: reels.h:146
std::string String
A dynamically allocated c++ string.
Definition: reels.h:66
ClipMap * pClipMap
Pointer to a ClipMap.
Definition: reels.h:221
std::map< uint64_t, int > ChildIndex
ChildIndex: A map to find the next child in a CodeTree.
Definition: reels.h:274
bool image_put(pBinaryImage p_bi, void *p_data, int size)
A function to push arbitrary raw data into a BinaryImage.
Definition: reels.cpp:106
struct tm TimeStruct
A c structure of integer fields.
Definition: reels.h:70
BinaryImage * pBinaryImage
A pointer to BinaryImage.
Definition: reels.h:87
std::vector< ImageBlock > BinaryImage
An array of generic blocks to serialize anything.
Definition: reels.h:86
std::map< uint64_t, uint64_t > EventCodeMap
EventCodeMap: A map converting the space of Event codes into a lower cardinality set for Event optimi...
Definition: reels.h:159
std::map< ElementHash, TimePoint > TargetMap
TargetMap: A map from clients to target event TimePoints.
Definition: reels.h:228
Transform
Transform: The transformation applied to time differences. (And inverted again in predict()....
Definition: reels.h:298
@ tr_linear
Definition: reels.h:298
@ tr_undefined
Definition: reels.h:298
@ tr_log
Definition: reels.h:298
std::map< uint64_t, CodeInTreeStatistics > CodeInTreeStatMap
CodeInTreeStatMap: A map to store all the CodeInTreeStatistics by code.
Definition: reels.h:178
std::vector< CodeTreeNode > CodeTree
CodeTree: A tree of fitted targets.
Definition: reels.h:292
CodeSet * pCodeSet
Pointer to a CodeSet.
Definition: reels.h:242
uint64_t MurmurHash64A(const void *key, int len)
MurmurHash2, 64-bit versions, by Austin Appleby.
Definition: reels.cpp:56
std::map< ElementHash, StringUsage > StringUsageMap
StringUsageMap: A map from hashes to string and number of times the string is used.
Definition: reels.h:194
bool image_get(pBinaryImage p_bi, int &c_block, int &c_ofs, void *p_data, int size)
A function to get an arbitrary raw data block from a BinaryImage.
Definition: reels.cpp:168
Aggregate
Aggregate: The method used to aggregate predictions for different sequence lengths.
Definition: reels.h:303
@ ag_undefined
Definition: reels.h:303
@ ag_longest
Definition: reels.h:303
@ ag_mean
Definition: reels.h:303
@ ag_minimax
Definition: reels.h:303
std::vector< double > TimesToTarget
TimesToTarget: A vector of predictions.
Definition: reels.h:236
ClientsServer clients
Definition: reels.cpp:1580
ClipsServer clips
Definition: reels.cpp:1581
TargetsServer targets
Definition: reels.cpp:1582
EventsServer events
Definition: reels.cpp:1579
#define PREDICT_MAX_TIME
Hundred years when the target was never seen.
Definition: reels.h:62
#define MAX_SEQ_LEN_IN_PREDICT
The maximum sequence length used in prediction.
Definition: reels.h:61
#define IMAGE_BUFF_SIZE
Makes sizeof(ImageBlock) == 6K.
Definition: reels.h:58
#define DEFAULT_NUM_EVENTS
A size to store events in an Events object by default.
Definition: reels.h:60
#define WEIGHT_PRECISION
10 raised to the number of digits at which the weight is rounded.
Definition: reels.h:63
The binary representation of an event as stored in a transaction file.
Definition: reels.h:94
double w
The "weight". A double representing a weight of the event.
Definition: reels.h:97
bool operator<(const BinEventPt &o) const
Compare to another BinEventPt for strict order to support use as a key in a map.
Definition: reels.h:115
ElementHash d
The "description". A binary hash of a string representing "the event".
Definition: reels.h:96
bool operator==(const BinEventPt &o) const
Compare to another BinEventPt for identity to support use as a key in a map.
Definition: reels.h:105
ElementHash e
The "emitter". A binary hash of a string representing "owner of event".
Definition: reels.h:95
The binary representation of a transaction in a transaction file.
Definition: reels.h:125
ElementHash c
The "client". A binary hash of a string representing "the actor".
Definition: reels.h:126
TimePoint t
The "time". A timestamp of the event.
Definition: reels.h:127
CodeInTreeStatistics: A structure to compute aggregated statistics for each code.
Definition: reels.h:165
uint64_t n_succ_target
Definition: reels.h:167
int n_dep
Number of elements sum_dep has.
Definition: reels.h:171
uint64_t n_incl_target
Definition: reels.h:169
uint64_t n_succ_seen
Definition: reels.h:166
uint64_t sum_dep
Sum of tree depth to estimate mean depth.
Definition: reels.h:170
uint64_t n_incl_seen
Definition: reels.h:168
CodeScoreItem: A structure to sort codes by lift.
Definition: reels.h:261
double score
The score.
Definition: reels.h:263
uint64_t code
The code.
Definition: reels.h:262
CodeTreeNode: Each node in a fitted CodeTree.
Definition: reels.h:279
uint64_t n_target
The number of clips that visited the node with the target.
Definition: reels.h:281
uint64_t n_seen
The number of clips that visited the node (target and no target).
Definition: reels.h:280
ExtFloat sum_time_d
Sum of time differences for the elements with a defined target.
Definition: reels.h:282
ChildIndex child
A map of children by code (key) to index in the CodeTree.
Definition: reels.h:283
The metrics associated to an event identified by a BinEventPt.
Definition: reels.h:135
uint64_t seen
Number of times the event has been seen in the data.
Definition: reels.h:136
uint64_t priority
The (unique) current priority assigned in the priority queue (set) to this event.
Definition: reels.h:138
uint64_t code
A code number identifying the event.
Definition: reels.h:137
A generic block structure to store object state in a Python-friendly way.
Definition: reels.h:78
int block_num
The current block number in the BinaryImage.
Definition: reels.h:80
uint8_t buffer[IMAGE_BUFF_SIZE]
The buffer.
Definition: reels.h:82
int size
The number of already allocated bytes inside the current block.
Definition: reels.h:79
OptimizeEvalItem: A structure to compare predicted and observed.
Definition: reels.h:247
TimePoint t_obs
The observed result: Zero for not a target or elapsed time since the previous event in clip to target...
Definition: reels.h:249
int seq_len
The length of the predicting clip.
Definition: reels.h:250
double t_hat
The prediction (elapsed time since the last event in clip to predicted target).
Definition: reels.h:248
StringUsage: A pair of String and number of times it is used.
Definition: reels.h:184
String str
The string as plain text.
Definition: reels.h:186
uint64_t seen
Number of times the string is used. Increased by add_str() calls to the same string, decreased/destroyed b...
Definition: reels.h:185