-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcandidate_source.h
127 lines (95 loc) · 3.74 KB
/
candidate_source.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
/* candidate_source.h -*- C++ -*-
Jeremy Barnes, 26 August 2009
Copyright (c) 2009 Jeremy Barnes. All rights reserved.
Candidate source class.
*/
#ifndef __github__candidate_source_h__
#define __github__candidate_source_h__
#include "data.h"
#include "utils/configuration.h"
#include "boosting/dense_features.h"
#include "boosting/classifier.h"
#include <map>
/// One entry of a ranked list: an index, a repo id, a score, a rank range,
/// a feature vector and a keep flag.
/// NOTE(review): the exact meaning of `index` and of the rank bounds is not
/// visible in this header -- confirm against the code that fills them in.
struct Ranked_Entry {
    /// Build an "unset" entry: ids and rank bounds are -1, the score is zero,
    /// the keep flag is off and the feature vector is default-constructed.
    Ranked_Entry()
        : index(-1),
          repo_id(-1),
          score(0.0f),
          min_rank(-1),
          max_rank(-1),
          keep(false)
    {
    }

    int index;                          ///< -1 until assigned
    int repo_id;                        ///< -1 until assigned
    float score;                        ///< 0 until assigned
    int min_rank;                       ///< -1 until assigned
    int max_rank;                       ///< -1 until assigned
    ML::distribution<float> features;   ///< Feature values for this entry
    bool keep;                          ///< false until set by the caller
};
/// A list of Ranked_Entry objects.  Publicly derives from std::vector, so the
/// whole vector interface (size(), push_back(), iteration, ...) is available.
struct Ranked : std::vector<Ranked_Entry> {
/// Create an empty list.
Ranked() {}
/// Build a list from an IdSet.  Defined out of line.
/// NOTE(review): presumably creates one entry per id in the set -- confirm
/// against the implementation.
Ranked(const IdSet & idset);
/// Sort the list.  Defined out of line; the ordering criterion is not visible
/// in this header (presumably by score -- confirm against the implementation).
void sort();
};
/// Holder for per-candidate information that is passed (by non-const
/// reference) to the Candidate_Source feature and candidate generators.
struct Candidate_Data {
// Virtual destructor with an empty body.
// NOTE(review): presumably virtual so that derived holders can be deleted
// through a Candidate_Data pointer -- no derived type is visible here.
virtual ~Candidate_Data()
{
}
// Information about each candidate source from each repo.
// Two-level map keyed by integers; access with: info[repo_id][source_id]
// Note that operator[] on std::map inserts a default-constructed
// Ranked_Entry when the key is absent.
std::map<int, std::map<int, Ranked_Entry> > info;
};
/*****************************************************************************/
/* CANDIDATE_SOURCE */
/*****************************************************************************/
// A source of candidates; one for each of the different types. The goal is to
// generate a set of possible candidates, and to then add only a limited number
// to the final set.
struct Candidate_Source {
Candidate_Source(const std::string & type, int id);
virtual ~Candidate_Source();
std::string type() const { return type_; }
std::string name() const { return name_; }
int id() const { return id_; }
virtual void configure(const ML::Configuration & config,
const std::string & name);
virtual void init();
/// Generate feature space specific to this candidate
virtual boost::shared_ptr<const ML::Dense_Feature_Space>
feature_space() const;
/// Feature space that's common to all features
static ML::Dense_Feature_Space
common_feature_space();
static void
common_features(distribution<float> & result,
int user_id, int repo_id, const Data & data,
Candidate_Data & candidate_data);
/// Feature space containing features specific to this candidate source
virtual ML::Dense_Feature_Space specific_feature_space() const;
virtual void
gen_candidates(Ranked & result, int user_id, const Data & data,
Candidate_Data & candidate_data) const;
/// Generate the very basic set of candidates with features but no
/// ranking information
virtual void
candidate_set(Ranked & results, int user_id, const Data & data,
Candidate_Data & candidate_data) const = 0;
std::string name_;
std::string type_;
int id_;
int max_entries; ///< Max number of entries to generate for this one
float min_prob; ///< Minimum probability to generate for
// Classifier, etc to perform the ranking
std::string classifier_file;
ML::Classifier classifier;
boost::shared_ptr<const ML::Dense_Feature_Space> our_fs;
boost::shared_ptr<const ML::Dense_Feature_Space> classifier_fs;
ML::Dense_Feature_Space::Mapping mapping;
ML::Optimization_Info opt_info;
bool load_data;
};
/*****************************************************************************/
/* FACTORY */
/*****************************************************************************/
/// Factory function: returns a shared pointer to a Candidate_Source for the
/// given configuration and name.  Defined out of line.
/// NOTE(review): presumably the concrete source type is selected from the
/// configuration entries found under `name` -- confirm against the
/// implementation, including what happens for an unknown type.
boost::shared_ptr<Candidate_Source>
get_candidate_source(const ML::Configuration & config,
const std::string & name);
#endif /* __github__candidate_source_h__ */