This repository has been archived by the owner on Jul 16, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 151
/
Copy pathnaive_bayes_dogs.rs
154 lines (132 loc) · 5.31 KB
/
naive_bayes_dogs.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
extern crate rusty_machine;
extern crate rand;
use rand::Rand;
use rand::distributions::Sample;
use rand::distributions::normal::Normal;
use rusty_machine::learning::naive_bayes::{self, NaiveBayes};
use rusty_machine::linalg::{Matrix, BaseMatrix};
use rusty_machine::learning::SupModel;
/// Coat color of a dog. This is the class label the classifier is trained
/// to predict from a dog's trait measurements.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Color {
    /// A red dog.
    Red,
    /// A white dog.
    White,
}
/// A single dog: its coat color together with three numeric trait
/// measurements.
#[derive(Debug, Clone)]
struct Dog {
    /// Coat color of this dog.
    color: Color,
    /// Friendliness measurement.
    friendliness: f64,
    /// Furriness measurement.
    furriness: f64,
    /// Speed measurement.
    speed: f64,
}
impl Rand for Dog {
    /// Generate a random dog.
    ///
    /// The color is chosen first: red when a uniformly drawn `f64` is below
    /// `0.5`, white otherwise. Friendliness, furriness, and speed are then
    /// each drawn from a normal distribution with standard deviation 1 whose
    /// mean depends on the color, so the traits are independent given color.
    fn rand<R: rand::Rng>(rng: &mut R) -> Self {
        // Flip a coin to decide whether to generate a red or white dog.
        let coin: f64 = rng.gen();
        let color = if coin < 0.5 { Color::Red } else { Color::White };

        // Per-color means, in (friendliness, furriness, speed) order.
        let (friendliness_mean, furriness_mean, speed_mean) = match color {
            Color::Red => (0., 0., 0.),
            Color::White => (1., 1., -1.),
        };

        let mut friendliness_dist = Normal::new(friendliness_mean, 1.);
        let mut furriness_dist = Normal::new(furriness_mean, 1.);
        let mut speed_dist = Normal::new(speed_mean, 1.);

        // Draw in a fixed order (friendliness, furriness, speed) so the
        // values are taken from the RNG stream in a well-defined sequence.
        let friendliness = friendliness_dist.sample(rng);
        let furriness = furriness_dist.sample(rng);
        let speed = speed_dist.sample(rng);

        Dog {
            color: color,
            friendliness: friendliness,
            furriness: furriness,
            speed: speed,
        }
    }
}
/// Generate random dogs and pack them into the matrices the model consumes.
///
/// Returns `(training_matrix, target_matrix, test_matrix, test_dogs)`:
///
/// * `training_matrix`: one row per training dog holding its friendliness,
///   furriness, and speed, in that order.
/// * `target_matrix`: one row per training dog, `[1, 0]` for a red dog and
///   `[0, 1]` for a white one.
/// * `test_matrix`: the same trait layout as `training_matrix`, built from
///   the test dogs.
/// * `test_dogs`: the test dogs themselves, so predictions can be compared
///   against their real colors.
///
/// # Panics
///
/// Panics if the random number generator cannot be created.
fn generate_dog_data(training_set_size: u32, test_set_size: u32)
    -> (Matrix<f64>, Matrix<f64>, Matrix<f64>, Vec<Dog>) {
    // Flatten each dog's three trait measurements, then regroup the flat
    // buffer three values at a time so every dog becomes one matrix row.
    fn trait_matrix(dogs: &[Dog]) -> Matrix<f64> {
        let mut flat: Vec<f64> = Vec::new();
        for dog in dogs {
            flat.extend_from_slice(&[dog.friendliness, dog.furriness, dog.speed]);
        }
        flat.chunks(3).collect()
    }

    let mut rng = rand::StdRng::new()
        .expect("we should be able to get an RNG");

    // We'll train the model on these dogs. They are generated before the
    // test dogs, so both sets come from the same RNG in that order.
    let mut training_dogs: Vec<Dog> = Vec::new();
    for _ in 0..training_set_size {
        training_dogs.push(Dog::rand(&mut rng));
    }

    // ... and then use the model to make predictions about these dogs' color
    // given only their trait measurements.
    let mut test_dogs: Vec<Dog> = Vec::new();
    for _ in 0..test_set_size {
        test_dogs.push(Dog::rand(&mut rng));
    }

    // The model's `.train` method takes two matrices with one row per
    // training dog: trait measurements in the first, a one-hot color
    // encoding in the second.
    let training_matrix = trait_matrix(&training_dogs);

    let mut one_hot: Vec<f64> = Vec::new();
    for dog in &training_dogs {
        let label: [f64; 2] = match dog.color {
            Color::Red => [1., 0.],
            Color::White => [0., 1.],
        };
        one_hot.extend_from_slice(&label);
    }
    let target_matrix: Matrix<f64> = one_hot.chunks(2).collect();

    // Build another matrix for the test set of dogs to make predictions about.
    let test_matrix = trait_matrix(&test_dogs);

    (training_matrix, target_matrix, test_matrix, test_dogs)
}
/// Score one model prediction against a dog's real color.
///
/// `prediction` is the model's output row for `dog`. It is decoded as red
/// when its first entry is exactly `1.`, and as white otherwise. When the
/// decoded color equals `dog.color`, `*hits` is incremented by one.
///
/// Returns the color decoded from `prediction` (the model's guess, not the
/// dog's real color) together with whether that guess was correct.
///
/// # Panics
///
/// Panics if `prediction` is empty.
fn evaluate_prediction(hits: &mut u32, dog: &Dog, prediction: &[f64]) -> (Color, bool) {
    // The model's guess, decoded from the first column of its output row.
    let predicted_color = if prediction[0] == 1. {
        Color::Red
    } else {
        Color::White
    };

    // `dog.color` is the ground truth the guess is checked against.
    let accurate = dog.color == predicted_color;
    if accurate {
        *hits += 1;
    }

    (predicted_color, accurate)
}
/// Train a Gaussian naive Bayes classifier on randomly generated dogs, then
/// predict the colors of a separate test set and report the accuracy.
///
/// Only the last ten test dogs are printed individually; every test dog
/// counts toward the final accuracy figure.
fn main() {
    let (training_set_size, test_set_size) = (1000, 1000);

    // Generate all of our train and test data
    let (training_matrix, target_matrix, test_matrix, test_dogs) =
        generate_dog_data(training_set_size, test_set_size);

    // Train!
    let mut model = NaiveBayes::<naive_bayes::Gaussian>::new();
    model.train(&training_matrix, &target_matrix)
        .expect("failed to train model of dogs");

    // Predict!
    let predictions = model.predict(&test_matrix)
        .expect("failed to predict dogs!?");

    // Score how well we did. Every dog is evaluated, but only those at or
    // past `unprinted_total` (the last ten) get a line of output.
    let mut hits = 0;
    let unprinted_total = test_set_size.saturating_sub(10) as usize;
    if unprinted_total > 0 {
        // Stand-in for the results that are scored but not printed.
        println!("...");
    }
    for (index, (dog, prediction)) in test_dogs.iter().zip(predictions.row_iter()).enumerate() {
        let (predicted_color, accurate) =
            evaluate_prediction(&mut hits, dog, prediction.raw_slice());
        if index >= unprinted_total {
            // `evaluate_prediction` returns the model's guess; `dog.color`
            // is the ground truth. Each goes under its matching label.
            println!("Predicted: {:?}; Actual: {:?}; Accurate? {:?}",
                     predicted_color, dog.color, accurate);
        }
    }

    println!("Accuracy: {}/{} = {:.1}%", hits, test_set_size,
             (f64::from(hits)) / (f64::from(test_set_size)) * 100.);
}