Skip to content

Commit

Permalink
Add in blacklist to indexing (#26)
Browse files Browse the repository at this point in the history
  • Loading branch information
ianoc authored Nov 3, 2020
1 parent a2ffd6f commit 5af949d
Show file tree
Hide file tree
Showing 3 changed files with 118 additions and 9 deletions.
8 changes: 4 additions & 4 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

102 changes: 98 additions & 4 deletions bazelfe-core/src/index_table/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ pub struct IndexTable {
id_to_target_vec: Arc<RwLock<Vec<Arc<Vec<u8>>>>>,
id_to_target_reverse_map: Arc<RwLock<HashMap<Arc<Vec<u8>>, usize>>>,
mutated: Arc<AtomicBool>,
target_blacklist: Arc<RwLock<HashSet<usize>>>,
}
#[derive(Clone, Debug)]
pub struct DebugIndexTable {
Expand All @@ -56,6 +57,7 @@ impl<'a> IndexTable {
id_to_target_vec: Arc::new(RwLock::new(Vec::new())),
id_to_target_reverse_map: Arc::new(RwLock::new(HashMap::new())),
mutated: Arc::new(AtomicBool::new(false)),
target_blacklist: Arc::new(RwLock::new(HashSet::default())),
}
}

Expand Down Expand Up @@ -88,13 +90,23 @@ impl<'a> IndexTable {
DebugIndexTable { data_map: res_lst }
}

/// Marks `target` as blacklisted for this index.
///
/// The target string is first resolved to its numeric id through
/// `maybe_insert_target_string` (presumably interning it when it is not yet
/// known — confirm against that helper), and that id is then stored in
/// `target_blacklist`.
pub async fn add_target_to_blacklist(&self, target: String) {
    let target_id = self.maybe_insert_target_string(target).await;
    // The write guard is a temporary, so it is released as soon as the
    // insert completes.
    self.target_blacklist.write().await.insert(target_id);
}

/// Registers a replacement mapping from `src_str` to `dest_str`, unless the
/// destination target is blacklisted.
///
/// Both target strings are resolved to ids through
/// `maybe_insert_target_string`. The mapping `src_id -> dest_id` is written to
/// `id_to_replacement_id` only when `dest_id` is absent from
/// `target_blacklist`; otherwise the call leaves the replacement map untouched.
pub async fn add_transformation_mapping(&self, src_str: String, dest_str: String) {
    let src_id = self.maybe_insert_target_string(src_str).await;
    let dest_id = self.maybe_insert_target_string(dest_str).await;

    // Hold the blacklist read guard across the insert so the check and the
    // write observe the same blacklist state.
    let blacklist = self.target_blacklist.read().await;
    if !blacklist.contains(&dest_id) {
        // This is the only write guard taken on `id_to_replacement_id` in this
        // function; acquiring a second one while the first is alive would
        // never resolve.
        let mut lock = self.id_to_replacement_id.write().await;
        lock.insert(src_id, dest_id);
    }
}

/// Uses the replacement map to possibly swap a target id for its replacement, as registered by a transform mapping.
pub async fn maybe_update_id(&self, src_id: usize) -> usize {
let lock = self.id_to_replacement_id.read().await;
match lock.get(&src_id) {
Expand Down Expand Up @@ -135,7 +147,7 @@ impl<'a> IndexTable {
let mut file = std::io::BufWriter::with_capacity(512 * 1024, file);

file.write_u64::<LittleEndian>(7654323579 as u64).unwrap();
file.write_u16::<LittleEndian>(0 as u16).unwrap();
file.write_u16::<LittleEndian>(1 as u16).unwrap();

let _ = {
let id_vec = self.id_to_target_vec.read().await;
Expand Down Expand Up @@ -192,6 +204,13 @@ impl<'a> IndexTable {
file.write_u64::<LittleEndian>(*v as u64).unwrap();
}

let target_blacklist = self.target_blacklist.read().await;
file.write_u64::<LittleEndian>(target_blacklist.len() as u64)
.unwrap();
for e in target_blacklist.iter() {
file.write_u64::<LittleEndian>(*e as u64).unwrap();
}

file.flush().unwrap();
}

Expand All @@ -208,7 +227,7 @@ impl<'a> IndexTable {
if signature != 7654323579 {
panic!("Invalid signature, {} not a bazel runner file?", signature);
}
let _ = rdr.read_u16::<LittleEndian>().unwrap();
let file_version_number = rdr.read_u16::<LittleEndian>().unwrap();

let num_vec_entries = rdr.read_u64::<LittleEndian>().unwrap();
let mut index_buf = Vec::default();
Expand Down Expand Up @@ -269,6 +288,19 @@ impl<'a> IndexTable {
}
debug!("Complete id_to_replacement_id");

let target_blacklist = if file_version_number >= 1 {
let target_blacklist_size = rdr.read_u64::<LittleEndian>().unwrap();
let mut target_blacklist = HashSet::default();
for _ in 0..target_blacklist_size {
let k = rdr.read_u64::<LittleEndian>().unwrap();
target_blacklist.insert(k as usize);
}
target_blacklist
} else {
HashSet::default()
};
debug!("Complete target blacklist");

debug!("Finished parsing..");

Self {
Expand All @@ -279,6 +311,7 @@ impl<'a> IndexTable {
id_to_target_vec: Arc::new(RwLock::new(index_buf)),
id_to_target_reverse_map: Arc::new(RwLock::new(reverse_hashmap)),
mutated: Arc::new(AtomicBool::new(false)),
target_blacklist: Arc::new(RwLock::new(target_blacklist)),
}
}

Expand Down Expand Up @@ -338,6 +371,7 @@ impl<'a> IndexTable {

let should_update = {
let read_guard = self.id_to_ctime.read().await;

match read_guard.get(key_id) {
None => true,
Some(prev) => *prev < newest_ctime,
Expand Down Expand Up @@ -441,6 +475,7 @@ impl<'a> IndexTable {
id_to_target_vec: Arc::new(RwLock::new(id_to_target_vec)),
id_to_target_reverse_map: Arc::new(RwLock::new(id_to_target_reverse_map)),
mutated: Arc::new(AtomicBool::new(false)),
target_blacklist: Arc::new(RwLock::new(HashSet::default())),
}
}
pub fn from_hashmap(m: HashMap<String, Vec<(u16, String)>>) -> Self {
Expand All @@ -459,6 +494,9 @@ impl<'a> IndexTable {
where
S: Into<Cow<'b, str>>,
{
if self.target_blacklist.read().await.contains(&target_id) {
return false;
}
let mut guard = self.tbl_map.write().await;
let k: Cow<'b, str> = key.into();

Expand Down Expand Up @@ -491,6 +529,9 @@ impl<'a> IndexTable {
where
S: Into<Cow<'b, str>>,
{
if self.target_blacklist.read().await.contains(&target_id) {
return false;
}
let mut guard = self.tbl_map.write().await;
let k: Cow<'b, str> = key.into();

Expand Down Expand Up @@ -714,6 +755,59 @@ mod tests {
);
}

// Checks that an insert naming a blacklisted target is dropped, while an
// insert for the same key naming a non-blacklisted target is kept.
#[tokio::test]
async fn test_adding_entries_to_blacklist() {
    let blacklisted_target = "@third_party_jvm//3rdparty/jvm/com/google/code/findbugs:jsr305";
    let allowed_target = "@third_party_jvm//3rdparty/jvm/com/google/code/findbugs:jsr306";
    let class_name = "javax.annotation.foo.boof.Nullable";

    let index = IndexTable::default();
    index
        .add_target_to_blacklist(String::from(blacklisted_target))
        .await;

    // Blacklisting a target must not make unrelated keys appear in the index.
    assert!(index
        .get("org.apache.parquet.thrift.test.TestPerson.TestPersonTupleScheme")
        .await
        .is_none());

    // Insert new elements, one in blacklist, one not.
    index
        .insert(class_name, (236, String::from(blacklisted_target)))
        .await;
    index
        .insert(class_name, (232, String::from(allowed_target)))
        .await;

    // Only the non-blacklisted entry should be present. Its target id is 1,
    // presumably because the blacklisted target took id 0 when it was
    // registered above — confirm against the id allocation logic.
    let entries = index.get(class_name).await.unwrap().as_vec().await;
    assert_eq!(
        entries,
        vec![IndexTableValueEntry {
            priority: Priority(232),
            target: 1
        }]
    );
}

// #[tokio::test]
// async fn get_candidates_from_map() {
// let mut tbl_map = HashMap::new();
Expand Down
17 changes: 16 additions & 1 deletion bazelfe-core/src/jvm_indexer/jvm_indexer_app.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,11 @@ struct Opt {
/// Refresh bazel deps only
#[clap(long)]
refresh_bazel_deps_only: bool,

/// Blacklist out these targets. Usually this matters when 3rdparty targets are poorly behaved and are not fully shaded
/// and may include classes from other targets.
#[clap(long)]
blacklist_targets_from_index: Option<Vec<String>>,
}

#[derive(Clone, Debug)]
Expand Down Expand Up @@ -209,6 +214,10 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {

let bazel_binary_path: String = (&opt.bazel_binary_path.to_str().unwrap()).to_string();

let target_blacklist = (&opt.blacklist_targets_from_index)
.clone()
.unwrap_or_default();

let allowed_rule_kinds: Vec<String> = vec![
"java_library",
"java_import",
Expand Down Expand Up @@ -281,7 +290,9 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
let mut values = values.clone();
while !values.is_empty() {
let e = values.pop().unwrap();
if e.starts_with("@") {
if e.starts_with("@")
&& (e.ends_with("//jar:jar") || e.ends_with("//jar:file"))
{
results_mapping.insert(e, bazel_dep.to_string());
} else if e.starts_with("//external") {
if let Some(r) = mapping.get(&e) {
Expand Down Expand Up @@ -533,6 +544,10 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
bazelfe_core::index_table::IndexTable::default()
};

for e in target_blacklist {
index_table.add_target_to_blacklist(e).await
}

let target_completed_tracker = TargetCompletedTracker::new(all_found_targets);

let processors: Vec<Box<dyn BazelEventHandler>> = vec![
Expand Down

0 comments on commit 5af949d

Please sign in to comment.