Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

git11 fix #153

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
229 changes: 229 additions & 0 deletions src/PARAMS.med_core
Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@
# File : PARAMS.med_core
# Date : 03/06/19
# Description : Kaby Lake Configuration
#
# Based on documentation found here:
# https://en.wikichip.org/wiki/intel/microarchitectures/kaby_lake

## Simulation Parameters
--mode full
--model cmp
--sim_limit none

## Core Parameters

# Femtoseconds, 3.2GHz, used for energy estimates.
--chip_cycle_time 312500


### Fetch Stage
--fetch_off_path_ops 1
--fetch_across_cache_lines 1

# Will break the packet upon a taken branch.
--fetch_break_on_taken 1

# Number of bubble cycles to wait after taken branch.
--fetch_taken_bubble_cycles 0

#### ICache
--icache_size 32768
--icache_assoc 8
--icache_line_size 64

### Branch Predictor
--extra_recovery_cycles 0 # Number of cycles before the fetching of the first instructions after recovery.
--extra_redirect_cycles 0 # Number of cycles before the fetching of the first instructions after redirect.
--cfs_per_cycle 6 # Number of branches that can be predicted in a single cycle
--bp_update_at_retire 0 # Update the BP at retire. If false, update at the end of exec.
--update_bp_off_path 1 # Allow off path ops to update branch predictor state (e.g., ops when they complete exec stage).
--bp_mech tagescl
--taken_per_cycle 1
--fetch_queue_size 256
--fetch_queue_num_taken 16


#### BTB

# BTB model to use.
--btb_mech generic
--btb_entries 4096
--btb_assoc 4

# Allow the BTB to be updated by off path ops.
--btb_off_path_writes 1


#### CRS

# Enable return stack
--enable_crs 1
--crs_entries 32
--crs_realistic 1

### iBP
--enable_ibp 1 # Enable the indirect branch predictor
--ibtb_mech tc_tagged
# iBTB model for --ibtb_mech; one of: tc_tagless, tc_tagged, tc_hybrid.
--ibtb_off_path_writes 1 # Allow off path ops to update the ibtb.
--tc_entries 4096
--tc_assoc 4

### Decode Stage
--decode_cycles 5


### Map Stage
--map_cycles 8

### Issue Stage

# Max number of instructions to be fetched, decoded, renamed, and issued per cycle.
--issue_width 8

--rs_fill_width 0
--rs_sizes 176
--rs_connections 0
--fu_types 0 0 0 0 0 0 0 0 0 0 0 0


### Exec Stage

### DCache
--dcache_size 49152
--dcache_read_ports 2
--dcache_write_ports 1
--dcache_banks 2
--dcache_assoc 12
--dcache_line_size 64

### Reorder/Retire Stage

# Max number of instructions to be retired per cycle.
--node_ret_width 16
--node_table_size 352
#--load_queue_entries 128
#--store_queue_entries 72

# Do not keep stores in RSVs on cache misses; TODO: confirm what this knob does
--stores_do_not_block_window 1

# TODO: confirm what this knob does
--prefs_do_not_block_window 1


## Uncore

### Mid-level cache

# Enable use of a mid-level cache between the icache/dcache and L1
--mlc_present 0

### LLC
--l1_size 1048576
--l1_banks 1
--l1_cycles 18
--l1_assoc 16
--l1_line_size 64
--l1_interleave_factor 64


### Prefetcher
--pref_framework_on 1
--pref_stream_on 1
--pref_stream_per_core_enable 1
--pref_shared_queues 0
--pref_train_on_pref_misses 0
--pref_oracle_train 0
--pref_ul1req_queue_overwrite_on_full 1

--stream_length 64
--stream_prefetch_n 4
--stream_start_dis 1
--stream_train_num 4
--stream_create_on_dc_miss 0
--stream_create_on_l1_miss 1

--pref_throttlefb_on=1
--pref_acc_thresh_1=0.75
--pref_acc_thresh_2=0.4
--pref_acc_thresh_3=0.4
--pref_timely_thresh=0.01
--pref_polpf_thresh=0.005
--pref_update_interval=8192

--mem_req_buffer_pref_watermark 4
--promote_to_higher_priority_mem_req_type 1

### Memory
--addr_translation random

--mem_priority_ifetch 0
--mem_priority_dfetch 1
--mem_priority_dstore 2
--mem_priority_iprf 3
--mem_priority_dprf 4
--mem_priority_wb 5
--mem_priority_wb_nodirty 5

--mem_req_buffer_entries 64
--mem_l1_fill_queue_entries 64
--va_page_size_bytes 4096
--bus_width_in_bytes 8

--ramulator_standard DDR4
--ramulator_speed DDR4_2400R
--ramulator_org DDR4_8Gb_x8
--ramulator_channels 2
--ramulator_ranks 1
--ramulator_bankgroups 4
--ramulator_banks 4
--ramulator_chip_width 8
--ramulator_rows 65536
--ramulator_cols 1024
--ramulator_scheduling_policy FRFCFS_Cap
--ramulator_readq_entries 64
--ramulator_writeq_entries 64
--ramulator_record_cmd_trace FALSE
--ramulator_print_cmd_trace FALSE
--ramulator_tCK 833333
--ramulator_tCL 16
--ramulator_tCCD 6
--ramulator_tCCDS 4
--ramulator_tCCDL 6
--ramulator_tCWL 12
--ramulator_tBL 4
--ramulator_tWTR 9
--ramulator_tWTRS 3
--ramulator_tWTRL 9
--ramulator_tRP 16
--ramulator_tRPpb 16
--ramulator_tRPab 16
--ramulator_tRCD 16
--ramulator_tRCDR 16
--ramulator_tRCDW 16
--ramulator_tRAS 39
--dram_tech_in_nm 32

## Other


### Debug
--debug_inst_start 1
--debug_inst_stop 100000000
--debug_cycle_start 1
--debug_cycle_stop 100000000


## Stats and Params
--dump_params 1
--dump_stats 1
--dump_trace 0

####################################
--set_off_path_confirmed 1

--order_beyond_bus 1

--mem_ooo_stores 1
--mem_obey_store_dep 1
49 changes: 38 additions & 11 deletions src/bp/bp.c
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,7 @@ void init_bp_data(uns8 proc_id, Bp_Data* bp_data) {
/* bp_predict_op: predicts the target of a control flow instruction */

Addr bp_predict_op(Bp_Data* bp_data, Op* op, uns br_num, Addr fetch_addr) {
DEBUG(0,"before bp predict op, recovery_sch %d\n", op->oracle_info.recovery_sch);
Addr addr = fetch_addr;
/*Addr line_addr;*/
Addr* btb_target;
Expand Down Expand Up @@ -283,7 +284,9 @@ Addr bp_predict_op(Bp_Data* bp_data, Op* op, uns br_num, Addr fetch_addr) {
op->recovery_info.branchTarget = op->oracle_info.target;


DEBUG(0,"before timestamp, recovery_sch %d\n", op->oracle_info.recovery_sch);
bp_data->bp->timestamp_func(op);
DEBUG(0,"after timestamp, recovery_sch %d\n", op->oracle_info.recovery_sch);
if(USE_LATE_BP) {
bp_data->late_bp->timestamp_func(op);
}
Expand Down Expand Up @@ -369,15 +372,24 @@ Addr bp_predict_op(Bp_Data* bp_data, Op* op, uns br_num, Addr fetch_addr) {
op->oracle_info.pred = op->oracle_info.dir;
op->oracle_info.no_target = FALSE;
} else {
DEBUG(0,"before pred func, recovery_sch %d\n", op->oracle_info.recovery_sch);
op->oracle_info.pred = bp_data->bp->pred_func(op);
DEBUG(0,"after pred func, recovery_sch %d\n", op->oracle_info.recovery_sch);
if(USE_LATE_BP) {
op->oracle_info.late_pred = bp_data->late_bp->pred_func(op);
}
DEBUG(0,"after late pred func, recovery_sch %d\n", op->oracle_info.recovery_sch);
}

// Update history used by the rest of Scarab.
bp_data->global_hist = (bp_data->global_hist >> 1) |
(op->oracle_info.pred << 31);
if(USE_LATE_BP && DECOUPLED_BP){
bp_data->global_hist = (bp_data->global_hist >> 1) |
(op->oracle_info.late_pred << 31);
}
else{
bp_data->global_hist = (bp_data->global_hist >> 1) |
(op->oracle_info.pred << 31);
}

if(PERFECT_CBR_BTB ||
(PERFECT_NT_BTB && op->oracle_info.pred == NOT_TAKEN)) {
Expand Down Expand Up @@ -474,12 +486,7 @@ Addr bp_predict_op(Bp_Data* bp_data, Op* op, uns br_num, Addr fetch_addr) {
}
// }}}

// pred_target = convert_to_cmp_addr(op->proc_id, pred_target);

bp_data->bp->spec_update_func(op);
if(USE_LATE_BP) {
bp_data->late_bp->spec_update_func(op);
}
pred_target = convert_to_cmp_addr(op->proc_id, pred_target);

const Addr pc_plus_offset = ADDR_PLUS_OFFSET(
op->inst_info->addr, op->inst_info->trace_info.inst_size);
Expand All @@ -494,6 +501,10 @@ Addr bp_predict_op(Bp_Data* bp_data, Op* op, uns br_num, Addr fetch_addr) {
op->oracle_info.misfetch = !op->oracle_info.mispred &&
prediction != op->oracle_info.npc;

STAT_EVENT(op->proc_id, BP_ON_PATH_CORRECT + op->oracle_info.mispred +
2 * op->oracle_info.misfetch + 3 * op->off_path);
op->oracle_info.early_late_disagree = FALSE;
op->oracle_info.early_pred = op->oracle_info.pred;
if(USE_LATE_BP) {
const Addr late_prediction = op->oracle_info.late_pred ? pred_target :
pc_plus_offset;
Expand All @@ -503,8 +514,25 @@ Addr bp_predict_op(Bp_Data* bp_data, Op* op, uns br_num, Addr fetch_addr) {
(late_prediction != op->oracle_info.npc);
op->oracle_info.late_misfetch = !op->oracle_info.late_mispred &&
late_prediction != op->oracle_info.npc;
if(DECOUPLED_BP){
if(late_prediction != prediction){
op->oracle_info.early_late_disagree = TRUE;
}
op->oracle_info.pred = op->oracle_info.late_pred;
op->oracle_info.mispred = op->oracle_info.late_mispred;
op->oracle_info.misfetch= op->oracle_info.late_misfetch;
op->oracle_info.pred_npc = op->oracle_info.late_pred_npc;
}
}

if(!TAGE_NO_UNCOND_UPDATE || op->table_info->cf_type != CF_BR){
bp_data->bp->spec_update_func(op);
if(USE_LATE_BP) {
bp_data->late_bp->spec_update_func(op);
}
}


op->bp_cycle = cycle_count;

// {{{ stats and debugging
Expand All @@ -520,8 +548,6 @@ Addr bp_predict_op(Bp_Data* bp_data, Op* op, uns br_num, Addr fetch_addr) {
STAT_EVENT(op->proc_id, BTB_OFF_PATH_MISS);
}

STAT_EVENT(op->proc_id, BP_ON_PATH_CORRECT + op->oracle_info.mispred +
2 * op->oracle_info.misfetch + 3 * op->off_path);
STAT_EVENT(op->proc_id,
LATE_BP_ON_PATH_CORRECT + op->oracle_info.late_mispred +
2 * op->oracle_info.late_misfetch + 3 * op->off_path);
Expand Down Expand Up @@ -593,7 +619,8 @@ Addr bp_predict_op(Bp_Data* bp_data, Op* op, uns br_num, Addr fetch_addr) {
DEBUG(bp_data->proc_id, "low_conf_count:%d \n", td->td_info.low_conf_count);
}

return prediction;
DEBUG(0,"end bp predict op, recovery_sch %d\n", op->oracle_info.recovery_sch);
return op->oracle_info.pred_npc;
}


Expand Down
Loading
Loading