diff --git a/src/coffee_lake/dp.c b/src/coffee_lake/dp.c index e351a8f..00f22bd 100644 --- a/src/coffee_lake/dp.c +++ b/src/coffee_lake/dp.c @@ -41,23 +41,11 @@ #define DDI_AUX_NATIVE_WRITE 0x8 #define DDI_AUX_NATIVE_READ 0x9 -#define DPCD_REV 0x0 -#define DPCD_MAX_LINK_RATE 0x1 -#define DPCD_MAX_LANE_COUNT 0x2 -#define DPCD_MAX_DOWNSPREAD 0x3 #define NO_AUX_HANDSHAKE_LINK_TRAINING (1 << 6) -#define DPCD_DOWNSTREAMPORT_PRESENT 0x5 -#define DPCD_EDP_CONFIGURATION_CAP 0xD -#define DPCD_DOWNSTREAM_PORT0_CAP 0x80 - -#define DPCD_TRAIN_PATTERN 0x102 - -#define DPCD_SET_POWER 0x600 #define DPCD_POWER_D0 1 #define DPCD_POWER_D3 2 - #define DP_TP_CTL(c) (0x64040 + ((c) * 0x100)) #define DP_TP_STS(c) (0x64044 + ((c) * 0x100)) @@ -354,7 +342,17 @@ uint8_t dp_aux_native_read(struct LilGpu* gpu, uint16_t addr) { return res.data[0]; } -static void dp_aux_native_write(struct LilGpu* gpu, uint16_t addr, uint8_t v) { +void dp_aux_native_readn(struct LilGpu* gpu, uint16_t addr, size_t n, void *buf) { /* Native AUX read: requests n bytes starting at addr and copies the response data into buf. */ + AuxRequest req = {0}; + req.request = DDI_AUX_NATIVE_READ; + req.address = addr; + req.size = n; + AuxResponse res = dp_aux_cmd(gpu, req); + + memcpy(buf, res.data, n); /* NOTE(review): n is not checked against the capacity of res.data; confirm every caller bounds it. */ +} + +void dp_aux_native_write(struct LilGpu* gpu, uint16_t addr, uint8_t v) { AuxRequest req = {0}; req.request = DDI_AUX_NATIVE_WRITE; req.address = addr; @@ -363,6 +361,15 @@ static void dp_aux_native_write(struct LilGpu* gpu, uint16_t addr, uint8_t v) { dp_aux_cmd(gpu, req); } +void dp_aux_native_writen(struct LilGpu* gpu, uint16_t addr, size_t n, void *buf) { /* Native AUX write: sends n bytes from buf starting at addr. */ + AuxRequest req = {0}; + req.request = DDI_AUX_NATIVE_WRITE; + req.address = addr; + req.size = n; + memcpy(req.tx, buf, n); /* NOTE(review): n is not checked against the capacity of req.tx; confirm every caller bounds it. */ + dp_aux_cmd(gpu, req); +} + #define DDC_SEGMENT 0x30 #define DDC_ADDR 0x50 #define EDID_SIZE 128 @@ -512,15 +519,15 @@ static void dp_set_sink_power(struct LilGpu* gpu, struct LilConnector* connector if(on) { lil_panic("TODO: Turn Sink on"); } else { - uint8_t downstream = dp_aux_native_read(gpu, DPCD_DOWNSTREAMPORT_PRESENT); + 
uint8_t downstream = dp_aux_native_read(gpu, DOWNSTREAMPORT_PRESENT); if(rev == 0x11 && (downstream & 1)) { - uint8_t port0 = dp_aux_native_read(gpu, DPCD_DOWNSTREAM_PORT0_CAP); + uint8_t port0 = dp_aux_native_read(gpu, DOWNSTREAM_PORT0_CAP); if(port0 & (1 << 3)) { // HPD Aware return; } } - dp_aux_native_write(gpu, DPCD_SET_POWER, DPCD_POWER_D3); + dp_aux_native_write(gpu, SET_POWER, DPCD_POWER_D3); } } @@ -548,7 +555,7 @@ void lil_cfl_dp_init(struct LilGpu* gpu, struct LilConnector* connector) { *cstate &= ~0x3; // Disable DC5 and DC6 state*/ - uint8_t cap = dp_aux_native_read(gpu, DPCD_EDP_CONFIGURATION_CAP); + uint8_t cap = dp_aux_native_read(gpu, EDP_CONFIGURATION_CAP); connector->type = (cap != 0) ? EDP : DISPLAYPORT; // Hacky, but it should work on any eDP display that is semi-modern, better option is to parse VBIOS edp_panel_on(gpu, connector); @@ -682,7 +689,7 @@ void lil_cfl_dp_pre_enable(struct LilGpu* gpu, struct LilConnector* connector) { v |= DPLL_CTRL1_PROGRAM_ENABLE(dpll); v &= ~DPLL_CTRL1_HDMI_MODE(dpll); // DP mode v &= ~DPLL_CTRL1_LINK_RATE_MASK(dpll); - v |= DPLL_CTRL1_LINK_RATE(dpll, dp_aux_native_read(gpu, DPCD_MAX_LINK_RATE)); + v |= DPLL_CTRL1_LINK_RATE(dpll, dp_aux_native_read(gpu, MAX_LINK_RATE)); *dpll_ctrl1 = v; (void)*dpll_ctrl1; @@ -720,7 +727,7 @@ void lil_cfl_dp_pre_enable(struct LilGpu* gpu, struct LilConnector* connector) { lil_sleep(5); - if(dp_aux_native_read(gpu, DPCD_REV) == 0x11 && dp_aux_native_read(gpu, DPCD_MAX_DOWNSPREAD) & NO_AUX_HANDSHAKE_LINK_TRAINING) { + if(dp_aux_native_read(gpu, DPCD_REV) == 0x11 && dp_aux_native_read(gpu, MAX_DOWNSPREAD) & NO_AUX_HANDSHAKE_LINK_TRAINING) { lil_sleep(2); v = *dp_tp_ctl; v &= ~DP_TP_CTL_TRAIN_MASK; @@ -774,7 +781,7 @@ LilDpMnValues lil_cfl_dp_calculate_mn(LilGpu* gpu, LilModeInfo* mode) { uint64_t m = 3 * mode->bpc * mode->clock * 1000; - uint8_t link_rate = dp_aux_native_read(gpu, DPCD_MAX_LINK_RATE); + uint8_t link_rate = dp_aux_native_read(gpu, MAX_LINK_RATE); uint64_t symbol_rate 
= 0; if(link_rate == 0x6) symbol_rate = 162000000; @@ -785,7 +792,7 @@ LilDpMnValues lil_cfl_dp_calculate_mn(LilGpu* gpu, LilModeInfo* mode) { else lil_panic("Unknown DP Link Speed"); - uint64_t n = 8 * symbol_rate * (dp_aux_native_read(gpu, DPCD_MAX_LANE_COUNT) & 0xF); + uint64_t n = 8 * symbol_rate * (dp_aux_native_read(gpu, MAX_LANE_COUNT) & 0xF); cancel_m_n(&m, &n, DATA_N_MAX); ret.data_m = m; ret.data_n = n; diff --git a/src/coffee_lake/dp.h b/src/coffee_lake/dp.h index d234f61..b1fdb00 100644 --- a/src/coffee_lake/dp.h +++ b/src/coffee_lake/dp.h @@ -2,6 +2,28 @@ #include +enum DPCD_ADDRESSES { /* Addresses passed as the addr argument of the dp_aux_native_* helpers. */ + DPCD_REV = 0x0, + MAX_LINK_RATE = 0x1, + MAX_LANE_COUNT = 0x2, + MAX_DOWNSPREAD = 0x3, + DOWNSTREAMPORT_PRESENT = 0x5, + EDP_CONFIGURATION_CAP = 0xD, + TRAINING_AUX_RD_INTERVAL = 0xE, + DOWNSTREAM_PORT0_CAP = 0x80, + LINK_BW_SET = 0x100, + LANE_COUNT_SET = 0x101, + TRAINING_PATTERN_SET = 0x102, + TRAINING_LANE0_SET = 0x103, + LINK_RATE_SET = 0x115, + DP_LANE0_1_STATUS = 0x202, + DP_LANE2_3_STATUS = 0x203, + LANE_ALIGN_STATUS_UPDATED = 0x204, + ADJUST_REQUEST_LANE0_1 = 0x206, + SET_POWER = 0x600, + EDP_DPCD_REV = 0x700, +}; + void lil_cfl_dp_get_mode_info(LilGpu* gpu, LilModeInfo* out); bool lil_cfl_dp_is_connected (struct LilGpu* gpu, struct LilConnector* connector); @@ -23,3 +45,6 @@ typedef struct { LilDpMnValues lil_cfl_dp_calculate_mn(LilGpu* gpu, LilModeInfo* mode); uint8_t dp_aux_native_read(struct LilGpu* gpu, uint16_t addr); +void dp_aux_native_readn(struct LilGpu* gpu, uint16_t addr, size_t n, void *buf); +void dp_aux_native_write(struct LilGpu* gpu, uint16_t addr, uint8_t v); +void dp_aux_native_writen(struct LilGpu* gpu, uint16_t addr, size_t n, void *buf); diff --git a/src/coffee_lake/gtt.c b/src/coffee_lake/gtt.c index 31d4904..9671207 100644 --- a/src/coffee_lake/gtt.c +++ b/src/coffee_lake/gtt.c @@ -1,17 +1,22 @@ -#include "gtt.h" - #include +#include "src/helpers.h" +#include "src/coffee_lake/gtt.h" + +#define GTT_HAW 39 + void 
lil_cfl_vmem_clear(LilGpu* gpu) { for (size_t i = 0; i < (gpu->gtt_size >> 12); i++) { - volatile uint64_t* gtt = (uint64_t*)(gpu->gtt_address + i * 8); - *gtt = 0; + GTT64_ENTRY(gpu, i << 12) = 0; } } void lil_cfl_vmem_map(LilGpu* gpu, uint64_t host, GpuAddr gpu_addr) { - if ((host & ~0xFFFFFFFFFF) != 0) - lil_panic("Coffee Lake GPUs only supports 40-bit host addresses"); // TODO: Servers support 46-bits (At least according to the Skylake PRMs) support this + // TODO: Servers support 46-bits (At least according to the Skylake PRMs); support this + uint64_t mask = ((1ULL << GTT_HAW) - 1) & ~0xFFFULL; /* 1ULL keeps the shift at least 64 bits wide; 1UL is only 32 bits on ILP32/LLP64 targets, where shifting by GTT_HAW is undefined. The mask covers bits 12..GTT_HAW-1, so the check below also rejects hosts with any of the low 12 bits set. */ + + if ((host & ~mask) != 0) + lil_panic("Coffee Lake GPUs only support " STRINGIFY(GTT_HAW) "-bit host addresses"); - *(volatile uint64_t*)(gpu->gtt_address + (gpu_addr / 0x1000) * 8) = host | 1; // Present + GTT64_ENTRY(gpu, gpu_addr) = (host & mask) | GTT_PAGE_PRESENT; } diff --git a/src/coffee_lake/gtt.h b/src/coffee_lake/gtt.h index 5f88e66..dcbdc02 100644 --- a/src/coffee_lake/gtt.h +++ b/src/coffee_lake/gtt.h @@ -2,5 +2,7 @@ #include +#include "src/gtt.h" + void lil_cfl_vmem_clear(LilGpu* gpu); void lil_cfl_vmem_map(LilGpu* gpu, uint64_t host, GpuAddr gpu_addr); diff --git a/src/gtt.h b/src/gtt.h new file mode 100644 index 0000000..517373a --- /dev/null +++ b/src/gtt.h @@ -0,0 +1,6 @@ +#pragma once + +#define GTT_PAGE_PRESENT 1 + +#define GTT64_ENTRY(gpu, gpu_addr) (*(volatile uint64_t *) ((gpu)->gtt_address + ((gpu_addr) >> 12) * 8)) +#define GTT32_ENTRY(gpu, gpu_addr) (*(volatile uint32_t *) ((gpu)->gtt_address + ((gpu_addr) >> 12) * 4)) diff --git a/src/helpers.h b/src/helpers.h new file mode 100644 index 0000000..6c6f6b6 --- /dev/null +++ b/src/helpers.h @@ -0,0 +1,4 @@ +#pragma once + +#define STRINGIFY(a) STRINGIFY_(a) +#define STRINGIFY_(a) #a diff --git a/src/ivy_bridge/gtt.c b/src/ivy_bridge/gtt.c index 749f05b..eea751d 100644 --- a/src/ivy_bridge/gtt.c +++ b/src/ivy_bridge/gtt.c @@ -1,11 +1,11 @@ -#include "gtt.h" - #include +#include "src/gtt.h" +#include 
"src/ivy_bridge/gtt.h" + void lil_ivb_vmem_clear(LilGpu* gpu) { for (size_t i = 0; i < (gpu->gtt_size >> 12); i++) { - volatile uint32_t* gtt = (uint32_t*)(gpu->gtt_address + i * 4); - *gtt = 0; + GTT32_ENTRY(gpu, i << 12) = 0; } } @@ -13,5 +13,5 @@ void lil_ivb_vmem_map(LilGpu* gpu, uint64_t host, GpuAddr gpu_addr) { if ((host & ~0xFFFFFFFFFF) != 0) lil_panic("Ivy Bridge GPU only supports 40-bit host addresses"); - *(volatile uint32_t*)(gpu->gtt_address + (gpu_addr / 0x1000) * 4) = host | (((host >> 32) & 0xFF) << 4) | 0b110 | 1; // MLC/LLC Caching, Present + GTT32_ENTRY(gpu, gpu_addr) = host | (((host >> 32) & 0xFF) << 4) | GTT_IVB_CACHE_MLC_LLC | GTT_PAGE_PRESENT; /* host bits 39:32 go into entry bits 11:4; the 32-bit store keeps only the low 32 bits of the expression. */ } diff --git a/src/ivy_bridge/gtt.h b/src/ivy_bridge/gtt.h index c7ef138..270ae64 100644 --- a/src/ivy_bridge/gtt.h +++ b/src/ivy_bridge/gtt.h @@ -2,5 +2,9 @@ #include +#include "src/gtt.h" + +#define GTT_IVB_CACHE_MLC_LLC 0b110 + void lil_ivb_vmem_clear(LilGpu* gpu); void lil_ivb_vmem_map(LilGpu* gpu, uint64_t host, GpuAddr gpu_addr); diff --git a/src/pci.c b/src/pci.c index b28d4df..d8f3a87 100644 --- a/src/pci.c +++ b/src/pci.c @@ -5,6 +5,10 @@ #include void lil_get_bar(void* device, int bar, uintptr_t* obase, uintptr_t* len) { + /* disable Bus Mastering and Memory + I/O space access */ + uint16_t command = lil_pci_readw(device, 4); + lil_pci_writew(device, 4, command & ~7); /* Bus Master | Memory Space | I/O Space */ + size_t reg_index = 0x10 + bar * 4; uint64_t bar_low = lil_pci_readd(device, reg_index), bar_size_low; uint64_t bar_high = 0, bar_size_high = ~0; @@ -35,4 +39,7 @@ void lil_get_bar(void* device, int bar, uintptr_t* obase, uintptr_t* len) { *obase = base; *len = size; + + /* restore Bus Mastering and Memory + I/O space access */ + lil_pci_writew(device, 4, command); }