From ad8ef093b019a6c1e5eae882ecf6545fcef1f0e8 Mon Sep 17 00:00:00 2001
From: no92 <no92.mail@gmail.com>
Date: Mon, 19 Jun 2023 17:36:59 +0200
Subject: [PATCH 1/5] ivb/gtt: clean up GTT code

---
 src/gtt.h            |  6 ++++++
 src/ivy_bridge/gtt.c | 10 +++++-----
 src/ivy_bridge/gtt.h |  4 ++++
 3 files changed, 15 insertions(+), 5 deletions(-)
 create mode 100644 src/gtt.h

diff --git a/src/gtt.h b/src/gtt.h
new file mode 100644
index 0000000..517373a
--- /dev/null
+++ b/src/gtt.h
@@ -0,0 +1,6 @@
+#pragma once
+
+#define GTT_PAGE_PRESENT 1 // OR-ed into an entry by the vmem_map functions to mark it present
+
+#define GTT64_ENTRY(gpu, gpu_addr) (*(volatile uint64_t *) ((gpu)->gtt_address + ((gpu_addr) >> 12) * 8)) // lvalue of the 8-byte entry for the 4 KiB page containing gpu_addr
+#define GTT32_ENTRY(gpu, gpu_addr) (*(volatile uint32_t *) ((gpu)->gtt_address + ((gpu_addr) >> 12) * 4)) // same, for tables with 4-byte entries
diff --git a/src/ivy_bridge/gtt.c b/src/ivy_bridge/gtt.c
index 749f05b..eea751d 100644
--- a/src/ivy_bridge/gtt.c
+++ b/src/ivy_bridge/gtt.c
@@ -1,11 +1,11 @@
-#include "gtt.h"
-
 #include <lil/imports.h>
 
+#include "src/gtt.h"
+#include "src/ivy_bridge/gtt.h"
+
 void lil_ivb_vmem_clear(LilGpu* gpu) {
     for (size_t i = 0; i < (gpu->gtt_size >> 12); i++) {
-        volatile uint32_t* gtt = (uint32_t*)(gpu->gtt_address + i * 4);
-        *gtt = 0;
+        GTT32_ENTRY(gpu, i << 12) = 0; // i is an entry index; the macro takes a GPU address and shifts it back down by 12
     }
 }
 
@@ -13,5 +13,5 @@ void lil_ivb_vmem_map(LilGpu* gpu, uint64_t host, GpuAddr gpu_addr) {
     if ((host & ~0xFFFFFFFFFF) != 0)
         lil_panic("Ivy Bridge GPU only supports 40-bit host addresses");
 
-    *(volatile uint32_t*)(gpu->gtt_address + (gpu_addr / 0x1000) * 4) = host | (((host >> 32) & 0xFF) << 4) | 0b110 | 1; // MLC/LLC Caching, Present
+    GTT32_ENTRY(gpu, gpu_addr) = host | (((host >> 32) & 0xFF) << 4) | GTT_IVB_CACHE_MLC_LLC | GTT_PAGE_PRESENT;
 }
diff --git a/src/ivy_bridge/gtt.h b/src/ivy_bridge/gtt.h
index c7ef138..270ae64 100644
--- a/src/ivy_bridge/gtt.h
+++ b/src/ivy_bridge/gtt.h
@@ -2,5 +2,9 @@
 
 #include <lil/intel.h>
 
+#include "src/gtt.h"
+
+#define GTT_IVB_CACHE_MLC_LLC 0b110
+
 void lil_ivb_vmem_clear(LilGpu* gpu);
 void lil_ivb_vmem_map(LilGpu* gpu, uint64_t host, GpuAddr gpu_addr);

From 164e612767e4c339b39e9e8952cbf505f3f77dc9 Mon Sep 17 00:00:00 2001
From: no92 <no92.mail@gmail.com>
Date: Wed, 14 Jun 2023 23:05:31 +0200
Subject: [PATCH 2/5] cfl/gtt: clean up GTT code

---
 src/coffee_lake/gtt.c | 19 ++++++++++++-------
 src/coffee_lake/gtt.h |  2 ++
 src/helpers.h         |  4 ++++
 3 files changed, 18 insertions(+), 7 deletions(-)
 create mode 100644 src/helpers.h

diff --git a/src/coffee_lake/gtt.c b/src/coffee_lake/gtt.c
index 31d4904..9671207 100644
--- a/src/coffee_lake/gtt.c
+++ b/src/coffee_lake/gtt.c
@@ -1,17 +1,22 @@
-#include "gtt.h"
-
 #include <lil/imports.h>
 
+#include "src/helpers.h"
+#include "src/coffee_lake/gtt.h"
+
+#define GTT_HAW 39
+
 void lil_cfl_vmem_clear(LilGpu* gpu) {
     for (size_t i = 0; i < (gpu->gtt_size >> 12); i++) {
-        volatile uint64_t* gtt = (uint64_t*)(gpu->gtt_address + i * 8);
-        *gtt = 0;
+        GTT64_ENTRY(gpu, i << 12) = 0; // i is an entry index; the macro takes a GPU address and shifts it back down by 12
     }
 }
 
 void lil_cfl_vmem_map(LilGpu* gpu, uint64_t host, GpuAddr gpu_addr) {
-    if ((host & ~0xFFFFFFFFFF) != 0)
-        lil_panic("Coffee Lake GPUs only supports 40-bit host addresses"); // TODO: Servers support 46-bits (At least according to the Skylake PRMs) support this
+    // TODO: Servers support 46-bits (At least according to the Skylake PRMs); support this
+    uint64_t mask = ((1ULL << GTT_HAW) - 1) & ~0xFFFULL; // 64-bit constants: unsigned long may be only 32 bits wide, making a 39-bit shift undefined
+
+    if ((host & ~mask) != 0) // mask also clears bits 0-11, so a host address that is not 4 KiB aligned is rejected too
+        lil_panic("Coffee Lake GPUs only support " STRINGIFY(GTT_HAW) "-bit host addresses");
 
-    *(volatile uint64_t*)(gpu->gtt_address + (gpu_addr / 0x1000) * 8) = host | 1; // Present
+    GTT64_ENTRY(gpu, gpu_addr) = (host & mask) | GTT_PAGE_PRESENT;
 }
diff --git a/src/coffee_lake/gtt.h b/src/coffee_lake/gtt.h
index 5f88e66..dcbdc02 100644
--- a/src/coffee_lake/gtt.h
+++ b/src/coffee_lake/gtt.h
@@ -2,5 +2,7 @@
 
 #include <lil/intel.h>
 
+#include "src/gtt.h"
+
 void lil_cfl_vmem_clear(LilGpu* gpu);
 void lil_cfl_vmem_map(LilGpu* gpu, uint64_t host, GpuAddr gpu_addr);
diff --git a/src/helpers.h b/src/helpers.h
new file mode 100644
index 0000000..6c6f6b6
--- /dev/null
+++ b/src/helpers.h
@@ -0,0 +1,4 @@
+#pragma once
+
+#define STRINGIFY(a) STRINGIFY_(a) // extra level so that a macro argument is expanded before it is stringified
+#define STRINGIFY_(a) #a

From 957618d2e62eaccc58f7a2f8a58e7e2f28acdd7a Mon Sep 17 00:00:00 2001
From: no92 <no92.mail@gmail.com>
Date: Wed, 14 Jun 2023 23:12:58 +0200
Subject: [PATCH 3/5] cfl/dp: make AUX (read|write)[n] functions public

---
 src/coffee_lake/dp.c | 21 ++++++++++++++++++++-
 src/coffee_lake/dp.h |  3 +++
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/src/coffee_lake/dp.c b/src/coffee_lake/dp.c
index e351a8f..9349b3c 100644
--- a/src/coffee_lake/dp.c
+++ b/src/coffee_lake/dp.c
@@ -354,7 +354,17 @@ uint8_t dp_aux_native_read(struct LilGpu* gpu, uint16_t addr) {
     return res.data[0];
 }
 
-static void dp_aux_native_write(struct LilGpu* gpu, uint16_t addr, uint8_t v) {
+/* Native AUX read: copies n bytes of the reply for AUX address addr into buf; panics if n exceeds the reply buffer. */
+void dp_aux_native_readn(struct LilGpu* gpu, uint16_t addr, size_t n, void *buf) {
+    AuxRequest req = { .request = DDI_AUX_NATIVE_READ, .address = addr, .size = n };
+    AuxResponse res;
+    if (n > sizeof(res.data)) // res.data is the only storage the reply lands in; never copy past its end
+        lil_panic("DP AUX native read is larger than the response buffer");
+    res = dp_aux_cmd(gpu, req);
+    memcpy(buf, res.data, n);
+}
+
+void dp_aux_native_write(struct LilGpu* gpu, uint16_t addr, uint8_t v) {
     AuxRequest req = {0};
     req.request = DDI_AUX_NATIVE_WRITE;
     req.address = addr;
@@ -363,6 +373,15 @@ static void dp_aux_native_write(struct LilGpu* gpu, uint16_t addr, uint8_t v) {
     dp_aux_cmd(gpu, req);
 }
 
+/* Native AUX write: sends the n bytes at buf to AUX address addr; panics if n exceeds the request payload buffer. */
+void dp_aux_native_writen(struct LilGpu* gpu, uint16_t addr, size_t n, void *buf) {
+    AuxRequest req = { .request = DDI_AUX_NATIVE_WRITE, .address = addr, .size = n };
+    if (n > sizeof(req.tx)) // req.tx is a fixed-size payload buffer inside the request; an unchecked n would overflow it
+        lil_panic("DP AUX native write is larger than the request buffer");
+    memcpy(req.tx, buf, n);
+    dp_aux_cmd(gpu, req);
+}
+
 #define DDC_SEGMENT 0x30
 #define DDC_ADDR 0x50
 #define EDID_SIZE 128
diff --git a/src/coffee_lake/dp.h b/src/coffee_lake/dp.h
index d234f61..bd5bbe8 100644
--- a/src/coffee_lake/dp.h
+++ b/src/coffee_lake/dp.h
@@ -23,3 +23,6 @@ typedef struct {
 LilDpMnValues lil_cfl_dp_calculate_mn(LilGpu* gpu, LilModeInfo* mode);
 
 uint8_t dp_aux_native_read(struct LilGpu* gpu, uint16_t addr);
+void dp_aux_native_readn(struct LilGpu* gpu, uint16_t addr, size_t n, void *buf);
+void dp_aux_native_write(struct LilGpu* gpu, uint16_t addr, uint8_t v);
+void dp_aux_native_writen(struct LilGpu* gpu, uint16_t addr, size_t n, void *buf);

From 13caf3422010583b721c6b9b779eec5e9a986ed7 Mon Sep 17 00:00:00 2001
From: no92 <no92.mail@gmail.com>
Date: Wed, 14 Jun 2023 23:22:02 +0200
Subject: [PATCH 4/5] cfl/dp: move DPCD addresses to an enum

---
 src/coffee_lake/dp.c | 28 ++++++++--------------------
 src/coffee_lake/dp.h | 22 ++++++++++++++++++++++
 2 files changed, 30 insertions(+), 20 deletions(-)

diff --git a/src/coffee_lake/dp.c b/src/coffee_lake/dp.c
index 9349b3c..00f22bd 100644
--- a/src/coffee_lake/dp.c
+++ b/src/coffee_lake/dp.c
@@ -41,23 +41,11 @@
 #define DDI_AUX_NATIVE_WRITE 0x8
 #define DDI_AUX_NATIVE_READ 0x9
 
-#define DPCD_REV 0x0
-#define DPCD_MAX_LINK_RATE 0x1
-#define DPCD_MAX_LANE_COUNT 0x2
-#define DPCD_MAX_DOWNSPREAD 0x3
 #define NO_AUX_HANDSHAKE_LINK_TRAINING (1 << 6)
 
-#define DPCD_DOWNSTREAMPORT_PRESENT 0x5
-#define DPCD_EDP_CONFIGURATION_CAP 0xD
-#define DPCD_DOWNSTREAM_PORT0_CAP 0x80
-
-#define DPCD_TRAIN_PATTERN 0x102
-
-#define DPCD_SET_POWER 0x600
 #define DPCD_POWER_D0 1
 #define DPCD_POWER_D3 2
 
-
 #define DP_TP_CTL(c) (0x64040 + ((c) * 0x100))
 #define DP_TP_STS(c) (0x64044 + ((c) * 0x100))
 
@@ -531,15 +519,15 @@ static void dp_set_sink_power(struct LilGpu* gpu, struct LilConnector* connector
     if(on) {
         lil_panic("TODO: Turn Sink on");
     } else {
-        uint8_t downstream = dp_aux_native_read(gpu, DPCD_DOWNSTREAMPORT_PRESENT);
+        uint8_t downstream = dp_aux_native_read(gpu, DOWNSTREAMPORT_PRESENT);
         if(rev == 0x11 && (downstream & 1)) {
-            uint8_t port0 = dp_aux_native_read(gpu, DPCD_DOWNSTREAM_PORT0_CAP);
+            uint8_t port0 = dp_aux_native_read(gpu, DOWNSTREAM_PORT0_CAP);
             if(port0 & (1 << 3)) { // HPD Aware
                 return;
             }
         }
 
-        dp_aux_native_write(gpu, DPCD_SET_POWER, DPCD_POWER_D3);
+        dp_aux_native_write(gpu, SET_POWER, DPCD_POWER_D3);
     }
 }
 
@@ -567,7 +555,7 @@ void lil_cfl_dp_init(struct LilGpu* gpu, struct LilConnector* connector) {
 
     *cstate &= ~0x3; // Disable DC5 and DC6 state*/
 
-    uint8_t cap = dp_aux_native_read(gpu, DPCD_EDP_CONFIGURATION_CAP);
+    uint8_t cap = dp_aux_native_read(gpu, EDP_CONFIGURATION_CAP);
     connector->type = (cap != 0) ? EDP : DISPLAYPORT; // Hacky, but it should work on any eDP display that is semi-modern, better option is to parse VBIOS
 
     edp_panel_on(gpu, connector);
@@ -701,7 +689,7 @@ void lil_cfl_dp_pre_enable(struct LilGpu* gpu, struct LilConnector* connector) {
     v |= DPLL_CTRL1_PROGRAM_ENABLE(dpll);
     v &= ~DPLL_CTRL1_HDMI_MODE(dpll); // DP mode
     v &= ~DPLL_CTRL1_LINK_RATE_MASK(dpll);
-    v |= DPLL_CTRL1_LINK_RATE(dpll, dp_aux_native_read(gpu, DPCD_MAX_LINK_RATE));
+    v |= DPLL_CTRL1_LINK_RATE(dpll, dp_aux_native_read(gpu, MAX_LINK_RATE));
     *dpll_ctrl1 = v;
     (void)*dpll_ctrl1;
 
@@ -739,7 +727,7 @@ void lil_cfl_dp_pre_enable(struct LilGpu* gpu, struct LilConnector* connector) {
 
     lil_sleep(5);
 
-    if(dp_aux_native_read(gpu, DPCD_REV) == 0x11 && dp_aux_native_read(gpu, DPCD_MAX_DOWNSPREAD) & NO_AUX_HANDSHAKE_LINK_TRAINING) {
+    if(dp_aux_native_read(gpu, DPCD_REV) == 0x11 && dp_aux_native_read(gpu, MAX_DOWNSPREAD) & NO_AUX_HANDSHAKE_LINK_TRAINING) {
         lil_sleep(2);
         v = *dp_tp_ctl;
         v &= ~DP_TP_CTL_TRAIN_MASK;
@@ -793,7 +781,7 @@ LilDpMnValues lil_cfl_dp_calculate_mn(LilGpu* gpu, LilModeInfo* mode) {
 
     uint64_t m = 3 * mode->bpc * mode->clock * 1000;
 
-    uint8_t link_rate = dp_aux_native_read(gpu, DPCD_MAX_LINK_RATE);
+    uint8_t link_rate = dp_aux_native_read(gpu, MAX_LINK_RATE);
     uint64_t symbol_rate = 0;
     if(link_rate == 0x6)
         symbol_rate = 162000000;
@@ -804,7 +792,7 @@ LilDpMnValues lil_cfl_dp_calculate_mn(LilGpu* gpu, LilModeInfo* mode) {
     else
         lil_panic("Unknown DP Link Speed");
 
-    uint64_t n = 8 * symbol_rate * (dp_aux_native_read(gpu, DPCD_MAX_LANE_COUNT) & 0xF);
+    uint64_t n = 8 * symbol_rate * (dp_aux_native_read(gpu, MAX_LANE_COUNT) & 0xF);
     cancel_m_n(&m, &n, DATA_N_MAX);
     ret.data_m = m;
     ret.data_n = n;
diff --git a/src/coffee_lake/dp.h b/src/coffee_lake/dp.h
index bd5bbe8..b1fdb00 100644
--- a/src/coffee_lake/dp.h
+++ b/src/coffee_lake/dp.h
@@ -2,6 +2,28 @@
 
 #include <lil/intel.h>
 
+enum DPCD_ADDRESSES { /* addresses passed as the addr argument of the dp_aux_native_* accessors */
+	DPCD_REV = 0x0,
+	MAX_LINK_RATE = 0x1,
+	MAX_LANE_COUNT = 0x2,
+	MAX_DOWNSPREAD = 0x3,
+	DOWNSTREAMPORT_PRESENT = 0x5,
+	EDP_CONFIGURATION_CAP = 0xD,
+	TRAINING_AUX_RD_INTERVAL = 0xE,
+	DOWNSTREAM_PORT0_CAP = 0x80,
+	LINK_BW_SET = 0x100,
+	LANE_COUNT_SET = 0x101,
+	TRAINING_PATTERN_SET = 0x102,
+	TRAINING_LANE0_SET = 0x103,
+	LINK_RATE_SET = 0x115,
+	DP_LANE0_1_STATUS = 0x202,
+	DP_LANE2_3_STATUS = 0x203,
+	LANE_ALIGN_STATUS_UPDATED = 0x204,
+	ADJUST_REQUEST_LANE0_1 = 0x206,
+	SET_POWER = 0x600,
+	EDP_DPCD_REV = 0x700,
+};
+
 void lil_cfl_dp_get_mode_info(LilGpu* gpu, LilModeInfo* out);
 
 bool lil_cfl_dp_is_connected (struct LilGpu* gpu, struct LilConnector* connector);

From f7854a775fe257fdb60a0be2996800df6dc79089 Mon Sep 17 00:00:00 2001
From: no92 <no92.mail@gmail.com>
Date: Thu, 15 Jun 2023 07:59:54 +0200
Subject: [PATCH 5/5] pci: disable bus mastering, I/O and mem access before
 reading BARs

---
 src/pci.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/pci.c b/src/pci.c
index b28d4df..d8f3a87 100644
--- a/src/pci.c
+++ b/src/pci.c
@@ -5,6 +5,10 @@
 #include <stddef.h>
 
 void lil_get_bar(void* device, int bar, uintptr_t* obase, uintptr_t* len) {
+	/* disable Bus Mastering and Memory + I/O space access */
+	uint16_t command = lil_pci_readw(device, 4);
+	lil_pci_writew(device, 4, command & ~7); /* Bus Master | Memory Space | I/O Space */
+
     size_t reg_index = 0x10 + bar * 4;
     uint64_t bar_low = lil_pci_readd(device, reg_index), bar_size_low;
     uint64_t bar_high = 0, bar_size_high = ~0;
@@ -35,4 +39,7 @@ void lil_get_bar(void* device, int bar, uintptr_t* obase, uintptr_t* len) {
 
     *obase = base;
     *len = size;
+
+	/* restore Bus Mastering and Memory + I/O space access */
+	lil_pci_writew(device, 4, command);
 }