diff --git a/Makefile b/Makefile
index bbf8120..ca482db 100644
--- a/Makefile
+++ b/Makefile
@@ -1,41 +1,41 @@
-export GITHASH 		:= $(shell git rev-parse --short HEAD)
-export VERSION 		:= 2.0.0
-export API_VERSION 	:= 4
-export WANT_FLAC 	:= 1
-export WANT_MP3 	:= 1
-export WANT_WAV 	:= 1
-
-all: overlay nxExt module
-
-clean:
-	$(MAKE) -C sys-tune/nxExt clean
-	$(MAKE) -C overlay clean
-	$(MAKE) -C sys-tune clean
-	-rm -r dist
-	-rm sys-tune-*-*.zip
-
-overlay:
-	$(MAKE) -C overlay
-
-nxExt:
-	$(MAKE) -C sys-tune/nxExt
-
-module:
-	$(MAKE) -C sys-tune
-
-dist: all
-	mkdir -p dist/switch/.overlays
-	mkdir -p dist/atmosphere/contents/4200000000000000
-	cp sys-tune/sys-tune.nsp dist/atmosphere/contents/4200000000000000/exefs.nsp
-	cp overlay/sys-tune-overlay.ovl dist/switch/.overlays/
-	cp sys-tune/toolbox.json dist/atmosphere/contents/4200000000000000/
-	cd dist; zip -r sys-tune-$(VERSION)-$(GITHASH).zip ./**/; cd ../;
-	-hactool -t nso sys-tune/sys-tune.nso
-
-wah:
-	$(MAKE) overlay -j66
-	$(MAKE) sys-tune/nxExt -j66
-	$(MAKE) sys-tune -j66
-	$(MAKE) all -j66
-	
-.PHONY: all overlay module
+export GITHASH 		:= $(shell git rev-parse --short HEAD)
+export VERSION 		:= 2.0.0
+export API_VERSION 	:= 4
+export WANT_FLAC 	:= 1
+export WANT_MP3 	:= 1
+export WANT_WAV 	:= 1
+
+all: overlay nxExt module
+
+clean:
+	$(MAKE) -C sys-tune/nxExt clean
+	$(MAKE) -C overlay clean
+	$(MAKE) -C sys-tune clean
+	-rm -r dist
+	-rm sys-tune-*-*.zip
+
+overlay:
+	$(MAKE) -C overlay
+
+nxExt:
+	$(MAKE) -C sys-tune/nxExt
+
+module:
+	$(MAKE) -C sys-tune
+
+dist: all
+	mkdir -p dist/switch/.overlays
+	mkdir -p dist/atmosphere/contents/4200000000000000
+	cp sys-tune/sys-tune.nsp dist/atmosphere/contents/4200000000000000/exefs.nsp
+	cp overlay/sys-tune-overlay.ovl dist/switch/.overlays/
+	cp sys-tune/toolbox.json dist/atmosphere/contents/4200000000000000/
+	cd dist; zip -r sys-tune-$(VERSION)-$(GITHASH).zip ./**/; cd ../;
+	-hactool -t nso sys-tune/sys-tune.nso
+
+wah:
+	$(MAKE) overlay -j66
+	$(MAKE) sys-tune/nxExt -j66
+	$(MAKE) sys-tune -j66
+	$(MAKE) all -j66
+	
+.PHONY: all overlay module
diff --git a/README.md b/README.md
index 6bc855b..be9e22e 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,25 @@
-sys-tune is fixed yall
+# sys-tune
+## Background audio player for the Nintendo switch + Tesla overlay
+
+## Installation
+1. Download the release zip from [here](https://github.com/HookedBehemoth/sys-tune/releases/latest)
+2. Extract the zip to the root of your sd card.
+3. Put mp3, flac, wav or wave files to your sd card.
+
+You can manage playback via the Tesla overlay in the release.
+
+## Screenshots
+![Main](/sample/libtesla_1586882452.jpg)
+![Main](/sample/libtesla_1586882672.jpg)
+![Main](/sample/libtesla_1586882735.jpg)
+(Alpha values are wrong in these screenshots. The overlay will be less transparent.)
+
+## Special thanks to:
+- [mackron](http://mackron.github.io/) who made the awesome [audio decoders used here.](https://github.com/mackron/dr_libs/)
+- [WerWolv](https://werwolv.net/) for making libtesla, the UI library used for the control overlay.
+- [TotalJustice](https://github.com/ITotalJustice) for bug fixes, adding some features and bad code.
+
+## Info for developers
+I implemented an IPC interface accessible via service wrappers [here](/ipc/).
+
+My [Tesla overlay](/overlay/source/) uses these bindings.
diff --git a/common/config/config.cpp b/common/config/config.cpp
index cd496c4..7b13dac 100644
--- a/common/config/config.cpp
+++ b/common/config/config.cpp
@@ -1,123 +1,123 @@
-#include "config.hpp"
-#include "sdmc/sdmc.hpp"
-#include "minIni/minIni.h"
-#include <cstdio>
-
-namespace config {
-
-namespace {
-
-const char CONFIG_PATH[]{"/config/sys-tune/config.ini"};
-// blacklist uses it's own config file because eventually a database
-// may be setup and users can easily update their blacklist by downloading
-// an updated blacklist.ini.
-// Also, the blacklist lookup needs to be as fast as possible
-// (literally a race until the title opens audren), so a seperate, smaller file is ideal.
-const char BLACKLIST_PATH[]{"/config/sys-tune/blacklist.ini"};
-
-void create_config_dir() {
-    /* Creating directory on every set call looks sus, but the user may delete the dir */
-    /* whilst the sys-mod is running and then any changes made via the overlay */
-    /* is lost, which sucks. */
-    sdmc::CreateFolder("/config");
-    sdmc::CreateFolder("/config/sys-tune");
-}
-
-auto get_tid_str(u64 tid) -> const char* {
-    static char buf[21]{};
-    std::sprintf(buf, "%016lX", tid);
-    return buf;
-}
-
-}
-
-auto get_shuffle() -> bool {
-    return ini_getbool("config", "shuffle", false, CONFIG_PATH);
-}
-
-void set_shuffle(bool value) {
-    create_config_dir();
-    ini_putl("config", "shuffle", value, CONFIG_PATH);
-}
-
-auto get_repeat() -> int {
-    return ini_getl("config", "repeat", 1, CONFIG_PATH);
-}
-
-void set_repeat(int value) {
-    create_config_dir();
-    ini_putl("config", "repeat", value, CONFIG_PATH);
-}
-
-auto get_bass() -> double {
-    return ini_getf("config", "bass", 1.f, CONFIG_PATH);
-}
-
-void set_bass(double value) {
-    create_config_dir();
-    ini_putf("config", "bass", value, CONFIG_PATH);
-}
-
-auto get_volume() -> double {
-    return ini_getf("config", "volume", 1.f, CONFIG_PATH);
-}
-
-void set_volume(double value) {
-    create_config_dir();
-    ini_putf("config", "volume", value, CONFIG_PATH);
-}
-
-auto has_title_enabled(u64 tid) -> bool {
-    return ini_haskey("title", get_tid_str(tid), CONFIG_PATH);
-}
-
-auto get_title_enabled(u64 tid) -> bool {
-    return ini_getbool("title", get_tid_str(tid), true, CONFIG_PATH);
-}
-
-void set_title_enabled(u64 tid, bool value) {
-    create_config_dir();
-    ini_putl("title", get_tid_str(tid), value, CONFIG_PATH);
-}
-
-auto get_title_enabled_default() -> bool {
-    return ini_getbool("title", "default", true, CONFIG_PATH);
-}
-
-void set_title_enabled_default(bool value) {
-    create_config_dir();
-    ini_putl("title", "default", value, CONFIG_PATH);
-}
-
-auto has_title_volume(u64 tid) -> bool {
-    return ini_haskey("volume", get_tid_str(tid), CONFIG_PATH);
-}
-
-auto get_title_volume(u64 tid) -> double {
-    return ini_getf("volume", get_tid_str(tid), 1.f, CONFIG_PATH);
-}
-
-void set_title_volume(u64 tid, double value) {
-    create_config_dir();
-    ini_putf("volume", get_tid_str(tid), value, CONFIG_PATH);
-}
-
-auto get_default_title_volume() -> double {
-    return ini_getf("config", "global_volume", 1.f, CONFIG_PATH);
-}
-
-void set_default_title_volume(double value) {
-    create_config_dir();
-    ini_putf("config", "global_volume", value, CONFIG_PATH);
-}
-
-auto get_title_blacklist(u64 tid) -> bool {
-    return ini_getbool("blacklist", get_tid_str(tid), false, BLACKLIST_PATH);
-}
-
-void set_title_blacklist(u64 tid, bool value) {
-    create_config_dir();
-    ini_putl("blacklist", get_tid_str(tid), value, BLACKLIST_PATH);
-}
-
-}
+#include "config.hpp"
+#include "sdmc/sdmc.hpp"
+#include "minIni/minIni.h"
+#include <cstdio>
+
+namespace config {
+
+namespace {
+
+const char CONFIG_PATH[]{"/config/sys-tune/config.ini"};
+// blacklist uses it's own config file because eventually a database
+// may be setup and users can easily update their blacklist by downloading
+// an updated blacklist.ini.
+// Also, the blacklist lookup needs to be as fast as possible
+// (literally a race until the title opens audren), so a seperate, smaller file is ideal.
+const char BLACKLIST_PATH[]{"/config/sys-tune/blacklist.ini"};
+
+void create_config_dir() {
+    /* Creating directory on every set call looks sus, but the user may delete the dir */
+    /* whilst the sys-mod is running and then any changes made via the overlay */
+    /* is lost, which sucks. */
+    sdmc::CreateFolder("/config");
+    sdmc::CreateFolder("/config/sys-tune");
+}
+
+auto get_tid_str(u64 tid) -> const char* {
+    static char buf[21]{};
+    std::sprintf(buf, "%016lX", tid);
+    return buf;
+}
+
+}
+
+auto get_shuffle() -> bool {
+    return ini_getbool("config", "shuffle", false, CONFIG_PATH);
+}
+
+void set_shuffle(bool value) {
+    create_config_dir();
+    ini_putl("config", "shuffle", value, CONFIG_PATH);
+}
+
+auto get_repeat() -> int {
+    return ini_getl("config", "repeat", 1, CONFIG_PATH);
+}
+
+void set_repeat(int value) {
+    create_config_dir();
+    ini_putl("config", "repeat", value, CONFIG_PATH);
+}
+
+auto get_bass() -> double {
+    return ini_getf("config", "bass", 1.f, CONFIG_PATH);
+}
+
+void set_bass(double value) {
+    create_config_dir();
+    ini_putf("config", "bass", value, CONFIG_PATH);
+}
+
+auto get_volume() -> double {
+    return ini_getf("config", "volume", 1.f, CONFIG_PATH);
+}
+
+void set_volume(double value) {
+    create_config_dir();
+    ini_putf("config", "volume", value, CONFIG_PATH);
+}
+
+auto has_title_enabled(u64 tid) -> bool {
+    return ini_haskey("title", get_tid_str(tid), CONFIG_PATH);
+}
+
+auto get_title_enabled(u64 tid) -> bool {
+    return ini_getbool("title", get_tid_str(tid), true, CONFIG_PATH);
+}
+
+void set_title_enabled(u64 tid, bool value) {
+    create_config_dir();
+    ini_putl("title", get_tid_str(tid), value, CONFIG_PATH);
+}
+
+auto get_title_enabled_default() -> bool {
+    return ini_getbool("title", "default", true, CONFIG_PATH);
+}
+
+void set_title_enabled_default(bool value) {
+    create_config_dir();
+    ini_putl("title", "default", value, CONFIG_PATH);
+}
+
+auto has_title_volume(u64 tid) -> bool {
+    return ini_haskey("volume", get_tid_str(tid), CONFIG_PATH);
+}
+
+auto get_title_volume(u64 tid) -> double {
+    return ini_getf("volume", get_tid_str(tid), 1.f, CONFIG_PATH);
+}
+
+void set_title_volume(u64 tid, double value) {
+    create_config_dir();
+    ini_putf("volume", get_tid_str(tid), value, CONFIG_PATH);
+}
+
+auto get_default_title_volume() -> double {
+    return ini_getf("config", "global_volume", 1.f, CONFIG_PATH);
+}
+
+void set_default_title_volume(double value) {
+    create_config_dir();
+    ini_putf("config", "global_volume", value, CONFIG_PATH);
+}
+
+auto get_title_blacklist(u64 tid) -> bool {
+    return ini_getbool("blacklist", get_tid_str(tid), false, BLACKLIST_PATH);
+}
+
+void set_title_blacklist(u64 tid, bool value) {
+    create_config_dir();
+    ini_putl("blacklist", get_tid_str(tid), value, BLACKLIST_PATH);
+}
+
+}
diff --git a/common/config/config.hpp b/common/config/config.hpp
index 2b2626d..fa6cbc3 100644
--- a/common/config/config.hpp
+++ b/common/config/config.hpp
@@ -1,45 +1,45 @@
-#pragma once
-
-#include <switch.h>
-
-namespace config {
-
-// tune shuffle
-auto get_shuffle() -> bool;
-void set_shuffle(bool value);
-
-// tune repeat
-auto get_repeat() -> int;
-void set_repeat(int value);
-
-// tune volume
-auto get_bass() -> double;
-void set_bass(double value);
-
-// tune volume
-auto get_volume() -> double;
-void set_volume(double value);
-
-// per title tune enable
-auto has_title_enabled(u64 tid) -> bool;
-auto get_title_enabled(u64 tid) -> bool;
-void set_title_enabled(u64 tid, bool value);
-
-// default for tune for every title
-auto get_title_enabled_default() -> bool;
-void set_title_enabled_default(bool value);
-
-// per title volume
-auto has_title_volume(u64 tid) -> bool;
-auto get_title_volume(u64 tid) -> double;
-void set_title_volume(u64 tid, double value);
-
-// default volume for every title
-auto get_default_title_volume() -> double;
-void set_default_title_volume(double value);
-
-// returns true if title causes a fatal on launch
-auto get_title_blacklist(u64 tid) -> bool;
-void set_title_blacklist(u64 tid, bool value);
-
-}
+#pragma once
+
+#include <switch.h>
+
+namespace config {
+
+// tune shuffle
+auto get_shuffle() -> bool;
+void set_shuffle(bool value);
+
+// tune repeat
+auto get_repeat() -> int;
+void set_repeat(int value);
+
+// tune volume
+auto get_bass() -> double;
+void set_bass(double value);
+
+// tune volume
+auto get_volume() -> double;
+void set_volume(double value);
+
+// per title tune enable
+auto has_title_enabled(u64 tid) -> bool;
+auto get_title_enabled(u64 tid) -> bool;
+void set_title_enabled(u64 tid, bool value);
+
+// default for tune for every title
+auto get_title_enabled_default() -> bool;
+void set_title_enabled_default(bool value);
+
+// per title volume
+auto has_title_volume(u64 tid) -> bool;
+auto get_title_volume(u64 tid) -> double;
+void set_title_volume(u64 tid, double value);
+
+// default volume for every title
+auto get_default_title_volume() -> double;
+void set_default_title_volume(double value);
+
+// returns true if title causes a fatal on launch
+auto get_title_blacklist(u64 tid) -> bool;
+void set_title_blacklist(u64 tid, bool value);
+
+}
diff --git a/ipc/ipc_cmd.h b/ipc/ipc_cmd.h
index 2443704..f9e6673 100644
--- a/ipc/ipc_cmd.h
+++ b/ipc/ipc_cmd.h
@@ -1,38 +1,38 @@
-#pragma once
-
-enum TuneIpcCmd {
-    TuneIpcCmd_GetStatus = 0,
-    TuneIpcCmd_Play = 1,
-    TuneIpcCmd_Pause = 2,
-    TuneIpcCmd_Next = 3,
-    TuneIpcCmd_Prev = 4,
-
-    TuneIpcCmd_GetVolume = 10,
-    TuneIpcCmd_SetVolume = 11,
-    TuneIpcCmd_GetTitleVolume = 12,
-    TuneIpcCmd_SetTitleVolume = 13,
-    TuneIpcCmd_GetDefaultTitleVolume = 14,
-    TuneIpcCmd_SetDefaultTitleVolume = 15,
-    TuneIpcCmd_GetBass = 16,
-    TuneIpcCmd_SetBass = 17,
-
-    TuneIpcCmd_GetRepeatMode = 20,
-    TuneIpcCmd_SetRepeatMode = 21,
-    TuneIpcCmd_GetShuffleMode = 22,
-    TuneIpcCmd_SetShuffleMode = 23,
-
-    TuneIpcCmd_GetPlaylistSize = 30,
-    TuneIpcCmd_GetPlaylistItem = 31,
-    TuneIpcCmd_GetCurrentQueueItem = 32,
-    TuneIpcCmd_ClearQueue = 33,
-    TuneIpcCmd_MoveQueueItem = 34,
-    TuneIpcCmd_Select = 35,
-    TuneIpcCmd_Seek = 36,
-
-    TuneIpcCmd_Enqueue = 40,
-    TuneIpcCmd_Remove = 41,
-
-    TuneIpcCmd_QuitServer = 50,
-
-    TuneIpcCmd_GetApiVersion = 5000,
-};
+#pragma once
+
+enum TuneIpcCmd {
+    TuneIpcCmd_GetStatus = 0,
+    TuneIpcCmd_Play = 1,
+    TuneIpcCmd_Pause = 2,
+    TuneIpcCmd_Next = 3,
+    TuneIpcCmd_Prev = 4,
+
+    TuneIpcCmd_GetVolume = 10,
+    TuneIpcCmd_SetVolume = 11,
+    TuneIpcCmd_GetTitleVolume = 12,
+    TuneIpcCmd_SetTitleVolume = 13,
+    TuneIpcCmd_GetDefaultTitleVolume = 14,
+    TuneIpcCmd_SetDefaultTitleVolume = 15,
+    TuneIpcCmd_GetBass = 16,
+    TuneIpcCmd_SetBass = 17,
+
+    TuneIpcCmd_GetRepeatMode = 20,
+    TuneIpcCmd_SetRepeatMode = 21,
+    TuneIpcCmd_GetShuffleMode = 22,
+    TuneIpcCmd_SetShuffleMode = 23,
+
+    TuneIpcCmd_GetPlaylistSize = 30,
+    TuneIpcCmd_GetPlaylistItem = 31,
+    TuneIpcCmd_GetCurrentQueueItem = 32,
+    TuneIpcCmd_ClearQueue = 33,
+    TuneIpcCmd_MoveQueueItem = 34,
+    TuneIpcCmd_Select = 35,
+    TuneIpcCmd_Seek = 36,
+
+    TuneIpcCmd_Enqueue = 40,
+    TuneIpcCmd_Remove = 41,
+
+    TuneIpcCmd_QuitServer = 50,
+
+    TuneIpcCmd_GetApiVersion = 5000,
+};
diff --git a/ipc/tune.c b/ipc/tune.c
index af10876..09c9b7e 100644
--- a/ipc/tune.c
+++ b/ipc/tune.c
@@ -1,155 +1,155 @@
-#include "tune.h"
-
-#include "ipc_cmd.h"
-
-#include <string.h>
-
-Service g_tune;
-
-Result tuneInitialize() {
-    Handle handle = 0;
-    Result rc = svcConnectToNamedPort(&handle, "tune");
-    if (R_SUCCEEDED(rc))
-        serviceCreate(&g_tune, handle);
-    return rc;
-}
-
-void tuneExit() {
-    serviceClose(&g_tune);
-}
-
-Result tuneGetStatus(bool *status) {
-    u8 tmp=0;
-    Result rc = serviceDispatchOut(&g_tune, TuneIpcCmd_GetStatus, tmp);
-    if (R_SUCCEEDED(rc) && status) *status = tmp & 1;
-    return rc;
-}
-
-Result tunePlay() {
-    return serviceDispatch(&g_tune, TuneIpcCmd_Play);
-}
-
-Result tunePause() {
-    return serviceDispatch(&g_tune, TuneIpcCmd_Pause);
-}
-
-Result tuneNext() {
-    return serviceDispatch(&g_tune, TuneIpcCmd_Next);
-}
-
-Result tunePrev() {
-    return serviceDispatch(&g_tune, TuneIpcCmd_Prev);
-}
-
-Result tuneGetBass(double *out) {
-    return serviceDispatchOut(&g_tune, TuneIpcCmd_GetBass, *out);
-}
-
-Result tuneSetBass(double bass) {
-    return serviceDispatchIn(&g_tune, TuneIpcCmd_SetBass, bass);
-}
-
-Result tuneGetVolume(double *out) {
-    return serviceDispatchOut(&g_tune, TuneIpcCmd_GetVolume, *out);
-}
-
-Result tuneSetVolume(double volume) {
-    return serviceDispatchIn(&g_tune, TuneIpcCmd_SetVolume, volume);
-}
-
-Result tuneGetTitleVolume(double *out) {
-    return serviceDispatchOut(&g_tune, TuneIpcCmd_GetTitleVolume, *out);
-}
-
-Result tuneSetTitleVolume(double volume) {
-    return serviceDispatchIn(&g_tune, TuneIpcCmd_SetTitleVolume, volume);
-}
-
-Result tuneGetDefaultTitleVolume(double *out) {
-    return serviceDispatchOut(&g_tune, TuneIpcCmd_GetDefaultTitleVolume, *out);
-}
-
-Result tuneSetDefaultTitleVolume(double volume) {
-    return serviceDispatchIn(&g_tune, TuneIpcCmd_SetDefaultTitleVolume, volume);
-}
-
-Result tuneGetRepeatMode(TuneRepeatMode *state) {
-    u8 out = 0;
-    Result rc = serviceDispatchOut(&g_tune, TuneIpcCmd_GetRepeatMode, out);
-    if (R_SUCCEEDED(rc) && state)
-        *state = out;
-    return rc;
-}
-
-Result tuneSetRepeatMode(TuneRepeatMode state) {
-    u8 tmp = state;
-    return serviceDispatchIn(&g_tune, TuneIpcCmd_SetRepeatMode, tmp);
-}
-
-Result tuneGetShuffleMode(TuneShuffleMode *state) {
-    u8 out = 0;
-    Result rc = serviceDispatchOut(&g_tune, TuneIpcCmd_GetShuffleMode, out);
-    if (R_SUCCEEDED(rc) && state)
-        *state = out;
-    return rc;
-}
-Result tuneSetShuffleMode(TuneShuffleMode state) {
-    u8 tmp = state;
-    return serviceDispatchIn(&g_tune, TuneIpcCmd_SetShuffleMode, tmp);
-}
-
-Result tuneGetPlaylistSize(u32 *count) {
-    return serviceDispatchOut(&g_tune, TuneIpcCmd_GetPlaylistSize, *count);
-}
-
-Result tuneGetPlaylistItem(u32 index, char *out_path, size_t out_path_length) {
-    return serviceDispatchIn(&g_tune, TuneIpcCmd_GetPlaylistItem, index,
-                              .buffer_attrs = {SfBufferAttr_Out | SfBufferAttr_HipcMapAlias},
-                              .buffers = {{out_path, out_path_length}}, );
-}
-
-Result tuneGetCurrentQueueItem(char *out_path, size_t out_path_length, TuneCurrentStats *out) {
-    return serviceDispatchOut(&g_tune, TuneIpcCmd_GetCurrentQueueItem, *out,
-                              .buffer_attrs = {SfBufferAttr_Out | SfBufferAttr_HipcMapAlias},
-                              .buffers = {{out_path, out_path_length}}, );
-}
-
-Result tuneClearQueue() {
-    return serviceDispatch(&g_tune, TuneIpcCmd_ClearQueue);
-}
-
-Result tuneMoveQueueItem(u32 src, u32 dst) {
-    const struct {
-        u32 src;
-        u32 dst;
-    } in = {src, dst};
-    return serviceDispatchIn(&g_tune, TuneIpcCmd_MoveQueueItem, in);
-}
-
-Result tuneSelect(u32 index) {
-    return serviceDispatchIn(&g_tune, TuneIpcCmd_Select, index);
-}
-
-Result tuneSeek(u32 position) {
-    return serviceDispatchIn(&g_tune, TuneIpcCmd_Seek, position);
-}
-
-Result tuneEnqueue(const char *path, TuneEnqueueType type) {
-    u8 tmp = type;
-    size_t path_length = strlen(path);
-    return serviceDispatchIn(&g_tune, TuneIpcCmd_Enqueue, tmp,
-                             .buffer_attrs = {SfBufferAttr_In | SfBufferAttr_HipcMapAlias},
-                             .buffers = {{path, path_length}}, );
-}
-
-Result tuneRemove(u32 index) {
-    return serviceDispatchIn(&g_tune, TuneIpcCmd_Remove, index);
-}
-
-Result tuneQuit() {
-    return serviceDispatch(&g_tune, TuneIpcCmd_QuitServer);
-}
-
-Result tuneGetApiVersion(u32 *version) {
-    return serviceDispatchOut(&g_tune, TuneIpcCmd_GetApiVersion, *version);
-}
+#include "tune.h"
+
+#include "ipc_cmd.h"
+
+#include <string.h>
+
+Service g_tune;
+
+Result tuneInitialize() {
+    Handle handle = 0;
+    Result rc = svcConnectToNamedPort(&handle, "tune");
+    if (R_SUCCEEDED(rc))
+        serviceCreate(&g_tune, handle);
+    return rc;
+}
+
+void tuneExit() {
+    serviceClose(&g_tune);
+}
+
+Result tuneGetStatus(bool *status) {
+    u8 tmp=0;
+    Result rc = serviceDispatchOut(&g_tune, TuneIpcCmd_GetStatus, tmp);
+    if (R_SUCCEEDED(rc) && status) *status = tmp & 1;
+    return rc;
+}
+
+Result tunePlay() {
+    return serviceDispatch(&g_tune, TuneIpcCmd_Play);
+}
+
+Result tunePause() {
+    return serviceDispatch(&g_tune, TuneIpcCmd_Pause);
+}
+
+Result tuneNext() {
+    return serviceDispatch(&g_tune, TuneIpcCmd_Next);
+}
+
+Result tunePrev() {
+    return serviceDispatch(&g_tune, TuneIpcCmd_Prev);
+}
+
+Result tuneGetBass(double *out) {
+    return serviceDispatchOut(&g_tune, TuneIpcCmd_GetBass, *out);
+}
+
+Result tuneSetBass(double bass) {
+    return serviceDispatchIn(&g_tune, TuneIpcCmd_SetBass, bass);
+}
+
+Result tuneGetVolume(double *out) {
+    return serviceDispatchOut(&g_tune, TuneIpcCmd_GetVolume, *out);
+}
+
+Result tuneSetVolume(double volume) {
+    return serviceDispatchIn(&g_tune, TuneIpcCmd_SetVolume, volume);
+}
+
+Result tuneGetTitleVolume(double *out) {
+    return serviceDispatchOut(&g_tune, TuneIpcCmd_GetTitleVolume, *out);
+}
+
+Result tuneSetTitleVolume(double volume) {
+    return serviceDispatchIn(&g_tune, TuneIpcCmd_SetTitleVolume, volume);
+}
+
+Result tuneGetDefaultTitleVolume(double *out) {
+    return serviceDispatchOut(&g_tune, TuneIpcCmd_GetDefaultTitleVolume, *out);
+}
+
+Result tuneSetDefaultTitleVolume(double volume) {
+    return serviceDispatchIn(&g_tune, TuneIpcCmd_SetDefaultTitleVolume, volume);
+}
+
+Result tuneGetRepeatMode(TuneRepeatMode *state) {
+    u8 out = 0;
+    Result rc = serviceDispatchOut(&g_tune, TuneIpcCmd_GetRepeatMode, out);
+    if (R_SUCCEEDED(rc) && state)
+        *state = out;
+    return rc;
+}
+
+Result tuneSetRepeatMode(TuneRepeatMode state) {
+    u8 tmp = state;
+    return serviceDispatchIn(&g_tune, TuneIpcCmd_SetRepeatMode, tmp);
+}
+
+Result tuneGetShuffleMode(TuneShuffleMode *state) {
+    u8 out = 0;
+    Result rc = serviceDispatchOut(&g_tune, TuneIpcCmd_GetShuffleMode, out);
+    if (R_SUCCEEDED(rc) && state)
+        *state = out;
+    return rc;
+}
+Result tuneSetShuffleMode(TuneShuffleMode state) {
+    u8 tmp = state;
+    return serviceDispatchIn(&g_tune, TuneIpcCmd_SetShuffleMode, tmp);
+}
+
+Result tuneGetPlaylistSize(u32 *count) {
+    return serviceDispatchOut(&g_tune, TuneIpcCmd_GetPlaylistSize, *count);
+}
+
+Result tuneGetPlaylistItem(u32 index, char *out_path, size_t out_path_length) {
+    return serviceDispatchIn(&g_tune, TuneIpcCmd_GetPlaylistItem, index,
+                              .buffer_attrs = {SfBufferAttr_Out | SfBufferAttr_HipcMapAlias},
+                              .buffers = {{out_path, out_path_length}}, );
+}
+
+Result tuneGetCurrentQueueItem(char *out_path, size_t out_path_length, TuneCurrentStats *out) {
+    return serviceDispatchOut(&g_tune, TuneIpcCmd_GetCurrentQueueItem, *out,
+                              .buffer_attrs = {SfBufferAttr_Out | SfBufferAttr_HipcMapAlias},
+                              .buffers = {{out_path, out_path_length}}, );
+}
+
+Result tuneClearQueue() {
+    return serviceDispatch(&g_tune, TuneIpcCmd_ClearQueue);
+}
+
+Result tuneMoveQueueItem(u32 src, u32 dst) {
+    const struct {
+        u32 src;
+        u32 dst;
+    } in = {src, dst};
+    return serviceDispatchIn(&g_tune, TuneIpcCmd_MoveQueueItem, in);
+}
+
+Result tuneSelect(u32 index) {
+    return serviceDispatchIn(&g_tune, TuneIpcCmd_Select, index);
+}
+
+Result tuneSeek(u32 position) {
+    return serviceDispatchIn(&g_tune, TuneIpcCmd_Seek, position);
+}
+
+Result tuneEnqueue(const char *path, TuneEnqueueType type) {
+    u8 tmp = type;
+    size_t path_length = strlen(path);
+    return serviceDispatchIn(&g_tune, TuneIpcCmd_Enqueue, tmp,
+                             .buffer_attrs = {SfBufferAttr_In | SfBufferAttr_HipcMapAlias},
+                             .buffers = {{path, path_length}}, );
+}
+
+Result tuneRemove(u32 index) {
+    return serviceDispatchIn(&g_tune, TuneIpcCmd_Remove, index);
+}
+
+Result tuneQuit() {
+    return serviceDispatch(&g_tune, TuneIpcCmd_QuitServer);
+}
+
+Result tuneGetApiVersion(u32 *version) {
+    return serviceDispatchOut(&g_tune, TuneIpcCmd_GetApiVersion, *version);
+}
diff --git a/ipc/tune.h b/ipc/tune.h
index 05e8fad..3558166 100644
--- a/ipc/tune.h
+++ b/ipc/tune.h
@@ -1,165 +1,165 @@
-#pragma once
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <switch.h>
-
-typedef enum {
-    TuneShuffleMode_Off,
-    TuneShuffleMode_On,
-
-    TuneShuffleMode_Count,
-} TuneShuffleMode;
-
-typedef enum {
-    TuneRepeatMode_Off,
-    TuneRepeatMode_One,
-    TuneRepeatMode_All,
-
-    TuneRepeatMode_Count,
-} TuneRepeatMode;
-
-typedef enum {
-    TuneEnqueueType_Front,
-    TuneEnqueueType_Back,
-
-    TuneEnqueueType_Count,
-} TuneEnqueueType;
-
-typedef struct {
-    u32 sample_rate;
-    u32 current_frame;
-    u32 total_frames;
-} TuneCurrentStats;
-
-Result tuneInitialize();
-
-void tuneExit();
-
-/**
- * @brief Get the current status of playback.
- * @param[out] status \ref AudioOutState
- */
-Result tuneGetStatus(bool *status);
-
-Result tunePlay();
-Result tunePause();
-Result tuneNext();
-Result tunePrev();
-
-/**
- * @brief Get the current playback volume.
- * @note On FW lower than [6.0.0] this will set the decode volume.
- * @param[out] out volume value (linear factor).
- */
-Result tuneGetBass(double *out);
-
-/**
- * @brief Set the playback volume.
- * @note On FW lower than [6.0.0] this will return the decode volume.
- * @param[in] volume volume value (linear factor).
- */
-Result tuneSetBass(double bass);
-
-/**
- * @brief Get the current playback volume.
- * @note On FW lower than [6.0.0] this will set the decode volume.
- * @param[out] out volume value (linear factor).
- */
-Result tuneGetVolume(double *out);
-
-/**
- * @brief Set the playback volume.
- * @note On FW lower than [6.0.0] this will return the decode volume.
- * @param[in] volume volume value (linear factor).
- */
-Result tuneSetVolume(double volume);
-
-/**
- * @brief Get the volume of the current title
- * @param[out] out volume value (linear factor).
- */
-Result tuneGetTitleVolume(double *out);
-
-/**
- * @brief Set the volume of the current title
- * @param[in] volume volume value (linear factor).
- */
-Result tuneSetTitleVolume(double volume);
-
-/**
- * @brief Get the default volume of all titles
- * @param[out] out volume value (linear factor).
- */
-Result tuneGetDefaultTitleVolume(double *out);
-
-/**
- * @brief Set the default volume of all titles
- * @param[in] volume volume value (linear factor).
- */
-Result tuneSetDefaultTitleVolume(double volume);
-
-/**
- * @brief Get the current loop status.
- * @param[out] state \ref TuneRepeatMode
- */
-Result tuneGetRepeatMode(TuneRepeatMode *state);
-
-/**
- * @brief Set repeat mode.
- * @param[in] state \ref TuneRepeatMode
- */
-Result tuneSetRepeatMode(TuneRepeatMode state);
-
-Result tuneGetShuffleMode(TuneShuffleMode *state);
-Result tuneSetShuffleMode(TuneShuffleMode state);
-
-/**
- * @brief Get the current queue size.
- * @param[out] count remaining tracks after current.
- */
-Result tuneGetPlaylistSize(u32 *count);
-
-/**
- * @brief Read queue.
- * @param[out] read Amount written to buffer.
- * @param[out] out_path Path array FS_MAX_PATH * n
- * @param[in] out_path_length Size of the supplied path array.
- */
-Result tuneGetPlaylistItem(u32 index, char *out_path, size_t out_path_length);
-
-/**
- * @brief Get current song.
- * @param[out] out_path Path to current playing song.
- * @param[in] out_path_length Size of the out_path buffer. Path of the current track needs to fit.
- * @param[out] out \ref MusicCurrentTune
- */
-Result tuneGetCurrentQueueItem(char *out_path, size_t out_path_length, TuneCurrentStats *out);
-
-/**
- * @brief Clear queue.
- */
-Result tuneClearQueue();
-Result tuneMoveQueueItem(u32 src, u32 dst);
-Result tuneSelect(u32 index);
-Result tuneSeek(u32 position);
-
-/**
- * @brief Add track to queue.
- * @note Must not include leading mount name.
- * @note Must match ^(sdmc:/.*.mp3)$
- * @param[in] path Path to file on sdcard.
- */
-Result tuneEnqueue(const char *path, TuneEnqueueType type);
-
-Result tuneRemove(u32 index);
-
-Result tuneQuit();
-
-Result tuneGetApiVersion(u32 *version);
-
-#ifdef __cplusplus
-}
-#endif
+#pragma once
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <switch.h>
+
+typedef enum {
+    TuneShuffleMode_Off,
+    TuneShuffleMode_On,
+
+    TuneShuffleMode_Count,
+} TuneShuffleMode;
+
+typedef enum {
+    TuneRepeatMode_Off,
+    TuneRepeatMode_One,
+    TuneRepeatMode_All,
+
+    TuneRepeatMode_Count,
+} TuneRepeatMode;
+
+typedef enum {
+    TuneEnqueueType_Front,
+    TuneEnqueueType_Back,
+
+    TuneEnqueueType_Count,
+} TuneEnqueueType;
+
+typedef struct {
+    u32 sample_rate;
+    u32 current_frame;
+    u32 total_frames;
+} TuneCurrentStats;
+
+Result tuneInitialize();
+
+void tuneExit();
+
+/**
+ * @brief Get the current status of playback.
+ * @param[out] status \ref AudioOutState
+ */
+Result tuneGetStatus(bool *status);
+
+Result tunePlay();
+Result tunePause();
+Result tuneNext();
+Result tunePrev();
+
+/**
+ * @brief Get the current playback volume.
+ * @note On FW lower than [6.0.0] this will set the decode volume.
+ * @param[out] out volume value (linear factor).
+ */
+Result tuneGetBass(double *out);
+
+/**
+ * @brief Set the playback volume.
+ * @note On FW lower than [6.0.0] this will return the decode volume.
+ * @param[in] volume volume value (linear factor).
+ */
+Result tuneSetBass(double bass);
+
+/**
+ * @brief Get the current playback volume.
+ * @note On FW lower than [6.0.0] this will set the decode volume.
+ * @param[out] out volume value (linear factor).
+ */
+Result tuneGetVolume(double *out);
+
+/**
+ * @brief Set the playback volume.
+ * @note On FW lower than [6.0.0] this will return the decode volume.
+ * @param[in] volume volume value (linear factor).
+ */
+Result tuneSetVolume(double volume);
+
+/**
+ * @brief Get the volume of the current title
+ * @param[out] out volume value (linear factor).
+ */
+Result tuneGetTitleVolume(double *out);
+
+/**
+ * @brief Set the volume of the current title
+ * @param[in] volume volume value (linear factor).
+ */
+Result tuneSetTitleVolume(double volume);
+
+/**
+ * @brief Get the default volume of all titles
+ * @param[out] out volume value (linear factor).
+ */
+Result tuneGetDefaultTitleVolume(double *out);
+
+/**
+ * @brief Set the default volume of all titles
+ * @param[in] volume volume value (linear factor).
+ */
+Result tuneSetDefaultTitleVolume(double volume);
+
+/**
+ * @brief Get the current loop status.
+ * @param[out] state \ref TuneRepeatMode
+ */
+Result tuneGetRepeatMode(TuneRepeatMode *state);
+
+/**
+ * @brief Set repeat mode.
+ * @param[in] state \ref TuneRepeatMode
+ */
+Result tuneSetRepeatMode(TuneRepeatMode state);
+
+Result tuneGetShuffleMode(TuneShuffleMode *state);
+Result tuneSetShuffleMode(TuneShuffleMode state);
+
+/**
+ * @brief Get the current queue size.
+ * @param[out] count remaining tracks after current.
+ */
+Result tuneGetPlaylistSize(u32 *count);
+
+/**
+ * @brief Read queue.
+ * @param[out] read Amount written to buffer.
+ * @param[out] out_path Path array FS_MAX_PATH * n
+ * @param[in] out_path_length Size of the supplied path array.
+ */
+Result tuneGetPlaylistItem(u32 index, char *out_path, size_t out_path_length);
+
+/**
+ * @brief Get current song.
+ * @param[out] out_path Path to current playing song.
+ * @param[in] out_path_length Size of the out_path buffer. Path of the current track needs to fit.
+ * @param[out] out \ref MusicCurrentTune
+ */
+Result tuneGetCurrentQueueItem(char *out_path, size_t out_path_length, TuneCurrentStats *out);
+
+/**
+ * @brief Clear queue.
+ */
+Result tuneClearQueue();
+Result tuneMoveQueueItem(u32 src, u32 dst);
+Result tuneSelect(u32 index);
+Result tuneSeek(u32 position);
+
+/**
+ * @brief Add track to queue.
+ * @note Must not include leading mount name.
+ * @note Must match ^(sdmc:/.*.mp3)$
+ * @param[in] path Path to file on sdcard.
+ */
+Result tuneEnqueue(const char *path, TuneEnqueueType type);
+
+Result tuneRemove(u32 index);
+
+Result tuneQuit();
+
+Result tuneGetApiVersion(u32 *version);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/libtesla/README.md b/libtesla/README.md
index b874f7d..9a53aae 100644
--- a/libtesla/README.md
+++ b/libtesla/README.md
@@ -1,32 +1,32 @@
-# libtesla
-
-<p align="center">
-  <img src="https://i.imgur.com/fhwyjbd.png">
-</p>
-
-libtesla is the interface between the Tesla overlay loader and user-made Overlays. It handles all layer creation, UI creation, drawing and input management.
-It's main goal is to make sure all overlays look and feel similar and don't differenciate themselves from the switch's native overlays.
-
-## Screenshots
-
-<div>
-    <img src="https://i.imgur.com/jJpxYjb.jpg" style="width: 50%; double: center">
-    <img src="https://i.imgur.com/nBUc7ps.jpg" style="width: 50%; double: right">
-</div>
-
-`Overlays do NOT show up on Screenshots. These pictures were taken using a capture card`
-
-## Example
-
-An example for how to use libtesla can be found here: https://github.com/WerWolv/libtesla/tree/master/example
-To create your own Overlay, please consider creating a new repository using the official Tesla overlay template: https://github.com/WerWolv/Tesla-Template
-
-**Please Note:** While it is possible to create overlays without libtesla, it's highly recommended to not do so. libtesla handles showing and hiding of overlays, button combo detection, layer creation and a lot more. Not using it will lead to an inconsistent user experience when using multiple different overlays ultimately making it worse for the end user. If something's missing, please consider opening a PR here.
-
-## Credits
-
-- **switchbrew** for nx-hbloader which is used as basis for overlay loading
-- **kardch** for the amazing icon
-- **All the devs on AtlasNX, RetroNX and Switchbrew** for their feedback
-- **All overlay devs** for making something awesome out of this :)
-
+# libtesla
+
+<p align="center">
+  <img src="https://i.imgur.com/fhwyjbd.png">
+</p>
+
+libtesla is the interface between the Tesla overlay loader and user-made Overlays. It handles all layer creation, UI creation, drawing and input management.
+It's main goal is to make sure all overlays look and feel similar and don't differenciate themselves from the switch's native overlays.
+
+## Screenshots
+
+<div>
+    <img src="https://i.imgur.com/jJpxYjb.jpg" style="width: 50%; double: center">
+    <img src="https://i.imgur.com/nBUc7ps.jpg" style="width: 50%; double: right">
+</div>
+
+`Overlays do NOT show up on Screenshots. These pictures were taken using a capture card`
+
+## Example
+
+An example for how to use libtesla can be found here: https://github.com/WerWolv/libtesla/tree/master/example
+To create your own Overlay, please consider creating a new repository using the official Tesla overlay template: https://github.com/WerWolv/Tesla-Template
+
+**Please Note:** While it is possible to create overlays without libtesla, it's highly recommended to not do so. libtesla handles showing and hiding of overlays, button combo detection, layer creation and a lot more. Not using it will lead to an inconsistent user experience when using multiple different overlays ultimately making it worse for the end user. If something's missing, please consider opening a PR here.
+
+## Credits
+
+- **switchbrew** for nx-hbloader which is used as basis for overlay loading
+- **kardch** for the amazing icon
+- **All the devs on AtlasNX, RetroNX and Switchbrew** for their feedback
+- **All overlay devs** for making something awesome out of this :)
+
diff --git a/libtesla/example/Makefile b/libtesla/example/Makefile
index cdb011a..ef02dc2 100644
--- a/libtesla/example/Makefile
+++ b/libtesla/example/Makefile
@@ -1,209 +1,209 @@
-#---------------------------------------------------------------------------------
-.SUFFIXES:
-#---------------------------------------------------------------------------------
-
-ifeq ($(strip $(DEVKITPRO)),)
-$(error "Please set DEVKITPRO in your environment. export DEVKITPRO=<path to>/devkitpro")
-endif
-
-TOPDIR ?= $(CURDIR)
-include $(DEVKITPRO)/libnx/switch_rules
-
-#---------------------------------------------------------------------------------
-# TARGET is the name of the output
-# BUILD is the directory where object files & intermediate files will be placed
-# SOURCES is a list of directories containing source code
-# DATA is a list of directories containing data files
-# INCLUDES is a list of directories containing header files
-# ROMFS is the directory containing data to be added to RomFS, relative to the Makefile (Optional)
-#
-# NO_ICON: if set to anything, do not use icon.
-# NO_NACP: if set to anything, no .nacp file is generated.
-# APP_TITLE is the name of the app stored in the .nacp file (Optional)
-# APP_AUTHOR is the author of the app stored in the .nacp file (Optional)
-# APP_VERSION is the version of the app stored in the .nacp file (Optional)
-# APP_TITLEID is the titleID of the app stored in the .nacp file (Optional)
-# ICON is the filename of the icon (.jpg), relative to the project folder.
-#   If not set, it attempts to use one of the following (in this order):
-#     - <Project name>.jpg
-#     - icon.jpg
-#     - <libnx folder>/default_icon.jpg
-#
-# CONFIG_JSON is the filename of the NPDM config file (.json), relative to the project folder.
-#   If not set, it attempts to use one of the following (in this order):
-#     - <Project name>.json
-#     - config.json
-#   If a JSON file is provided or autodetected, an ExeFS PFS0 (.nsp) is built instead
-#   of a homebrew executable (.nro). This is intended to be used for sysmodules.
-#   NACP building is skipped as well.
-#---------------------------------------------------------------------------------
-
-APP_TITLE			:=		Tesla Example
-APP_VERSION 	:= 		1.3.0
-
-TARGET		:=	$(notdir $(CURDIR))
-BUILD		:=	build
-SOURCES		:=	source
-DATA		:=	data
-INCLUDES	:=	../include
-
-NO_ICON		:=  1
-
-#---------------------------------------------------------------------------------
-# options for code generation
-#---------------------------------------------------------------------------------
-ARCH	:=	-march=armv8-a+crc+crypto -mtune=cortex-a57 -mtp=soft -fPIE
-
-CFLAGS	:=	-g -Wall -O2 -ffunction-sections \
-			$(ARCH) $(DEFINES)
-
-CFLAGS	+=	$(INCLUDE) -D__SWITCH__
-
-CXXFLAGS	:= $(CFLAGS) -fno-exceptions -std=c++20
-
-ASFLAGS	:=	-g $(ARCH)
-LDFLAGS	=	-specs=$(DEVKITPRO)/libnx/switch.specs -g $(ARCH) -Wl,-Map,$(notdir $*.map)
-
-LIBS	:= -lnx
-
-#---------------------------------------------------------------------------------
-# list of directories containing libraries, this must be the top level containing
-# include and lib
-#---------------------------------------------------------------------------------
-LIBDIRS	:= $(PORTLIBS) $(LIBNX)
-
-
-#---------------------------------------------------------------------------------
-# no real need to edit anything past this point unless you need to add additional
-# rules for different file extensions
-#---------------------------------------------------------------------------------
-ifneq ($(BUILD),$(notdir $(CURDIR)))
-#---------------------------------------------------------------------------------
-
-export OUTPUT	:=	$(CURDIR)/$(TARGET)
-export TOPDIR	:=	$(CURDIR)
-
-export VPATH	:=	$(foreach dir,$(SOURCES),$(CURDIR)/$(dir)) \
-			$(foreach dir,$(DATA),$(CURDIR)/$(dir))
-
-export DEPSDIR	:=	$(CURDIR)/$(BUILD)
-
-CFILES		:=	$(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.c)))
-CPPFILES	:=	$(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.cpp)))
-SFILES		:=	$(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.s)))
-BINFILES	:=	$(foreach dir,$(DATA),$(notdir $(wildcard $(dir)/*.*)))
-
-#---------------------------------------------------------------------------------
-# use CXX for linking C++ projects, CC for standard C
-#---------------------------------------------------------------------------------
-ifeq ($(strip $(CPPFILES)),)
-#---------------------------------------------------------------------------------
-	export LD	:=	$(CC)
-#---------------------------------------------------------------------------------
-else
-#---------------------------------------------------------------------------------
-	export LD	:=	$(CXX)
-#---------------------------------------------------------------------------------
-endif
-#---------------------------------------------------------------------------------
-
-export OFILES_BIN	:=	$(addsuffix .o,$(BINFILES))
-export OFILES_SRC	:=	$(CPPFILES:.cpp=.o) $(CFILES:.c=.o) $(SFILES:.s=.o)
-export OFILES 	:=	$(OFILES_BIN) $(OFILES_SRC)
-export HFILES_BIN	:=	$(addsuffix .h,$(subst .,_,$(BINFILES)))
-
-export INCLUDE	:=	$(foreach dir,$(INCLUDES),-I$(CURDIR)/$(dir)) \
-			$(foreach dir,$(LIBDIRS),-I$(dir)/include) \
-			-I$(CURDIR)/$(BUILD)
-
-export LIBPATHS	:=	$(foreach dir,$(LIBDIRS),-L$(dir)/lib)
-
-ifeq ($(strip $(CONFIG_JSON)),)
-	jsons := $(wildcard *.json)
-	ifneq (,$(findstring $(TARGET).json,$(jsons)))
-		export APP_JSON := $(TOPDIR)/$(TARGET).json
-	else
-		ifneq (,$(findstring config.json,$(jsons)))
-			export APP_JSON := $(TOPDIR)/config.json
-		endif
-	endif
-else
-	export APP_JSON := $(TOPDIR)/$(CONFIG_JSON)
-endif
-
-ifeq ($(strip $(ICON)),)
-	icons := $(wildcard *.jpg)
-	ifneq (,$(findstring $(TARGET).jpg,$(icons)))
-		export APP_ICON := $(TOPDIR)/$(TARGET).jpg
-	else
-		ifneq (,$(findstring icon.jpg,$(icons)))
-			export APP_ICON := $(TOPDIR)/icon.jpg
-		endif
-	endif
-else
-	export APP_ICON := $(TOPDIR)/$(ICON)
-endif
-
-ifeq ($(strip $(NO_ICON)),)
-	export NROFLAGS += --icon=$(APP_ICON)
-endif
-
-ifeq ($(strip $(NO_NACP)),)
-	export NROFLAGS += --nacp=$(CURDIR)/$(TARGET).nacp
-endif
-
-ifneq ($(APP_TITLEID),)
-	export NACPFLAGS += --titleid=$(APP_TITLEID)
-endif
-
-ifneq ($(ROMFS),)
-	export NROFLAGS += --romfsdir=$(CURDIR)/$(ROMFS)
-endif
-
-.PHONY: $(BUILD) clean all
-
-#---------------------------------------------------------------------------------
-all: $(BUILD)
-
-
-$(BUILD):
-	@[ -d $@ ] || mkdir -p $@
-	@$(MAKE) --no-print-directory -C $(BUILD) -f $(CURDIR)/Makefile
-
-#---------------------------------------------------------------------------------
-clean:
-	@rm -fr $(BUILD) $(TARGET).ovl $(TARGET).nro $(TARGET).nacp $(TARGET).elf
-
-
-#---------------------------------------------------------------------------------
-else
-.PHONY:	all
-
-DEPENDS	:=	$(OFILES:.o=.d)
-
-#---------------------------------------------------------------------------------
-# main targets
-#---------------------------------------------------------------------------------
-all	:	 $(OUTPUT).ovl
-
-$(OUTPUT).ovl		:	$(OUTPUT).elf $(OUTPUT).nacp 
-	@elf2nro $< $@ $(NROFLAGS)
-	@echo "built ... $(notdir $(OUTPUT).ovl)"
-
-$(OUTPUT).elf	:	$(OFILES)
-
-$(OFILES_SRC)	: $(HFILES_BIN)
-
-#---------------------------------------------------------------------------------
-# you need a rule like this for each extension you use as binary data
-#---------------------------------------------------------------------------------
-%.bin.o	%_bin.h :	%.bin
-#---------------------------------------------------------------------------------
-	@echo $(notdir $<)
-	@$(bin2o)
-
--include $(DEPENDS)
-
-#---------------------------------------------------------------------------------------
-endif
-#---------------------------------------------------------------------------------------
+#---------------------------------------------------------------------------------
+.SUFFIXES:
+#---------------------------------------------------------------------------------
+
+ifeq ($(strip $(DEVKITPRO)),)
+$(error "Please set DEVKITPRO in your environment. export DEVKITPRO=<path to>/devkitpro")
+endif
+
+TOPDIR ?= $(CURDIR)
+include $(DEVKITPRO)/libnx/switch_rules
+
+#---------------------------------------------------------------------------------
+# TARGET is the name of the output
+# BUILD is the directory where object files & intermediate files will be placed
+# SOURCES is a list of directories containing source code
+# DATA is a list of directories containing data files
+# INCLUDES is a list of directories containing header files
+# ROMFS is the directory containing data to be added to RomFS, relative to the Makefile (Optional)
+#
+# NO_ICON: if set to anything, do not use icon.
+# NO_NACP: if set to anything, no .nacp file is generated.
+# APP_TITLE is the name of the app stored in the .nacp file (Optional)
+# APP_AUTHOR is the author of the app stored in the .nacp file (Optional)
+# APP_VERSION is the version of the app stored in the .nacp file (Optional)
+# APP_TITLEID is the titleID of the app stored in the .nacp file (Optional)
+# ICON is the filename of the icon (.jpg), relative to the project folder.
+#   If not set, it attempts to use one of the following (in this order):
+#     - <Project name>.jpg
+#     - icon.jpg
+#     - <libnx folder>/default_icon.jpg
+#
+# CONFIG_JSON is the filename of the NPDM config file (.json), relative to the project folder.
+#   If not set, it attempts to use one of the following (in this order):
+#     - <Project name>.json
+#     - config.json
+#   If a JSON file is provided or autodetected, an ExeFS PFS0 (.nsp) is built instead
+#   of a homebrew executable (.nro). This is intended to be used for sysmodules.
+#   NACP building is skipped as well.
+#---------------------------------------------------------------------------------
+
+APP_TITLE			:=		Tesla Example
+APP_VERSION 	:= 		1.3.0
+
+TARGET		:=	$(notdir $(CURDIR))
+BUILD		:=	build
+SOURCES		:=	source
+DATA		:=	data
+INCLUDES	:=	../include
+
+NO_ICON		:=  1
+
+#---------------------------------------------------------------------------------
+# options for code generation
+#---------------------------------------------------------------------------------
+ARCH	:=	-march=armv8-a+crc+crypto -mtune=cortex-a57 -mtp=soft -fPIE
+
+CFLAGS	:=	-g -Wall -O2 -ffunction-sections \
+			$(ARCH) $(DEFINES)
+
+CFLAGS	+=	$(INCLUDE) -D__SWITCH__
+
+CXXFLAGS	:= $(CFLAGS) -fno-exceptions -std=c++20
+
+ASFLAGS	:=	-g $(ARCH)
+LDFLAGS	=	-specs=$(DEVKITPRO)/libnx/switch.specs -g $(ARCH) -Wl,-Map,$(notdir $*.map)
+
+LIBS	:= -lnx
+
+#---------------------------------------------------------------------------------
+# list of directories containing libraries, this must be the top level containing
+# include and lib
+#---------------------------------------------------------------------------------
+LIBDIRS	:= $(PORTLIBS) $(LIBNX)
+
+
+#---------------------------------------------------------------------------------
+# no real need to edit anything past this point unless you need to add additional
+# rules for different file extensions
+#---------------------------------------------------------------------------------
+ifneq ($(BUILD),$(notdir $(CURDIR)))
+#---------------------------------------------------------------------------------
+
+export OUTPUT	:=	$(CURDIR)/$(TARGET)
+export TOPDIR	:=	$(CURDIR)
+
+export VPATH	:=	$(foreach dir,$(SOURCES),$(CURDIR)/$(dir)) \
+			$(foreach dir,$(DATA),$(CURDIR)/$(dir))
+
+export DEPSDIR	:=	$(CURDIR)/$(BUILD)
+
+CFILES		:=	$(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.c)))
+CPPFILES	:=	$(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.cpp)))
+SFILES		:=	$(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.s)))
+BINFILES	:=	$(foreach dir,$(DATA),$(notdir $(wildcard $(dir)/*.*)))
+
+#---------------------------------------------------------------------------------
+# use CXX for linking C++ projects, CC for standard C
+#---------------------------------------------------------------------------------
+ifeq ($(strip $(CPPFILES)),)
+#---------------------------------------------------------------------------------
+	export LD	:=	$(CC)
+#---------------------------------------------------------------------------------
+else
+#---------------------------------------------------------------------------------
+	export LD	:=	$(CXX)
+#---------------------------------------------------------------------------------
+endif
+#---------------------------------------------------------------------------------
+
+export OFILES_BIN	:=	$(addsuffix .o,$(BINFILES))
+export OFILES_SRC	:=	$(CPPFILES:.cpp=.o) $(CFILES:.c=.o) $(SFILES:.s=.o)
+export OFILES 	:=	$(OFILES_BIN) $(OFILES_SRC)
+export HFILES_BIN	:=	$(addsuffix .h,$(subst .,_,$(BINFILES)))
+
+export INCLUDE	:=	$(foreach dir,$(INCLUDES),-I$(CURDIR)/$(dir)) \
+			$(foreach dir,$(LIBDIRS),-I$(dir)/include) \
+			-I$(CURDIR)/$(BUILD)
+
+export LIBPATHS	:=	$(foreach dir,$(LIBDIRS),-L$(dir)/lib)
+
+ifeq ($(strip $(CONFIG_JSON)),)
+	jsons := $(wildcard *.json)
+	ifneq (,$(findstring $(TARGET).json,$(jsons)))
+		export APP_JSON := $(TOPDIR)/$(TARGET).json
+	else
+		ifneq (,$(findstring config.json,$(jsons)))
+			export APP_JSON := $(TOPDIR)/config.json
+		endif
+	endif
+else
+	export APP_JSON := $(TOPDIR)/$(CONFIG_JSON)
+endif
+
+ifeq ($(strip $(ICON)),)
+	icons := $(wildcard *.jpg)
+	ifneq (,$(findstring $(TARGET).jpg,$(icons)))
+		export APP_ICON := $(TOPDIR)/$(TARGET).jpg
+	else
+		ifneq (,$(findstring icon.jpg,$(icons)))
+			export APP_ICON := $(TOPDIR)/icon.jpg
+		endif
+	endif
+else
+	export APP_ICON := $(TOPDIR)/$(ICON)
+endif
+
+ifeq ($(strip $(NO_ICON)),)
+	export NROFLAGS += --icon=$(APP_ICON)
+endif
+
+ifeq ($(strip $(NO_NACP)),)
+	export NROFLAGS += --nacp=$(CURDIR)/$(TARGET).nacp
+endif
+
+ifneq ($(APP_TITLEID),)
+	export NACPFLAGS += --titleid=$(APP_TITLEID)
+endif
+
+ifneq ($(ROMFS),)
+	export NROFLAGS += --romfsdir=$(CURDIR)/$(ROMFS)
+endif
+
+.PHONY: $(BUILD) clean all
+
+#---------------------------------------------------------------------------------
+all: $(BUILD)
+
+
+$(BUILD):
+	@[ -d $@ ] || mkdir -p $@
+	@$(MAKE) --no-print-directory -C $(BUILD) -f $(CURDIR)/Makefile
+
+#---------------------------------------------------------------------------------
+clean:
+	@rm -fr $(BUILD) $(TARGET).ovl $(TARGET).nro $(TARGET).nacp $(TARGET).elf
+
+
+#---------------------------------------------------------------------------------
+else
+.PHONY:	all
+
+DEPENDS	:=	$(OFILES:.o=.d)
+
+#---------------------------------------------------------------------------------
+# main targets
+#---------------------------------------------------------------------------------
+all	:	 $(OUTPUT).ovl
+
+$(OUTPUT).ovl		:	$(OUTPUT).elf $(OUTPUT).nacp 
+	@elf2nro $< $@ $(NROFLAGS)
+	@echo "built ... $(notdir $(OUTPUT).ovl)"
+
+$(OUTPUT).elf	:	$(OFILES)
+
+$(OFILES_SRC)	: $(HFILES_BIN)
+
+#---------------------------------------------------------------------------------
+# you need a rule like this for each extension you use as binary data
+#---------------------------------------------------------------------------------
+%.bin.o	%_bin.h :	%.bin
+#---------------------------------------------------------------------------------
+	@echo $(notdir $<)
+	@$(bin2o)
+
+-include $(DEPENDS)
+
+#---------------------------------------------------------------------------------------
+endif
+#---------------------------------------------------------------------------------------
diff --git a/libtesla/example/source/main.cpp b/libtesla/example/source/main.cpp
index 6c5e38f..e6fd54d 100644
--- a/libtesla/example/source/main.cpp
+++ b/libtesla/example/source/main.cpp
@@ -1,100 +1,100 @@
-#define TESLA_INIT_IMPL // If you have more than one file using the tesla header, only define this in the main one
-#include </mnt/c/Users/carel/Downloads/sys-tune-master-128bitsound/sys-tune-master/libtesla/include/tesla.hpp>    // The Tesla Header
-
-
-class GuiSecondary : public tsl::Gui {
-public:
-    GuiSecondary() {}
-
-    virtual tsl::elm::Element* createUI() override {
-        auto *rootFrame = new tsl::elm::OverlayFrame("Tesla Example", "v1.3.2 - Secondary Gui");
-
-        rootFrame->setContent(new tsl::elm::DebugRectangle(tsl::Color{ 0x8, 0x3, 0x8, 0xF }));
-
-        return rootFrame;
-    }
-};
-
-class GuiTest : public tsl::Gui {
-public:
-    GuiTest(u8 arg1, u8 arg2, bool arg3) { }
-
-    // Called when this Gui gets loaded to create the UI
-    // Allocate all elements on the heap. libtesla will make sure to clean them up when not needed anymore
-    virtual tsl::elm::Element* createUI() override {
-        // A OverlayFrame is the base element every overlay consists of. This will draw the default Title and Subtitle.
-        // If you need more information in the header or want to change it's look, use a HeaderOverlayFrame.
-        auto frame = new tsl::elm::OverlayFrame("Tesla Example", "v1.3.2");
-
-        // A list that can contain sub elements and handles scrolling
-        auto list = new tsl::elm::List();
-
-        // List Items
-        list->addItem(new tsl::elm::CategoryHeader("List items"));
-
-        auto *clickableListItem = new tsl::elm::ListItem("Clickable List Item", "...");
-        clickableListItem->setClickListener([](u64 keys) {
-            if (keys & HidNpadButton_A) {
-                tsl::changeTo<GuiSecondary>();
-                return true;
-            }
-
-            return false;
-        });
-
-        list->addItem(clickableListItem);
-        list->addItem(new tsl::elm::ListItem("Default List Item"));
-        list->addItem(new tsl::elm::ListItem("Default List Item with an extra long name to trigger truncation and scrolling"));
-        list->addItem(new tsl::elm::ToggleListItem("Toggle List Item", true));
-
-        // Custom Drawer, a element that gives direct access to the renderer
-        list->addItem(new tsl::elm::CategoryHeader("Custom Drawer", true));
-        list->addItem(new tsl::elm::CustomDrawer([](tsl::gfx::Renderer *renderer, s32 x, s32 y, s32 w, s32 h) {
-            renderer->drawCircle(x + 40, y + 40, 20, true, renderer->a(0xF00F));
-            renderer->drawCircle(x + 50, y + 50, 20, true, renderer->a(0xF0F0));
-            renderer->drawRect(x + 130, y + 30, 60, 40, renderer->a(0xFF00));
-            renderer->drawString("Hello :)", false, x + 250, y + 70, 20, renderer->a(0xFF0F));
-            renderer->drawRect(x + 40, y + 90, 300, 10, renderer->a(0xF0FF));
-        }), 100);
-
-        // Track bars
-        list->addItem(new tsl::elm::CategoryHeader("Track bars"));
-        list->addItem(new tsl::elm::TrackBar("\u2600"));
-        list->addItem(new tsl::elm::StepTrackBar("\uE13C", 20));
-        list->addItem(new tsl::elm::NamedStepTrackBar("\uE132", { "Selection 1", "Selection 2", "Selection 3" }));
-
-        // Add the list to the frame for it to be drawn
-        frame->setContent(list);
-
-        // Return the frame to have it become the top level element of this Gui
-        return frame;
-    }
-
-    // Called once every frame to update values
-    virtual void update() override {
-
-    }
-
-    // Called once every frame to handle inputs not handled by other UI elements
-    virtual bool handleInput(u64 keysDown, u64 keysHeld, const HidTouchState &touchPos, HidAnalogStickState joyStickPosLeft, HidAnalogStickState joyStickPosRight) override {
-        return false;   // Return true here to signal the inputs have been consumed
-    }
-};
-
-class OverlayTest : public tsl::Overlay {
-public:
-                                             // libtesla already initialized fs, hid, pl, pmdmnt, hid:sys and set:sys
-    virtual void initServices() override {}  // Called at the start to initialize all services necessary for this Overlay
-    virtual void exitServices() override {}  // Called at the end to clean up all services previously initialized
-
-    virtual void onShow() override {}    // Called before overlay wants to change from invisible to visible state
-    virtual void onHide() override {}    // Called before overlay wants to change from visible to invisible state
-
-    virtual std::unique_ptr<tsl::Gui> loadInitialGui() override {
-        return initially<GuiTest>(1, 2, true);  // Initial Gui to load. It's possible to pass arguments to it's constructor like this
-    }
-};
-
-int main(int argc, char **argv) {
-    return tsl::loop<OverlayTest>(argc, argv);
-}
+#define TESLA_INIT_IMPL // If you have more than one file using the tesla header, only define this in the main one
+#include </mnt/c/Users/carel/Downloads/sys-tune-master-128bitsound/sys-tune-master/libtesla/include/tesla.hpp>    // The Tesla Header
+
+
+class GuiSecondary : public tsl::Gui {
+public:
+    GuiSecondary() {}
+
+    virtual tsl::elm::Element* createUI() override {
+        auto *rootFrame = new tsl::elm::OverlayFrame("Tesla Example", "v1.3.2 - Secondary Gui");
+
+        rootFrame->setContent(new tsl::elm::DebugRectangle(tsl::Color{ 0x8, 0x3, 0x8, 0xF }));
+
+        return rootFrame;
+    }
+};
+
+class GuiTest : public tsl::Gui {
+public:
+    GuiTest(u8 arg1, u8 arg2, bool arg3) { }
+
+    // Called when this Gui gets loaded to create the UI
+    // Allocate all elements on the heap. libtesla will make sure to clean them up when not needed anymore
+    virtual tsl::elm::Element* createUI() override {
+        // A OverlayFrame is the base element every overlay consists of. This will draw the default Title and Subtitle.
+        // If you need more information in the header or want to change it's look, use a HeaderOverlayFrame.
+        auto frame = new tsl::elm::OverlayFrame("Tesla Example", "v1.3.2");
+
+        // A list that can contain sub elements and handles scrolling
+        auto list = new tsl::elm::List();
+
+        // List Items
+        list->addItem(new tsl::elm::CategoryHeader("List items"));
+
+        auto *clickableListItem = new tsl::elm::ListItem("Clickable List Item", "...");
+        clickableListItem->setClickListener([](u64 keys) {
+            if (keys & HidNpadButton_A) {
+                tsl::changeTo<GuiSecondary>();
+                return true;
+            }
+
+            return false;
+        });
+
+        list->addItem(clickableListItem);
+        list->addItem(new tsl::elm::ListItem("Default List Item"));
+        list->addItem(new tsl::elm::ListItem("Default List Item with an extra long name to trigger truncation and scrolling"));
+        list->addItem(new tsl::elm::ToggleListItem("Toggle List Item", true));
+
+        // Custom Drawer, a element that gives direct access to the renderer
+        list->addItem(new tsl::elm::CategoryHeader("Custom Drawer", true));
+        list->addItem(new tsl::elm::CustomDrawer([](tsl::gfx::Renderer *renderer, s32 x, s32 y, s32 w, s32 h) {
+            renderer->drawCircle(x + 40, y + 40, 20, true, renderer->a(0xF00F));
+            renderer->drawCircle(x + 50, y + 50, 20, true, renderer->a(0xF0F0));
+            renderer->drawRect(x + 130, y + 30, 60, 40, renderer->a(0xFF00));
+            renderer->drawString("Hello :)", false, x + 250, y + 70, 20, renderer->a(0xFF0F));
+            renderer->drawRect(x + 40, y + 90, 300, 10, renderer->a(0xF0FF));
+        }), 100);
+
+        // Track bars
+        list->addItem(new tsl::elm::CategoryHeader("Track bars"));
+        list->addItem(new tsl::elm::TrackBar("\u2600"));
+        list->addItem(new tsl::elm::StepTrackBar("\uE13C", 20));
+        list->addItem(new tsl::elm::NamedStepTrackBar("\uE132", { "Selection 1", "Selection 2", "Selection 3" }));
+
+        // Add the list to the frame for it to be drawn
+        frame->setContent(list);
+
+        // Return the frame to have it become the top level element of this Gui
+        return frame;
+    }
+
+    // Called once every frame to update values
+    virtual void update() override {
+
+    }
+
+    // Called once every frame to handle inputs not handled by other UI elements
+    virtual bool handleInput(u64 keysDown, u64 keysHeld, const HidTouchState &touchPos, HidAnalogStickState joyStickPosLeft, HidAnalogStickState joyStickPosRight) override {
+        return false;   // Return true here to signal the inputs have been consumed
+    }
+};
+
+class OverlayTest : public tsl::Overlay {
+public:
+                                             // libtesla already initialized fs, hid, pl, pmdmnt, hid:sys and set:sys
+    virtual void initServices() override {}  // Called at the start to initialize all services necessary for this Overlay
+    virtual void exitServices() override {}  // Called at the end to clean up all services previously initialized
+
+    virtual void onShow() override {}    // Called before overlay wants to change from invisible to visible state
+    virtual void onHide() override {}    // Called before overlay wants to change from visible to invisible state
+
+    virtual std::unique_ptr<tsl::Gui> loadInitialGui() override {
+        return initially<GuiTest>(1, 2, true);  // Initial Gui to load. It's possible to pass arguments to it's constructor like this
+    }
+};
+
+int main(int argc, char **argv) {
+    return tsl::loop<OverlayTest>(argc, argv);
+}
diff --git a/libtesla/include/stb_truetype.h b/libtesla/include/stb_truetype.h
index e182b77..02a4040 100644
--- a/libtesla/include/stb_truetype.h
+++ b/libtesla/include/stb_truetype.h
@@ -1,5011 +1,5011 @@
-// stb_truetype.h - v1.24 - public domain
-// authored from 2009-2020 by Sean Barrett / RAD Game Tools
-//
-// =======================================================================
-//
-//    NO SECURITY GUARANTEE -- DO NOT USE THIS ON UNTRUSTED FONT FILES
-//
-// This library does no range checking of the offsets found in the file,
-// meaning an attacker can use it to read arbitrary memory.
-//
-// =======================================================================
-//
-//   This library processes TrueType files:
-//        parse files
-//        extract glyph metrics
-//        extract glyph shapes
-//        render glyphs to one-channel bitmaps with antialiasing (box filter)
-//        render glyphs to one-channel SDF bitmaps (signed-distance field/function)
-//
-//   Todo:
-//        non-MS cmaps
-//        crashproof on bad data
-//        hinting? (no longer patented)
-//        cleartype-style AA?
-//        optimize: use simple memory allocator for intermediates
-//        optimize: build edge-list directly from curves
-//        optimize: rasterize directly from curves?
-//
-// ADDITIONAL CONTRIBUTORS
-//
-//   Mikko Mononen: compound shape support, more cmap formats
-//   Tor Andersson: kerning, subpixel rendering
-//   Dougall Johnson: OpenType / Type 2 font handling
-//   Daniel Ribeiro Maciel: basic GPOS-based kerning
-//
-//   Misc other:
-//       Ryan Gordon
-//       Simon Glass
-//       github:IntellectualKitty
-//       Imanol Celaya
-//       Daniel Ribeiro Maciel
-//
-//   Bug/warning reports/fixes:
-//       "Zer" on mollyrocket       Fabian "ryg" Giesen   github:NiLuJe
-//       Cass Everitt               Martins Mozeiko       github:aloucks
-//       stoiko (Haemimont Games)   Cap Petschulat        github:oyvindjam
-//       Brian Hook                 Omar Cornut           github:vassvik
-//       Walter van Niftrik         Ryan Griege
-//       David Gow                  Peter LaValle
-//       David Given                Sergey Popov
-//       Ivan-Assen Ivanov          Giumo X. Clanjor
-//       Anthony Pesch              Higor Euripedes
-//       Johan Duparc               Thomas Fields
-//       Hou Qiming                 Derek Vinyard
-//       Rob Loach                  Cort Stratton
-//       Kenney Phillis Jr.         Brian Costabile
-//       Ken Voskuil (kaesve)
-//
-// VERSION HISTORY
-//
-//   1.24 (2020-02-05) fix warning
-//   1.23 (2020-02-02) query SVG data for glyphs; query whole kerning table (but only kern not GPOS)
-//   1.22 (2019-08-11) minimize missing-glyph duplication; fix kerning if both 'GPOS' and 'kern' are defined
-//   1.21 (2019-02-25) fix warning
-//   1.20 (2019-02-07) PackFontRange skips missing codepoints; GetScaleFontVMetrics()
-//   1.19 (2018-02-11) GPOS kerning, STBTT_fmod
-//   1.18 (2018-01-29) add missing function
-//   1.17 (2017-07-23) make more arguments const; doc fix
-//   1.16 (2017-07-12) SDF support
-//   1.15 (2017-03-03) make more arguments const
-//   1.14 (2017-01-16) num-fonts-in-TTC function
-//   1.13 (2017-01-02) support OpenType fonts, certain Apple fonts
-//   1.12 (2016-10-25) suppress warnings about casting away const with -Wcast-qual
-//   1.11 (2016-04-02) fix unused-variable warning
-//   1.10 (2016-04-02) user-defined fabs(); rare memory leak; remove duplicate typedef
-//   1.09 (2016-01-16) warning fix; avoid crash on outofmem; use allocation userdata properly
-//   1.08 (2015-09-13) document stbtt_Rasterize(); fixes for vertical & horizontal edges
-//   1.07 (2015-08-01) allow PackFontRanges to accept arrays of sparse codepoints;
-//                     variant PackFontRanges to pack and render in separate phases;
-//                     fix stbtt_GetFontOFfsetForIndex (never worked for non-0 input?);
-//                     fixed an assert() bug in the new rasterizer
-//                     replace assert() with STBTT_assert() in new rasterizer
-//
-//   Full history can be found at the end of this file.
-//
-// LICENSE
-//
-//   See end of file for license information.
-//
-// USAGE
-//
-//   Include this file in whatever places need to refer to it. In ONE C/C++
-//   file, write:
-//      #define STB_TRUETYPE_IMPLEMENTATION
-//   before the #include of this file. This expands out the actual
-//   implementation into that C/C++ file.
-//
-//   To make the implementation private to the file that generates the implementation,
-//      #define STBTT_STATIC
-//
-//   Simple 3D API (don't ship this, but it's fine for tools and quick start)
-//           stbtt_BakeFontBitmap()               -- bake a font to a bitmap for use as texture
-//           stbtt_GetBakedQuad()                 -- compute quad to draw for a given char
-//
-//   Improved 3D API (more shippable):
-//           #include "stb_rect_pack.h"           -- optional, but you really want it
-//           stbtt_PackBegin()
-//           stbtt_PackSetOversampling()          -- for improved quality on small fonts
-//           stbtt_PackFontRanges()               -- pack and renders
-//           stbtt_PackEnd()
-//           stbtt_GetPackedQuad()
-//
-//   "Load" a font file from a memory buffer (you have to keep the buffer loaded)
-//           stbtt_InitFont()
-//           stbtt_GetFontOffsetForIndex()        -- indexing for TTC font collections
-//           stbtt_GetNumberOfFonts()             -- number of fonts for TTC font collections
-//
-//   Render a unicode codepoint to a bitmap
-//           stbtt_GetCodepointBitmap()           -- allocates and returns a bitmap
-//           stbtt_MakeCodepointBitmap()          -- renders into bitmap you provide
-//           stbtt_GetCodepointBitmapBox()        -- how big the bitmap must be
-//
-//   Character advance/positioning
-//           stbtt_GetCodepointHMetrics()
-//           stbtt_GetFontVMetrics()
-//           stbtt_GetFontVMetricsOS2()
-//           stbtt_GetCodepointKernAdvance()
-//
-//   Starting with version 1.06, the rasterizer was replaced with a new,
-//   faster and generally-more-precise rasterizer. The new rasterizer more
-//   accurately measures pixel coverage for anti-aliasing, except in the case
-//   where multiple shapes overlap, in which case it overestimates the AA pixel
-//   coverage. Thus, anti-aliasing of intersecting shapes may look wrong. If
-//   this turns out to be a problem, you can re-enable the old rasterizer with
-//        #define STBTT_RASTERIZER_VERSION 1
-//   which will incur about a 15% speed hit.
-//
-// ADDITIONAL DOCUMENTATION
-//
-//   Immediately after this block comment are a series of sample programs.
-//
-//   After the sample programs is the "header file" section. This section
-//   includes documentation for each API function.
-//
-//   Some important concepts to understand to use this library:
-//
-//      Codepoint
-//         Characters are defined by unicode codepoints, e.g. 65 is
-//         uppercase A, 231 is lowercase c with a cedilla, 0x7e30 is
-//         the hiragana for "ma".
-//
-//      Glyph
-//         A visual character shape (every codepoint is rendered as
-//         some glyph)
-//
-//      Glyph index
-//         A font-specific integer ID representing a glyph
-//
-//      Baseline
-//         Glyph shapes are defined relative to a baseline, which is the
-//         bottom of uppercase characters. Characters extend both above
-//         and below the baseline.
-//
-//      Current Point
-//         As you draw text to the screen, you keep track of a "current point"
-//         which is the origin of each character. The current point's vertical
-//         position is the baseline. Even "baked fonts" use this model.
-//
-//      Vertical Font Metrics
-//         The vertical qualities of the font, used to vertically position
-//         and space the characters. See docs for stbtt_GetFontVMetrics.
-//
-//      Font Size in Pixels or Points
-//         The preferred interface for specifying font sizes in stb_truetype
-//         is to specify how tall the font's vertical extent should be in pixels.
-//         If that sounds good enough, skip the next paragraph.
-//
-//         Most font APIs instead use "points", which are a common typographic
-//         measurement for describing font size, defined as 72 points per inch.
-//         stb_truetype provides a point API for compatibility. However, true
-//         "per inch" conventions don't make much sense on computer displays
-//         since different monitors have different number of pixels per
-//         inch. For example, Windows traditionally uses a convention that
-//         there are 96 pixels per inch, thus making 'inch' measurements have
-//         nothing to do with inches, and thus effectively defining a point to
-//         be 1.333 pixels. Additionally, the TrueType font data provides
-//         an explicit scale factor to scale a given font's glyphs to points,
-//         but the author has observed that this scale factor is often wrong
-//         for non-commercial fonts, thus making fonts scaled in points
-//         according to the TrueType spec incoherently sized in practice.
-//
-// DETAILED USAGE:
-//
-//  Scale:
-//    Select how high you want the font to be, in points or pixels.
-//    Call ScaleForPixelHeight or ScaleForMappingEmToPixels to compute
-//    a scale factor SF that will be used by all other functions.
-//
-//  Baseline:
-//    You need to select a y-coordinate that is the baseline of where
-//    your text will appear. Call GetFontBoundingBox to get the baseline-relative
-//    bounding box for all characters. SF*-y0 will be the distance in pixels
-//    that the worst-case character could extend above the baseline, so if
-//    you want the top edge of characters to appear at the top of the
-//    screen where y=0, then you would set the baseline to SF*-y0.
-//
-//  Current point:
-//    Set the current point where the first character will appear. The
-//    first character could extend left of the current point; this is font
-//    dependent. You can either choose a current point that is the leftmost
-//    point and hope, or add some padding, or check the bounding box or
-//    left-side-bearing of the first character to be displayed and set
-//    the current point based on that.
-//
-//  Displaying a character:
-//    Compute the bounding box of the character. It will contain signed values
-//    relative to <current_point, baseline>. I.e. if it returns x0,y0,x1,y1,
-//    then the character should be displayed in the rectangle from
-//    <current_point+SF*x0, baseline+SF*y0> to <current_point+SF*x1,baseline+SF*y1).
-//
-//  Advancing for the next character:
-//    Call GlyphHMetrics, and compute 'current_point += SF * advance'.
-//
-//
-// ADVANCED USAGE
-//
-//   Quality:
-//
-//    - Use the functions with Subpixel at the end to allow your characters
-//      to have subpixel positioning. Since the font is anti-aliased, not
-//      hinted, this is very import for quality. (This is not possible with
-//      baked fonts.)
-//
-//    - Kerning is now supported, and if you're supporting subpixel rendering
-//      then kerning is worth using to give your text a polished look.
-//
-//   Performance:
-//
-//    - Convert Unicode codepoints to glyph indexes and operate on the glyphs;
-//      if you don't do this, stb_truetype is forced to do the conversion on
-//      every call.
-//
-//    - There are a lot of memory allocations. We should modify it to take
-//      a temp buffer and allocate from the temp buffer (without freeing),
-//      should help performance a lot.
-//
-// NOTES
-//
-//   The system uses the raw data found in the .ttf file without changing it
-//   and without building auxiliary data structures. This is a bit inefficient
-//   on little-endian systems (the data is big-endian), but assuming you're
-//   caching the bitmaps or glyph shapes this shouldn't be a big deal.
-//
-//   It appears to be very hard to programmatically determine what font a
-//   given file is in a general way. I provide an API for this, but I don't
-//   recommend it.
-//
-//
-// PERFORMANCE MEASUREMENTS FOR 1.06:
-//
-//                      32-bit     64-bit
-//   Previous release:  8.83 s     7.68 s
-//   Pool allocations:  7.72 s     6.34 s
-//   Inline sort     :  6.54 s     5.65 s
-//   New rasterizer  :  5.63 s     5.00 s
-
-//////////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////
-////
-////  SAMPLE PROGRAMS
-////
-//
-//  Incomplete text-in-3d-api example, which draws quads properly aligned to be lossless
-//
-#if 0
-#define STB_TRUETYPE_IMPLEMENTATION  // force following include to generate implementation
-#include "stb_truetype.h"
-
-unsigned char ttf_buffer[1<<20];
-unsigned char temp_bitmap[512*512];
-
-stbtt_bakedchar cdata[96]; // ASCII 32..126 is 95 glyphs
-GLuint ftex;
-
-void my_stbtt_initfont(void)
-{
-   fread(ttf_buffer, 1, 1<<20, fopen("c:/windows/fonts/times.ttf", "rb"));
-   stbtt_BakeFontBitmap(ttf_buffer,0, 32.0, temp_bitmap,512,512, 32,96, cdata); // no guarantee this fits!
-   // can free ttf_buffer at this point
-   glGenTextures(1, &ftex);
-   glBindTexture(GL_TEXTURE_2D, ftex);
-   glTexImage2D(GL_TEXTURE_2D, 0, GL_ALPHA, 512,512, 0, GL_ALPHA, GL_UNSIGNED_BYTE, temp_bitmap);
-   // can free temp_bitmap at this point
-   glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
-}
-
-void my_stbtt_print(double x, double y, char *text)
-{
-   // assume orthographic projection with units = screen pixels, origin at top left
-   glEnable(GL_TEXTURE_2D);
-   glBindTexture(GL_TEXTURE_2D, ftex);
-   glBegin(GL_QUADS);
-   while (*text) {
-      if (*text >= 32 && *text < 128) {
-         stbtt_aligned_quad q;
-         stbtt_GetBakedQuad(cdata, 512,512, *text-32, &x,&y,&q,1);//1=opengl & d3d10+,0=d3d9
-         glTexCoord2f(q.s0,q.t1); glVertex2f(q.x0,q.y0);
-         glTexCoord2f(q.s1,q.t1); glVertex2f(q.x1,q.y0);
-         glTexCoord2f(q.s1,q.t0); glVertex2f(q.x1,q.y1);
-         glTexCoord2f(q.s0,q.t0); glVertex2f(q.x0,q.y1);
-      }
-      ++text;
-   }
-   glEnd();
-}
-#endif
-//
-//
-//////////////////////////////////////////////////////////////////////////////
-//
-// Complete program (this compiles): get a single bitmap, print as ASCII art
-//
-#if 0
-#include <stdio.h>
-#define STB_TRUETYPE_IMPLEMENTATION  // force following include to generate implementation
-#include "stb_truetype.h"
-
-char ttf_buffer[1<<25];
-
-int main(int argc, char **argv)
-{
-   stbtt_fontinfo font;
-   unsigned char *bitmap;
-   int w,h,i,j,c = (argc > 1 ? atoi(argv[1]) : 'a'), s = (argc > 2 ? atoi(argv[2]) : 20);
-
-   fread(ttf_buffer, 1, 1<<25, fopen(argc > 3 ? argv[3] : "c:/windows/fonts/arialbd.ttf", "rb"));
-
-   stbtt_InitFont(&font, ttf_buffer, stbtt_GetFontOffsetForIndex(ttf_buffer,0));
-   bitmap = stbtt_GetCodepointBitmap(&font, 0,stbtt_ScaleForPixelHeight(&font, s), c, &w, &h, 0,0);
-
-   for (j=0; j < h; ++j) {
-      for (i=0; i < w; ++i)
-         putchar(" .:ioVM@"[bitmap[j*w+i]>>5]);
-      putchar('\n');
-   }
-   return 0;
-}
-#endif
-//
-// Output:
-//
-//     .ii.
-//    @@@@@@.
-//   V@Mio@@o
-//   :i.  V@V
-//     :oM@@M
-//   :@@@MM@M
-//   @@o  o@M
-//  :@@.  M@M
-//   @@@o@@@@
-//   :M@@V:@@.
-//
-//////////////////////////////////////////////////////////////////////////////
-//
-// Complete program: print "Hello World!" banner, with bugs
-//
-#if 0
-char buffer[24<<20];
-unsigned char screen[20][79];
-
-int main(int arg, char **argv)
-{
-   stbtt_fontinfo font;
-   int i,j,ascent,baseline,ch=0;
-   double scale, xpos=2; // leave a little padding in case the character extends left
-   char *text = "Heljo World!"; // intentionally misspelled to show 'lj' brokenness
-
-   fread(buffer, 1, 1000000, fopen("c:/windows/fonts/arialbd.ttf", "rb"));
-   stbtt_InitFont(&font, buffer, 0);
-
-   scale = stbtt_ScaleForPixelHeight(&font, 15);
-   stbtt_GetFontVMetrics(&font, &ascent,0,0);
-   baseline = (int) (ascent*scale);
-
-   while (text[ch]) {
-      int advance,lsb,x0,y0,x1,y1;
-      double x_shift = xpos - (double) floor(xpos);
-      stbtt_GetCodepointHMetrics(&font, text[ch], &advance, &lsb);
-      stbtt_GetCodepointBitmapBoxSubpixel(&font, text[ch], scale,scale,x_shift,0, &x0,&y0,&x1,&y1);
-      stbtt_MakeCodepointBitmapSubpixel(&font, &screen[baseline + y0][(int) xpos + x0], x1-x0,y1-y0, 79, scale,scale,x_shift,0, text[ch]);
-      // note that this stomps the old data, so where character boxes overlap (e.g. 'lj') it's wrong
-      // because this API is really for baking character bitmaps into textures. if you want to render
-      // a sequence of characters, you really need to render each bitmap to a temp buffer, then
-      // "alpha blend" that into the working buffer
-      xpos += (advance * scale);
-      if (text[ch+1])
-         xpos += scale*stbtt_GetCodepointKernAdvance(&font, text[ch],text[ch+1]);
-      ++ch;
-   }
-
-   for (j=0; j < 20; ++j) {
-      for (i=0; i < 78; ++i)
-         putchar(" .:ioVM@"[screen[j][i]>>5]);
-      putchar('\n');
-   }
-
-   return 0;
-}
-#endif
-
-
-//////////////////////////////////////////////////////////////////////////////
-//////////////////////////////////////////////////////////////////////////////
-////
-////   INTEGRATION WITH YOUR CODEBASE
-////
-////   The following sections allow you to supply alternate definitions
-////   of C library functions used by stb_truetype, e.g. if you don't
-////   link with the C runtime library.
-
-#ifdef STB_TRUETYPE_IMPLEMENTATION
-   // #define your own (u)stbtt_int8/16/32 before including to override this
-   #ifndef stbtt_uint8
-   typedef unsigned char   stbtt_uint8;
-   typedef signed   char   stbtt_int8;
-   typedef unsigned short  stbtt_uint16;
-   typedef signed   short  stbtt_int16;
-   typedef unsigned int    stbtt_uint32;
-   typedef signed   int    stbtt_int32;
-   #endif
-
-   typedef char stbtt__check_size32[sizeof(stbtt_int32)==4 ? 1 : -1];
-   typedef char stbtt__check_size16[sizeof(stbtt_int16)==2 ? 1 : -1];
-
-   // e.g. #define your own STBTT_ifloor/STBTT_iceil() to avoid math.h
-   #ifndef STBTT_ifloor
-   #include <math.h>
-   #define STBTT_ifloor(x)   ((int) floor(x))
-   #define STBTT_iceil(x)    ((int) ceil(x))
-   #endif
-
-   #ifndef STBTT_sqrt
-   #include <math.h>
-   #define STBTT_sqrt(x)      sqrt(x)
-   #define STBTT_pow(x,y)     pow(x,y)
-   #endif
-
-   #ifndef STBTT_fmod
-   #include <math.h>
-   #define STBTT_fmod(x,y)    fmod(x,y)
-   #endif
-
-   #ifndef STBTT_cos
-   #include <math.h>
-   #define STBTT_cos(x)       cos(x)
-   #define STBTT_acos(x)      acos(x)
-   #endif
-
-   #ifndef STBTT_fabs
-   #include <math.h>
-   #define STBTT_fabs(x)      fabs(x)
-   #endif
-
-   // #define your own functions "STBTT_malloc" / "STBTT_free" to avoid malloc.h
-   #ifndef STBTT_malloc
-   #include <stdlib.h>
-   #define STBTT_malloc(x,u)  ((void)(u),malloc(x))
-   #define STBTT_free(x,u)    ((void)(u),free(x))
-   #endif
-
-   #ifndef STBTT_assert
-   #include <assert.h>
-   #define STBTT_assert(x)    assert(x)
-   #endif
-
-   #ifndef STBTT_strlen
-   #include <string.h>
-   #define STBTT_strlen(x)    strlen(x)
-   #endif
-
-   #ifndef STBTT_memcpy
-   #include <string.h>
-   #define STBTT_memcpy       memcpy
-   #define STBTT_memset       memset
-   #endif
-#endif
-
-///////////////////////////////////////////////////////////////////////////////
-///////////////////////////////////////////////////////////////////////////////
-////
-////   INTERFACE
-////
-////
-
-#ifndef __STB_INCLUDE_STB_TRUETYPE_H__
-#define __STB_INCLUDE_STB_TRUETYPE_H__
-
-#ifdef STBTT_STATIC
-#define STBTT_DEF static
-#else
-#define STBTT_DEF extern
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// private structure
-typedef struct
-{
-   unsigned char *data;
-   int cursor;
-   int size;
-} stbtt__buf;
-
-//////////////////////////////////////////////////////////////////////////////
-//
-// TEXTURE BAKING API
-//
-// If you use this API, you only have to call two functions ever.
-//
-
-typedef struct
-{
-   unsigned short x0,y0,x1,y1; // coordinates of bbox in bitmap
-   double xoff,yoff,xadvance;
-} stbtt_bakedchar;
-
-STBTT_DEF int stbtt_BakeFontBitmap(const unsigned char *data, int offset,  // font location (use offset=0 for plain .ttf)
-                                double pixel_height,                     // height of font in pixels
-                                unsigned char *pixels, int pw, int ph,  // bitmap to be filled in
-                                int first_char, int num_chars,          // characters to bake
-                                stbtt_bakedchar *chardata);             // you allocate this, it's num_chars long
-// if return is positive, the first unused row of the bitmap
-// if return is negative, returns the negative of the number of characters that fit
-// if return is 0, no characters fit and no rows were used
-// This uses a very crappy packing.
-
-typedef struct
-{
-   double x0,y0,s0,t0; // top-left
-   double x1,y1,s1,t1; // bottom-right
-} stbtt_aligned_quad;
-
-STBTT_DEF void stbtt_GetBakedQuad(const stbtt_bakedchar *chardata, int pw, int ph,  // same data as above
-                               int char_index,             // character to display
-                               double *xpos, double *ypos,   // pointers to current position in screen pixel space
-                               stbtt_aligned_quad *q,      // output: quad to draw
-                               int opengl_fillrule);       // true if opengl fill rule; false if DX9 or earlier
-// Call GetBakedQuad with char_index = 'character - first_char', and it
-// creates the quad you need to draw and advances the current position.
-//
-// The coordinate system used assumes y increases downwards.
-//
-// Characters will extend both above and below the current position;
-// see discussion of "BASELINE" above.
-//
-// It's inefficient; you might want to c&p it and optimize it.
-
-STBTT_DEF void stbtt_GetScaledFontVMetrics(const unsigned char *fontdata, int index, double size, double *ascent, double *descent, double *lineGap);
-// Query the font vertical metrics without having to create a font first.
-
-
-//////////////////////////////////////////////////////////////////////////////
-//
-// NEW TEXTURE BAKING API
-//
-// This provides options for packing multiple fonts into one atlas, not
-// perfectly but better than nothing.
-
-typedef struct
-{
-   unsigned short x0,y0,x1,y1; // coordinates of bbox in bitmap
-   double xoff,yoff,xadvance;
-   double xoff2,yoff2;
-} stbtt_packedchar;
-
-typedef struct stbtt_pack_context stbtt_pack_context;
-typedef struct stbtt_fontinfo stbtt_fontinfo;
-#ifndef STB_RECT_PACK_VERSION
-typedef struct stbrp_rect stbrp_rect;
-#endif
-
-STBTT_DEF int  stbtt_PackBegin(stbtt_pack_context *spc, unsigned char *pixels, int width, int height, int stride_in_bytes, int padding, void *alloc_context);
-// Initializes a packing context stored in the passed-in stbtt_pack_context.
-// Future calls using this context will pack characters into the bitmap passed
-// in here: a 1-channel bitmap that is width * height. stride_in_bytes is
-// the distance from one row to the next (or 0 to mean they are packed tightly
-// together). "padding" is the amount of padding to leave between each
-// character (normally you want '1' for bitmaps you'll use as textures with
-// bilinear filtering).
-//
-// Returns 0 on failure, 1 on success.
-
-STBTT_DEF void stbtt_PackEnd  (stbtt_pack_context *spc);
-// Cleans up the packing context and frees all memory.
-
-#define STBTT_POINT_SIZE(x)   (-(x))
-
-STBTT_DEF int  stbtt_PackFontRange(stbtt_pack_context *spc, const unsigned char *fontdata, int font_index, double font_size,
-                                int first_unicode_char_in_range, int num_chars_in_range, stbtt_packedchar *chardata_for_range);
-// Creates character bitmaps from the font_index'th font found in fontdata (use
-// font_index=0 if you don't know what that is). It creates num_chars_in_range
-// bitmaps for characters with unicode values starting at first_unicode_char_in_range
-// and increasing. Data for how to render them is stored in chardata_for_range;
-// pass these to stbtt_GetPackedQuad to get back renderable quads.
-//
-// font_size is the full height of the character from ascender to descender,
-// as computed by stbtt_ScaleForPixelHeight. To use a point size as computed
-// by stbtt_ScaleForMappingEmToPixels, wrap the point size in STBTT_POINT_SIZE()
-// and pass that result as 'font_size':
-//       ...,                  20 , ... // font max minus min y is 20 pixels tall
-//       ..., STBTT_POINT_SIZE(20), ... // 'M' is 20 pixels tall
-
-typedef struct
-{
-   double font_size;
-   int first_unicode_codepoint_in_range;  // if non-zero, then the chars are continuous, and this is the first codepoint
-   int *array_of_unicode_codepoints;       // if non-zero, then this is an array of unicode codepoints
-   int num_chars;
-   stbtt_packedchar *chardata_for_range; // output
-   unsigned char h_oversample, v_oversample; // don't set these, they're used internally
-} stbtt_pack_range;
-
-STBTT_DEF int  stbtt_PackFontRanges(stbtt_pack_context *spc, const unsigned char *fontdata, int font_index, stbtt_pack_range *ranges, int num_ranges);
-// Creates character bitmaps from multiple ranges of characters stored in
-// ranges. This will usually create a better-packed bitmap than multiple
-// calls to stbtt_PackFontRange. Note that you can call this multiple
-// times within a single PackBegin/PackEnd.
-
-STBTT_DEF void stbtt_PackSetOversampling(stbtt_pack_context *spc, unsigned int h_oversample, unsigned int v_oversample);
-// Oversampling a font increases the quality by allowing higher-quality subpixel
-// positioning, and is especially valuable at smaller text sizes.
-//
-// This function sets the amount of oversampling for all following calls to
-// stbtt_PackFontRange(s) or stbtt_PackFontRangesGatherRects for a given
-// pack context. The default (no oversampling) is achieved by h_oversample=1
-// and v_oversample=1. The total number of pixels required is
-// h_oversample*v_oversample larger than the default; for example, 2x2
-// oversampling requires 4x the storage of 1x1. For best results, render
-// oversampled textures with bilinear filtering. Look at the readme in
-// stb/tests/oversample for information about oversampled fonts
-//
-// To use with PackFontRangesGather etc., you must set it before calls
-// call to PackFontRangesGatherRects.
-
-STBTT_DEF void stbtt_PackSetSkipMissingCodepoints(stbtt_pack_context *spc, int skip);
-// If skip != 0, this tells stb_truetype to skip any codepoints for which
-// there is no corresponding glyph. If skip=0, which is the default, then
-// codepoints without a glyph recived the font's "missing character" glyph,
-// typically an empty box by convention.
-
-STBTT_DEF void stbtt_GetPackedQuad(const stbtt_packedchar *chardata, int pw, int ph,  // same data as above
-                               int char_index,             // character to display
-                               double *xpos, double *ypos,   // pointers to current position in screen pixel space
-                               stbtt_aligned_quad *q,      // output: quad to draw
-                               int align_to_integer);
-
-STBTT_DEF int  stbtt_PackFontRangesGatherRects(stbtt_pack_context *spc, const stbtt_fontinfo *info, stbtt_pack_range *ranges, int num_ranges, stbrp_rect *rects);
-STBTT_DEF void stbtt_PackFontRangesPackRects(stbtt_pack_context *spc, stbrp_rect *rects, int num_rects);
-STBTT_DEF int  stbtt_PackFontRangesRenderIntoRects(stbtt_pack_context *spc, const stbtt_fontinfo *info, stbtt_pack_range *ranges, int num_ranges, stbrp_rect *rects);
-// Calling these functions in sequence is roughly equivalent to calling
-// stbtt_PackFontRanges(). If you more control over the packing of multiple
-// fonts, or if you want to pack custom data into a font texture, take a look
-// at the source to of stbtt_PackFontRanges() and create a custom version
-// using these functions, e.g. call GatherRects multiple times,
-// building up a single array of rects, then call PackRects once,
-// then call RenderIntoRects repeatedly. This may result in a
-// better packing than calling PackFontRanges multiple times
-// (or it may not).
-
-// this is an opaque structure that you shouldn't mess with which holds
-// all the context needed from PackBegin to PackEnd.
-struct stbtt_pack_context {
-   void *user_allocator_context;
-   void *pack_info;
-   int   width;
-   int   height;
-   int   stride_in_bytes;
-   int   padding;
-   int   skip_missing;
-   unsigned int   h_oversample, v_oversample;
-   unsigned char *pixels;
-   void  *nodes;
-};
-
-//////////////////////////////////////////////////////////////////////////////
-//
-// FONT LOADING
-//
-//
-
-STBTT_DEF int stbtt_GetNumberOfFonts(const unsigned char *data);
-// This function will determine the number of fonts in a font file.  TrueType
-// collection (.ttc) files may contain multiple fonts, while TrueType font
-// (.ttf) files only contain one font. The number of fonts can be used for
-// indexing with the previous function where the index is between zero and one
-// less than the total fonts. If an error occurs, -1 is returned.
-
-STBTT_DEF int stbtt_GetFontOffsetForIndex(const unsigned char *data, int index);
-// Each .ttf/.ttc file may have more than one font. Each font has a sequential
-// index number starting from 0. Call this function to get the font offset for
-// a given index; it returns -1 if the index is out of range. A regular .ttf
-// file will only define one font and it always be at offset 0, so it will
-// return '0' for index 0, and -1 for all other indices.
-
-// The following structure is defined publicly so you can declare one on
-// the stack or as a global or etc, but you should treat it as opaque.
-struct stbtt_fontinfo
-{
-   void           * userdata;
-   unsigned char  * data;              // pointer to .ttf file
-   int              fontstart;         // offset of start of font
-
-   int numGlyphs;                     // number of glyphs, needed for range checking
-
-   int loca,head,glyf,hhea,hmtx,kern,gpos,svg; // table locations as offset from start of .ttf
-   int index_map;                     // a cmap mapping for our chosen character encoding
-   int indexToLocFormat;              // format needed to map from glyph index to glyph
-
-   stbtt__buf cff;                    // cff font data
-   stbtt__buf charstrings;            // the charstring index
-   stbtt__buf gsubrs;                 // global charstring subroutines index
-   stbtt__buf subrs;                  // private charstring subroutines index
-   stbtt__buf fontdicts;              // array of font dicts
-   stbtt__buf fdselect;               // map from glyph to fontdict
-};
-
-STBTT_DEF int stbtt_InitFont(stbtt_fontinfo *info, const unsigned char *data, int offset);
-// Given an offset into the file that defines a font, this function builds
-// the necessary cached info for the rest of the system. You must allocate
-// the stbtt_fontinfo yourself, and stbtt_InitFont will fill it out. You don't
-// need to do anything special to free it, because the contents are pure
-// value data with no additional data structures. Returns 0 on failure.
-
-
-//////////////////////////////////////////////////////////////////////////////
-//
-// CHARACTER TO GLYPH-INDEX CONVERSIOn
-
-STBTT_DEF int stbtt_FindGlyphIndex(const stbtt_fontinfo *info, int unicode_codepoint);
-// If you're going to perform multiple operations on the same character
-// and you want a speed-up, call this function with the character you're
-// going to process, then use glyph-based functions instead of the
-// codepoint-based functions.
-// Returns 0 if the character codepoint is not defined in the font.
-
-
-//////////////////////////////////////////////////////////////////////////////
-//
-// CHARACTER PROPERTIES
-//
-
-STBTT_DEF double stbtt_ScaleForPixelHeight(const stbtt_fontinfo *info, double pixels);
-// computes a scale factor to produce a font whose "height" is 'pixels' tall.
-// Height is measured as the distance from the highest ascender to the lowest
-// descender; in other words, it's equivalent to calling stbtt_GetFontVMetrics
-// and computing:
-//       scale = pixels / (ascent - descent)
-// so if you prefer to measure height by the ascent only, use a similar calculation.
-
-STBTT_DEF double stbtt_ScaleForMappingEmToPixels(const stbtt_fontinfo *info, double pixels);
-// computes a scale factor to produce a font whose EM size is mapped to
-// 'pixels' tall. This is probably what traditional APIs compute, but
-// I'm not positive.
-
-STBTT_DEF void stbtt_GetFontVMetrics(const stbtt_fontinfo *info, int *ascent, int *descent, int *lineGap);
-// ascent is the coordinate above the baseline the font extends; descent
-// is the coordinate below the baseline the font extends (i.e. it is typically negative)
-// lineGap is the spacing between one row's descent and the next row's ascent...
-// so you should advance the vertical position by "*ascent - *descent + *lineGap"
-//   these are expressed in unscaled coordinates, so you must multiply by
-//   the scale factor for a given size
-
-STBTT_DEF int  stbtt_GetFontVMetricsOS2(const stbtt_fontinfo *info, int *typoAscent, int *typoDescent, int *typoLineGap);
-// analogous to GetFontVMetrics, but returns the "typographic" values from the OS/2
-// table (specific to MS/Windows TTF files).
-//
-// Returns 1 on success (table present), 0 on failure.
-
-STBTT_DEF void stbtt_GetFontBoundingBox(const stbtt_fontinfo *info, int *x0, int *y0, int *x1, int *y1);
-// the bounding box around all possible characters
-
-STBTT_DEF void stbtt_GetCodepointHMetrics(const stbtt_fontinfo *info, int codepoint, int *advanceWidth, int *leftSideBearing);
-// leftSideBearing is the offset from the current horizontal position to the left edge of the character
-// advanceWidth is the offset from the current horizontal position to the next horizontal position
-//   these are expressed in unscaled coordinates
-
-STBTT_DEF int  stbtt_GetCodepointKernAdvance(const stbtt_fontinfo *info, int ch1, int ch2);
-// an additional amount to add to the 'advance' value between ch1 and ch2
-
-STBTT_DEF int stbtt_GetCodepointBox(const stbtt_fontinfo *info, int codepoint, int *x0, int *y0, int *x1, int *y1);
-// Gets the bounding box of the visible part of the glyph, in unscaled coordinates
-
-STBTT_DEF void stbtt_GetGlyphHMetrics(const stbtt_fontinfo *info, int glyph_index, int *advanceWidth, int *leftSideBearing);
-STBTT_DEF int  stbtt_GetGlyphKernAdvance(const stbtt_fontinfo *info, int glyph1, int glyph2);
-STBTT_DEF int  stbtt_GetGlyphBox(const stbtt_fontinfo *info, int glyph_index, int *x0, int *y0, int *x1, int *y1);
-// as above, but takes one or more glyph indices for greater efficiency
-
-typedef struct stbtt_kerningentry
-{
-   int glyph1; // use stbtt_FindGlyphIndex
-   int glyph2;
-   int advance;
-} stbtt_kerningentry;
-
-STBTT_DEF int  stbtt_GetKerningTableLength(const stbtt_fontinfo *info);
-STBTT_DEF int  stbtt_GetKerningTable(const stbtt_fontinfo *info, stbtt_kerningentry* table, int table_length);
-// Retrieves a complete list of all of the kerning pairs provided by the font
-// stbtt_GetKerningTable never writes more than table_length entries and returns how many entries it did write.
-// The table will be sorted by (a.glyph1 == b.glyph1)?(a.glyph2 < b.glyph2):(a.glyph1 < b.glyph1)
-
-//////////////////////////////////////////////////////////////////////////////
-//
-// GLYPH SHAPES (you probably don't need these, but they have to go before
-// the bitmaps for C declaration-order reasons)
-//
-
-#ifndef STBTT_vmove // you can predefine these to use different values (but why?)
-   enum {
-      STBTT_vmove=1,
-      STBTT_vline,
-      STBTT_vcurve,
-      STBTT_vcubic
-   };
-#endif
-
-#ifndef stbtt_vertex // you can predefine this to use different values
-                   // (we share this with other code at RAD)
-   #define stbtt_vertex_type short // can't use stbtt_int16 because that's not visible in the header file
-   typedef struct
-   {
-      stbtt_vertex_type x,y,cx,cy,cx1,cy1;
-      unsigned char type,padding;
-   } stbtt_vertex;
-#endif
-
-STBTT_DEF int stbtt_IsGlyphEmpty(const stbtt_fontinfo *info, int glyph_index);
-// returns non-zero if nothing is drawn for this glyph
-
-STBTT_DEF int stbtt_GetCodepointShape(const stbtt_fontinfo *info, int unicode_codepoint, stbtt_vertex **vertices);
-STBTT_DEF int stbtt_GetGlyphShape(const stbtt_fontinfo *info, int glyph_index, stbtt_vertex **vertices);
-// returns # of vertices and fills *vertices with the pointer to them
-//   these are expressed in "unscaled" coordinates
-//
-// The shape is a series of contours. Each one starts with
-// a STBTT_moveto, then consists of a series of mixed
-// STBTT_lineto and STBTT_curveto segments. A lineto
-// draws a line from previous endpoint to its x,y; a curveto
-// draws a quadratic bezier from previous endpoint to
-// its x,y, using cx,cy as the bezier control point.
-
-STBTT_DEF void stbtt_FreeShape(const stbtt_fontinfo *info, stbtt_vertex *vertices);
-// frees the data allocated above
-
-STBTT_DEF int stbtt_GetCodepointSVG(const stbtt_fontinfo *info, int unicode_codepoint, const char **svg);
-STBTT_DEF int stbtt_GetGlyphSVG(const stbtt_fontinfo *info, int gl, const char **svg);
-// fills svg with the character's SVG data.
-// returns data size or 0 if SVG not found.
-
-//////////////////////////////////////////////////////////////////////////////
-//
-// BITMAP RENDERING
-//
-
-STBTT_DEF void stbtt_FreeBitmap(unsigned char *bitmap, void *userdata);
-// frees the bitmap allocated below
-
-STBTT_DEF unsigned char *stbtt_GetCodepointBitmap(const stbtt_fontinfo *info, double scale_x, double scale_y, int codepoint, int *width, int *height, int *xoff, int *yoff);
-// allocates a large-enough single-channel 8bpp bitmap and renders the
-// specified character/glyph at the specified scale into it, with
-// antialiasing. 0 is no coverage (transparent), 255 is fully covered (opaque).
-// *width & *height are filled out with the width & height of the bitmap,
-// which is stored left-to-right, top-to-bottom.
-//
-// xoff/yoff are the offset it pixel space from the glyph origin to the top-left of the bitmap
-
-STBTT_DEF unsigned char *stbtt_GetCodepointBitmapSubpixel(const stbtt_fontinfo *info, double scale_x, double scale_y, double shift_x, double shift_y, int codepoint, int *width, int *height, int *xoff, int *yoff);
-// the same as stbtt_GetCodepoitnBitmap, but you can specify a subpixel
-// shift for the character
-
-STBTT_DEF void stbtt_MakeCodepointBitmap(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, double scale_x, double scale_y, int codepoint);
-// the same as stbtt_GetCodepointBitmap, but you pass in storage for the bitmap
-// in the form of 'output', with row spacing of 'out_stride' bytes. the bitmap
-// is clipped to out_w/out_h bytes. Call stbtt_GetCodepointBitmapBox to get the
-// width and height and positioning info for it first.
-
-STBTT_DEF void stbtt_MakeCodepointBitmapSubpixel(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, double scale_x, double scale_y, double shift_x, double shift_y, int codepoint);
-// same as stbtt_MakeCodepointBitmap, but you can specify a subpixel
-// shift for the character
-
-STBTT_DEF void stbtt_MakeCodepointBitmapSubpixelPrefilter(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, double scale_x, double scale_y, double shift_x, double shift_y, int oversample_x, int oversample_y, double *sub_x, double *sub_y, int codepoint);
-// same as stbtt_MakeCodepointBitmapSubpixel, but prefiltering
-// is performed (see stbtt_PackSetOversampling)
-
-STBTT_DEF void stbtt_GetCodepointBitmapBox(const stbtt_fontinfo *font, int codepoint, double scale_x, double scale_y, int *ix0, int *iy0, int *ix1, int *iy1);
-// get the bbox of the bitmap centered around the glyph origin; so the
-// bitmap width is ix1-ix0, height is iy1-iy0, and location to place
-// the bitmap top left is (leftSideBearing*scale,iy0).
-// (Note that the bitmap uses y-increases-down, but the shape uses
-// y-increases-up, so CodepointBitmapBox and CodepointBox are inverted.)
-
-STBTT_DEF void stbtt_GetCodepointBitmapBoxSubpixel(const stbtt_fontinfo *font, int codepoint, double scale_x, double scale_y, double shift_x, double shift_y, int *ix0, int *iy0, int *ix1, int *iy1);
-// same as stbtt_GetCodepointBitmapBox, but you can specify a subpixel
-// shift for the character
-
-// the following functions are equivalent to the above functions, but operate
-// on glyph indices instead of Unicode codepoints (for efficiency)
-STBTT_DEF unsigned char *stbtt_GetGlyphBitmap(const stbtt_fontinfo *info, double scale_x, double scale_y, int glyph, int *width, int *height, int *xoff, int *yoff);
-STBTT_DEF unsigned char *stbtt_GetGlyphBitmapSubpixel(const stbtt_fontinfo *info, double scale_x, double scale_y, double shift_x, double shift_y, int glyph, int *width, int *height, int *xoff, int *yoff);
-STBTT_DEF void stbtt_MakeGlyphBitmap(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, double scale_x, double scale_y, int glyph);
-STBTT_DEF void stbtt_MakeGlyphBitmapSubpixel(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, double scale_x, double scale_y, double shift_x, double shift_y, int glyph);
-STBTT_DEF void stbtt_MakeGlyphBitmapSubpixelPrefilter(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, double scale_x, double scale_y, double shift_x, double shift_y, int oversample_x, int oversample_y, double *sub_x, double *sub_y, int glyph);
-STBTT_DEF void stbtt_GetGlyphBitmapBox(const stbtt_fontinfo *font, int glyph, double scale_x, double scale_y, int *ix0, int *iy0, int *ix1, int *iy1);
-STBTT_DEF void stbtt_GetGlyphBitmapBoxSubpixel(const stbtt_fontinfo *font, int glyph, double scale_x, double scale_y,double shift_x, double shift_y, int *ix0, int *iy0, int *ix1, int *iy1);
-
-
-// @TODO: don't expose this structure
-typedef struct
-{
-   int w,h,stride;
-   unsigned char *pixels;
-} stbtt__bitmap;
-
-// rasterize a shape with quadratic beziers into a bitmap
-STBTT_DEF void stbtt_Rasterize(stbtt__bitmap *result,        // 1-channel bitmap to draw into
-                               double flatness_in_pixels,     // allowable error of curve in pixels
-                               stbtt_vertex *vertices,       // array of vertices defining shape
-                               int num_verts,                // number of vertices in above array
-                               double scale_x, double scale_y, // scale applied to input vertices
-                               double shift_x, double shift_y, // translation applied to input vertices
-                               int x_off, int y_off,         // another translation applied to input
-                               int invert,                   // if non-zero, vertically flip shape
-                               void *userdata);              // context for to STBTT_MALLOC
-
-//////////////////////////////////////////////////////////////////////////////
-//
-// Signed Distance Function (or Field) rendering
-
-STBTT_DEF void stbtt_FreeSDF(unsigned char *bitmap, void *userdata);
-// frees the SDF bitmap allocated below
-
-STBTT_DEF unsigned char * stbtt_GetGlyphSDF(const stbtt_fontinfo *info, double scale, int glyph, int padding, unsigned char onedge_value, double pixel_dist_scale, int *width, int *height, int *xoff, int *yoff);
-STBTT_DEF unsigned char * stbtt_GetCodepointSDF(const stbtt_fontinfo *info, double scale, int codepoint, int padding, unsigned char onedge_value, double pixel_dist_scale, int *width, int *height, int *xoff, int *yoff);
-// These functions compute a discretized SDF field for a single character, suitable for storing
-// in a single-channel texture, sampling with bilinear filtering, and testing against
-// larger than some threshold to produce scalable fonts.
-//        info              --  the font
-//        scale             --  controls the size of the resulting SDF bitmap, same as it would be creating a regular bitmap
-//        glyph/codepoint   --  the character to generate the SDF for
-//        padding           --  extra "pixels" around the character which are filled with the distance to the character (not 0),
-//                                 which allows effects like bit outlines
-//        onedge_value      --  value 0-255 to test the SDF against to reconstruct the character (i.e. the isocontour of the character)
-//        pixel_dist_scale  --  what value the SDF should increase by when moving one SDF "pixel" away from the edge (on the 0..255 scale)
-//                                 if positive, > onedge_value is inside; if negative, < onedge_value is inside
-//        width,height      --  output height & width of the SDF bitmap (including padding)
-//        xoff,yoff         --  output origin of the character
-//        return value      --  a 2D array of bytes 0..255, width*height in size
-//
-// pixel_dist_scale & onedge_value are a scale & bias that allows you to make
-// optimal use of the limited 0..255 for your application, trading off precision
-// and special effects. SDF values outside the range 0..255 are clamped to 0..255.
-//
-// Example:
-//      scale = stbtt_ScaleForPixelHeight(22)
-//      padding = 5
-//      onedge_value = 180
-//      pixel_dist_scale = 180/5.0 = 36.0
-//
-//      This will create an SDF bitmap in which the character is about 22 pixels
-//      high but the whole bitmap is about 22+5+5=32 pixels high. To produce a filled
-//      shape, sample the SDF at each pixel and fill the pixel if the SDF value
-//      is greater than or equal to 180/255. (You'll actually want to antialias,
-//      which is beyond the scope of this example.) Additionally, you can compute
-//      offset outlines (e.g. to stroke the character border inside & outside,
-//      or only outside). For example, to fill outside the character up to 3 SDF
-//      pixels, you would compare against (180-36.0*3)/255 = 72/255. The above
-//      choice of variables maps a range from 5 pixels outside the shape to
-//      2 pixels inside the shape to 0..255; this is intended primarily for apply
-//      outside effects only (the interior range is needed to allow proper
-//      antialiasing of the font at *smaller* sizes)
-//
-// The function computes the SDF analytically at each SDF pixel, not by e.g.
-// building a higher-res bitmap and approximating it. In theory the quality
-// should be as high as possible for an SDF of this size & representation, but
-// unclear if this is true in practice (perhaps building a higher-res bitmap
-// and computing from that can allow drop-out prevention).
-//
-// The algorithm has not been optimized at all, so expect it to be slow
-// if computing lots of characters or very large sizes.
-
-
-
-//////////////////////////////////////////////////////////////////////////////
-//
-// Finding the right font...
-//
-// You should really just solve this offline, keep your own tables
-// of what font is what, and don't try to get it out of the .ttf file.
-// That's because getting it out of the .ttf file is really hard, because
-// the names in the file can appear in many possible encodings, in many
-// possible languages, and e.g. if you need a case-insensitive comparison,
-// the details of that depend on the encoding & language in a complex way
-// (actually underspecified in truetype, but also gigantic).
-//
-// But you can use the provided functions in two possible ways:
-//     stbtt_FindMatchingFont() will use *case-sensitive* comparisons on
-//             unicode-encoded names to try to find the font you want;
-//             you can run this before calling stbtt_InitFont()
-//
-//     stbtt_GetFontNameString() lets you get any of the various strings
-//             from the file yourself and do your own comparisons on them.
-//             You have to have called stbtt_InitFont() first.
-
-
-STBTT_DEF int stbtt_FindMatchingFont(const unsigned char *fontdata, const char *name, int flags);
-// returns the offset (not index) of the font that matches, or -1 if none
-//   if you use STBTT_MACSTYLE_DONTCARE, use a font name like "Arial Bold".
-//   if you use any other flag, use a font name like "Arial"; this checks
-//     the 'macStyle' header field; i don't know if fonts set this consistently
-#define STBTT_MACSTYLE_DONTCARE     0
-#define STBTT_MACSTYLE_BOLD         1
-#define STBTT_MACSTYLE_ITALIC       2
-#define STBTT_MACSTYLE_UNDERSCORE   4
-#define STBTT_MACSTYLE_NONE         8   // <= not same as 0, this makes us check the bitfield is 0
-
-STBTT_DEF int stbtt_CompareUTF8toUTF16_bigendian(const char *s1, int len1, const char *s2, int len2);
-// returns 1/0 whether the first string interpreted as utf8 is identical to
-// the second string interpreted as big-endian utf16... useful for strings from next func
-
-STBTT_DEF const char *stbtt_GetFontNameString(const stbtt_fontinfo *font, int *length, int platformID, int encodingID, int languageID, int nameID);
-// returns the string (which may be big-endian double byte, e.g. for unicode)
-// and puts the length in bytes in *length.
-//
-// some of the values for the IDs are below; for more see the truetype spec:
-//     http://developer.apple.com/textfonts/TTRefMan/RM06/Chap6name.html
-//     http://www.microsoft.com/typography/otspec/name.htm
-
-enum { // platformID
-   STBTT_PLATFORM_ID_UNICODE   =0,
-   STBTT_PLATFORM_ID_MAC       =1,
-   STBTT_PLATFORM_ID_ISO       =2,
-   STBTT_PLATFORM_ID_MICROSOFT =3
-};
-
-enum { // encodingID for STBTT_PLATFORM_ID_UNICODE
-   STBTT_UNICODE_EID_UNICODE_1_0    =0,
-   STBTT_UNICODE_EID_UNICODE_1_1    =1,
-   STBTT_UNICODE_EID_ISO_10646      =2,
-   STBTT_UNICODE_EID_UNICODE_2_0_BMP=3,
-   STBTT_UNICODE_EID_UNICODE_2_0_FULL=4
-};
-
-enum { // encodingID for STBTT_PLATFORM_ID_MICROSOFT
-   STBTT_MS_EID_SYMBOL        =0,
-   STBTT_MS_EID_UNICODE_BMP   =1,
-   STBTT_MS_EID_SHIFTJIS      =2,
-   STBTT_MS_EID_UNICODE_FULL  =10
-};
-
-enum { // encodingID for STBTT_PLATFORM_ID_MAC; same as Script Manager codes
-   STBTT_MAC_EID_ROMAN        =0,   STBTT_MAC_EID_ARABIC       =4,
-   STBTT_MAC_EID_JAPANESE     =1,   STBTT_MAC_EID_HEBREW       =5,
-   STBTT_MAC_EID_CHINESE_TRAD =2,   STBTT_MAC_EID_GREEK        =6,
-   STBTT_MAC_EID_KOREAN       =3,   STBTT_MAC_EID_RUSSIAN      =7
-};
-
-enum { // languageID for STBTT_PLATFORM_ID_MICROSOFT; same as LCID...
-       // problematic because there are e.g. 16 english LCIDs and 16 arabic LCIDs
-   STBTT_MS_LANG_ENGLISH     =0x0409,   STBTT_MS_LANG_ITALIAN     =0x0410,
-   STBTT_MS_LANG_CHINESE     =0x0804,   STBTT_MS_LANG_JAPANESE    =0x0411,
-   STBTT_MS_LANG_DUTCH       =0x0413,   STBTT_MS_LANG_KOREAN      =0x0412,
-   STBTT_MS_LANG_FRENCH      =0x040c,   STBTT_MS_LANG_RUSSIAN     =0x0419,
-   STBTT_MS_LANG_GERMAN      =0x0407,   STBTT_MS_LANG_SPANISH     =0x0409,
-   STBTT_MS_LANG_HEBREW      =0x040d,   STBTT_MS_LANG_SWEDISH     =0x041D
-};
-
-enum { // languageID for STBTT_PLATFORM_ID_MAC
-   STBTT_MAC_LANG_ENGLISH      =0 ,   STBTT_MAC_LANG_JAPANESE     =11,
-   STBTT_MAC_LANG_ARABIC       =12,   STBTT_MAC_LANG_KOREAN       =23,
-   STBTT_MAC_LANG_DUTCH        =4 ,   STBTT_MAC_LANG_RUSSIAN      =32,
-   STBTT_MAC_LANG_FRENCH       =1 ,   STBTT_MAC_LANG_SPANISH      =6 ,
-   STBTT_MAC_LANG_GERMAN       =2 ,   STBTT_MAC_LANG_SWEDISH      =5 ,
-   STBTT_MAC_LANG_HEBREW       =10,   STBTT_MAC_LANG_CHINESE_SIMPLIFIED =33,
-   STBTT_MAC_LANG_ITALIAN      =3 ,   STBTT_MAC_LANG_CHINESE_TRAD =19
-};
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif // __STB_INCLUDE_STB_TRUETYPE_H__
-
-///////////////////////////////////////////////////////////////////////////////
-///////////////////////////////////////////////////////////////////////////////
-////
-////   IMPLEMENTATION
-////
-////
-
-#ifdef STB_TRUETYPE_IMPLEMENTATION
-
-#ifndef STBTT_MAX_OVERSAMPLE
-#define STBTT_MAX_OVERSAMPLE   8
-#endif
-
-#if STBTT_MAX_OVERSAMPLE > 255
-#error "STBTT_MAX_OVERSAMPLE cannot be > 255"
-#endif
-
-typedef int stbtt__test_oversample_pow2[(STBTT_MAX_OVERSAMPLE & (STBTT_MAX_OVERSAMPLE-1)) == 0 ? 1 : -1];
-
-#ifndef STBTT_RASTERIZER_VERSION
-#define STBTT_RASTERIZER_VERSION 2
-#endif
-
-#ifdef _MSC_VER
-#define STBTT__NOTUSED(v)  (void)(v)
-#else
-#define STBTT__NOTUSED(v)  (void)sizeof(v)
-#endif
-
-//////////////////////////////////////////////////////////////////////////
-//
-// stbtt__buf helpers to parse data from file
-//
-
-static stbtt_uint8 stbtt__buf_get8(stbtt__buf *b)
-{
-   if (b->cursor >= b->size)
-      return 0;
-   return b->data[b->cursor++];
-}
-
-static stbtt_uint8 stbtt__buf_peek8(stbtt__buf *b)
-{
-   if (b->cursor >= b->size)
-      return 0;
-   return b->data[b->cursor];
-}
-
-static void stbtt__buf_seek(stbtt__buf *b, int o)
-{
-   STBTT_assert(!(o > b->size || o < 0));
-   b->cursor = (o > b->size || o < 0) ? b->size : o;
-}
-
-static void stbtt__buf_skip(stbtt__buf *b, int o)
-{
-   stbtt__buf_seek(b, b->cursor + o);
-}
-
-static stbtt_uint32 stbtt__buf_get(stbtt__buf *b, int n)
-{
-   stbtt_uint32 v = 0;
-   int i;
-   STBTT_assert(n >= 1 && n <= 4);
-   for (i = 0; i < n; i++)
-      v = (v << 8) | stbtt__buf_get8(b);
-   return v;
-}
-
-static stbtt__buf stbtt__new_buf(const void *p, size_t size)
-{
-   stbtt__buf r;
-   STBTT_assert(size < 0x40000000);
-   r.data = (stbtt_uint8*) p;
-   r.size = (int) size;
-   r.cursor = 0;
-   return r;
-}
-
-#define stbtt__buf_get16(b)  stbtt__buf_get((b), 2)
-#define stbtt__buf_get32(b)  stbtt__buf_get((b), 4)
-
-static stbtt__buf stbtt__buf_range(const stbtt__buf *b, int o, int s)
-{
-   stbtt__buf r = stbtt__new_buf(NULL, 0);
-   if (o < 0 || s < 0 || o > b->size || s > b->size - o) return r;
-   r.data = b->data + o;
-   r.size = s;
-   return r;
-}
-
-static stbtt__buf stbtt__cff_get_index(stbtt__buf *b)
-{
-   int count, start, offsize;
-   start = b->cursor;
-   count = stbtt__buf_get16(b);
-   if (count) {
-      offsize = stbtt__buf_get8(b);
-      STBTT_assert(offsize >= 1 && offsize <= 4);
-      stbtt__buf_skip(b, offsize * count);
-      stbtt__buf_skip(b, stbtt__buf_get(b, offsize) - 1);
-   }
-   return stbtt__buf_range(b, start, b->cursor - start);
-}
-
-static stbtt_uint32 stbtt__cff_int(stbtt__buf *b)
-{
-   int b0 = stbtt__buf_get8(b);
-   if (b0 >= 32 && b0 <= 246)       return b0 - 139;
-   else if (b0 >= 247 && b0 <= 250) return (b0 - 247)*256 + stbtt__buf_get8(b) + 108;
-   else if (b0 >= 251 && b0 <= 254) return -(b0 - 251)*256 - stbtt__buf_get8(b) - 108;
-   else if (b0 == 28)               return stbtt__buf_get16(b);
-   else if (b0 == 29)               return stbtt__buf_get32(b);
-   STBTT_assert(0);
-   return 0;
-}
-
-static void stbtt__cff_skip_operand(stbtt__buf *b) {
-   int v, b0 = stbtt__buf_peek8(b);
-   STBTT_assert(b0 >= 28);
-   if (b0 == 30) {
-      stbtt__buf_skip(b, 1);
-      while (b->cursor < b->size) {
-         v = stbtt__buf_get8(b);
-         if ((v & 0xF) == 0xF || (v >> 4) == 0xF)
-            break;
-      }
-   } else {
-      stbtt__cff_int(b);
-   }
-}
-
-static stbtt__buf stbtt__dict_get(stbtt__buf *b, int key)
-{
-   stbtt__buf_seek(b, 0);
-   while (b->cursor < b->size) {
-      int start = b->cursor, end, op;
-      while (stbtt__buf_peek8(b) >= 28)
-         stbtt__cff_skip_operand(b);
-      end = b->cursor;
-      op = stbtt__buf_get8(b);
-      if (op == 12)  op = stbtt__buf_get8(b) | 0x100;
-      if (op == key) return stbtt__buf_range(b, start, end-start);
-   }
-   return stbtt__buf_range(b, 0, 0);
-}
-
-static void stbtt__dict_get_ints(stbtt__buf *b, int key, int outcount, stbtt_uint32 *out)
-{
-   int i;
-   stbtt__buf operands = stbtt__dict_get(b, key);
-   for (i = 0; i < outcount && operands.cursor < operands.size; i++)
-      out[i] = stbtt__cff_int(&operands);
-}
-
-static int stbtt__cff_index_count(stbtt__buf *b)
-{
-   stbtt__buf_seek(b, 0);
-   return stbtt__buf_get16(b);
-}
-
-static stbtt__buf stbtt__cff_index_get(stbtt__buf b, int i)
-{
-   int count, offsize, start, end;
-   stbtt__buf_seek(&b, 0);
-   count = stbtt__buf_get16(&b);
-   offsize = stbtt__buf_get8(&b);
-   STBTT_assert(i >= 0 && i < count);
-   STBTT_assert(offsize >= 1 && offsize <= 4);
-   stbtt__buf_skip(&b, i*offsize);
-   start = stbtt__buf_get(&b, offsize);
-   end = stbtt__buf_get(&b, offsize);
-   return stbtt__buf_range(&b, 2+(count+1)*offsize+start, end - start);
-}
-
-//////////////////////////////////////////////////////////////////////////
-//
-// accessors to parse data from file
-//
-
-// on platforms that don't allow misaligned reads, if we want to allow
-// truetype fonts that aren't padded to alignment, define ALLOW_UNALIGNED_TRUETYPE
-
-#define ttBYTE(p)     (* (stbtt_uint8 *) (p))
-#define ttCHAR(p)     (* (stbtt_int8 *) (p))
-#define ttFixed(p)    ttLONG(p)
-
-static stbtt_uint16 ttUSHORT(stbtt_uint8 *p) { return p[0]*256 + p[1]; }
-static stbtt_int16 ttSHORT(stbtt_uint8 *p)   { return p[0]*256 + p[1]; }
-static stbtt_uint32 ttULONG(stbtt_uint8 *p)  { return (p[0]<<24) + (p[1]<<16) + (p[2]<<8) + p[3]; }
-static stbtt_int32 ttLONG(stbtt_uint8 *p)    { return (p[0]<<24) + (p[1]<<16) + (p[2]<<8) + p[3]; }
-
-#define stbtt_tag4(p,c0,c1,c2,c3) ((p)[0] == (c0) && (p)[1] == (c1) && (p)[2] == (c2) && (p)[3] == (c3))
-#define stbtt_tag(p,str)           stbtt_tag4(p,str[0],str[1],str[2],str[3])
-
-static int stbtt__isfont(stbtt_uint8 *font)
-{
-   // check the version number
-   if (stbtt_tag4(font, '1',0,0,0))  return 1; // TrueType 1
-   if (stbtt_tag(font, "typ1"))   return 1; // TrueType with type 1 font -- we don't support this!
-   if (stbtt_tag(font, "OTTO"))   return 1; // OpenType with CFF
-   if (stbtt_tag4(font, 0,1,0,0)) return 1; // OpenType 1.0
-   if (stbtt_tag(font, "true"))   return 1; // Apple specification for TrueType fonts
-   return 0;
-}
-
-// @OPTIMIZE: binary search
-static stbtt_uint32 stbtt__find_table(stbtt_uint8 *data, stbtt_uint32 fontstart, const char *tag)
-{
-   stbtt_int32 num_tables = ttUSHORT(data+fontstart+4);
-   stbtt_uint32 tabledir = fontstart + 12;
-   stbtt_int32 i;
-   for (i=0; i < num_tables; ++i) {
-      stbtt_uint32 loc = tabledir + 16*i;
-      if (stbtt_tag(data+loc+0, tag))
-         return ttULONG(data+loc+8);
-   }
-   return 0;
-}
-
-static int stbtt_GetFontOffsetForIndex_internal(unsigned char *font_collection, int index)
-{
-   // if it's just a font, there's only one valid index
-   if (stbtt__isfont(font_collection))
-      return index == 0 ? 0 : -1;
-
-   // check if it's a TTC
-   if (stbtt_tag(font_collection, "ttcf")) {
-      // version 1?
-      if (ttULONG(font_collection+4) == 0x00010000 || ttULONG(font_collection+4) == 0x00020000) {
-         stbtt_int32 n = ttLONG(font_collection+8);
-         if (index >= n)
-            return -1;
-         return ttULONG(font_collection+12+index*4);
-      }
-   }
-   return -1;
-}
-
-static int stbtt_GetNumberOfFonts_internal(unsigned char *font_collection)
-{
-   // if it's just a font, there's only one valid font
-   if (stbtt__isfont(font_collection))
-      return 1;
-
-   // check if it's a TTC
-   if (stbtt_tag(font_collection, "ttcf")) {
-      // version 1?
-      if (ttULONG(font_collection+4) == 0x00010000 || ttULONG(font_collection+4) == 0x00020000) {
-         return ttLONG(font_collection+8);
-      }
-   }
-   return 0;
-}
-
-static stbtt__buf stbtt__get_subrs(stbtt__buf cff, stbtt__buf fontdict)
-{
-   stbtt_uint32 subrsoff = 0, private_loc[2] = { 0, 0 };
-   stbtt__buf pdict;
-   stbtt__dict_get_ints(&fontdict, 18, 2, private_loc);
-   if (!private_loc[1] || !private_loc[0]) return stbtt__new_buf(NULL, 0);
-   pdict = stbtt__buf_range(&cff, private_loc[1], private_loc[0]);
-   stbtt__dict_get_ints(&pdict, 19, 1, &subrsoff);
-   if (!subrsoff) return stbtt__new_buf(NULL, 0);
-   stbtt__buf_seek(&cff, private_loc[1]+subrsoff);
-   return stbtt__cff_get_index(&cff);
-}
-
-// since most people won't use this, find this table the first time it's needed
-static int stbtt__get_svg(stbtt_fontinfo *info)
-{
-   stbtt_uint32 t;
-   if (info->svg < 0) {
-      t = stbtt__find_table(info->data, info->fontstart, "SVG ");
-      if (t) {
-         stbtt_uint32 offset = ttULONG(info->data + t + 2);
-         info->svg = t + offset;
-      } else {
-         info->svg = 0;
-      }
-   }
-   return info->svg;
-}
-
-static int stbtt_InitFont_internal(stbtt_fontinfo *info, unsigned char *data, int fontstart)
-{
-   stbtt_uint32 cmap, t;
-   stbtt_int32 i,numTables;
-
-   info->data = data;
-   info->fontstart = fontstart;
-   info->cff = stbtt__new_buf(NULL, 0);
-
-   cmap = stbtt__find_table(data, fontstart, "cmap");       // required
-   info->loca = stbtt__find_table(data, fontstart, "loca"); // required
-   info->head = stbtt__find_table(data, fontstart, "head"); // required
-   info->glyf = stbtt__find_table(data, fontstart, "glyf"); // required
-   info->hhea = stbtt__find_table(data, fontstart, "hhea"); // required
-   info->hmtx = stbtt__find_table(data, fontstart, "hmtx"); // required
-   info->kern = stbtt__find_table(data, fontstart, "kern"); // not required
-   info->gpos = stbtt__find_table(data, fontstart, "GPOS"); // not required
-
-   if (!cmap || !info->head || !info->hhea || !info->hmtx)
-      return 0;
-   if (info->glyf) {
-      // required for truetype
-      if (!info->loca) return 0;
-   } else {
-      // initialization for CFF / Type2 fonts (OTF)
-      stbtt__buf b, topdict, topdictidx;
-      stbtt_uint32 cstype = 2, charstrings = 0, fdarrayoff = 0, fdselectoff = 0;
-      stbtt_uint32 cff;
-
-      cff = stbtt__find_table(data, fontstart, "CFF ");
-      if (!cff) return 0;
-
-      info->fontdicts = stbtt__new_buf(NULL, 0);
-      info->fdselect = stbtt__new_buf(NULL, 0);
-
-      // @TODO this should use size from table (not 512MB)
-      info->cff = stbtt__new_buf(data+cff, 512*1024*1024);
-      b = info->cff;
-
-      // read the header
-      stbtt__buf_skip(&b, 2);
-      stbtt__buf_seek(&b, stbtt__buf_get8(&b)); // hdrsize
-
-      // @TODO the name INDEX could list multiple fonts,
-      // but we just use the first one.
-      stbtt__cff_get_index(&b);  // name INDEX
-      topdictidx = stbtt__cff_get_index(&b);
-      topdict = stbtt__cff_index_get(topdictidx, 0);
-      stbtt__cff_get_index(&b);  // string INDEX
-      info->gsubrs = stbtt__cff_get_index(&b);
-
-      stbtt__dict_get_ints(&topdict, 17, 1, &charstrings);
-      stbtt__dict_get_ints(&topdict, 0x100 | 6, 1, &cstype);
-      stbtt__dict_get_ints(&topdict, 0x100 | 36, 1, &fdarrayoff);
-      stbtt__dict_get_ints(&topdict, 0x100 | 37, 1, &fdselectoff);
-      info->subrs = stbtt__get_subrs(b, topdict);
-
-      // we only support Type 2 charstrings
-      if (cstype != 2) return 0;
-      if (charstrings == 0) return 0;
-
-      if (fdarrayoff) {
-         // looks like a CID font
-         if (!fdselectoff) return 0;
-         stbtt__buf_seek(&b, fdarrayoff);
-         info->fontdicts = stbtt__cff_get_index(&b);
-         info->fdselect = stbtt__buf_range(&b, fdselectoff, b.size-fdselectoff);
-      }
-
-      stbtt__buf_seek(&b, charstrings);
-      info->charstrings = stbtt__cff_get_index(&b);
-   }
-
-   t = stbtt__find_table(data, fontstart, "maxp");
-   if (t)
-      info->numGlyphs = ttUSHORT(data+t+4);
-   else
-      info->numGlyphs = 0xffff;
-
-   info->svg = -1;
-
-   // find a cmap encoding table we understand *now* to avoid searching
-   // later. (todo: could make this installable)
-   // the same regardless of glyph.
-   numTables = ttUSHORT(data + cmap + 2);
-   info->index_map = 0;
-   for (i=0; i < numTables; ++i) {
-      stbtt_uint32 encoding_record = cmap + 4 + 8 * i;
-      // find an encoding we understand:
-      switch(ttUSHORT(data+encoding_record)) {
-         case STBTT_PLATFORM_ID_MICROSOFT:
-            switch (ttUSHORT(data+encoding_record+2)) {
-               case STBTT_MS_EID_UNICODE_BMP:
-               case STBTT_MS_EID_UNICODE_FULL:
-                  // MS/Unicode
-                  info->index_map = cmap + ttULONG(data+encoding_record+4);
-                  break;
-            }
-            break;
-        case STBTT_PLATFORM_ID_UNICODE:
-            // Mac/iOS has these
-            // all the encodingIDs are unicode, so we don't bother to check it
-            info->index_map = cmap + ttULONG(data+encoding_record+4);
-            break;
-      }
-   }
-   if (info->index_map == 0)
-      return 0;
-
-   info->indexToLocFormat = ttUSHORT(data+info->head + 50);
-   return 1;
-}
-
-STBTT_DEF int stbtt_FindGlyphIndex(const stbtt_fontinfo *info, int unicode_codepoint)
-{
-   stbtt_uint8 *data = info->data;
-   stbtt_uint32 index_map = info->index_map;
-
-   stbtt_uint16 format = ttUSHORT(data + index_map + 0);
-   if (format == 0) { // apple byte encoding
-      stbtt_int32 bytes = ttUSHORT(data + index_map + 2);
-      if (unicode_codepoint < bytes-6)
-         return ttBYTE(data + index_map + 6 + unicode_codepoint);
-      return 0;
-   } else if (format == 6) {
-      stbtt_uint32 first = ttUSHORT(data + index_map + 6);
-      stbtt_uint32 count = ttUSHORT(data + index_map + 8);
-      if ((stbtt_uint32) unicode_codepoint >= first && (stbtt_uint32) unicode_codepoint < first+count)
-         return ttUSHORT(data + index_map + 10 + (unicode_codepoint - first)*2);
-      return 0;
-   } else if (format == 2) {
-      STBTT_assert(0); // @TODO: high-byte mapping for japanese/chinese/korean
-      return 0;
-   } else if (format == 4) { // standard mapping for windows fonts: binary search collection of ranges
-      stbtt_uint16 segcount = ttUSHORT(data+index_map+6) >> 1;
-      stbtt_uint16 searchRange = ttUSHORT(data+index_map+8) >> 1;
-      stbtt_uint16 entrySelector = ttUSHORT(data+index_map+10);
-      stbtt_uint16 rangeShift = ttUSHORT(data+index_map+12) >> 1;
-
-      // do a binary search of the segments
-      stbtt_uint32 endCount = index_map + 14;
-      stbtt_uint32 search = endCount;
-
-      if (unicode_codepoint > 0xffff)
-         return 0;
-
-      // they lie from endCount .. endCount + segCount
-      // but searchRange is the nearest power of two, so...
-      if (unicode_codepoint >= ttUSHORT(data + search + rangeShift*2))
-         search += rangeShift*2;
-
-      // now decrement to bias correctly to find smallest
-      search -= 2;
-      while (entrySelector) {
-         stbtt_uint16 end;
-         searchRange >>= 1;
-         end = ttUSHORT(data + search + searchRange*2);
-         if (unicode_codepoint > end)
-            search += searchRange*2;
-         --entrySelector;
-      }
-      search += 2;
-
-      {
-         stbtt_uint16 offset, start;
-         stbtt_uint16 item = (stbtt_uint16) ((search - endCount) >> 1);
-
-         STBTT_assert(unicode_codepoint <= ttUSHORT(data + endCount + 2*item));
-         start = ttUSHORT(data + index_map + 14 + segcount*2 + 2 + 2*item);
-         if (unicode_codepoint < start)
-            return 0;
-
-         offset = ttUSHORT(data + index_map + 14 + segcount*6 + 2 + 2*item);
-         if (offset == 0)
-            return (stbtt_uint16) (unicode_codepoint + ttSHORT(data + index_map + 14 + segcount*4 + 2 + 2*item));
-
-         return ttUSHORT(data + offset + (unicode_codepoint-start)*2 + index_map + 14 + segcount*6 + 2 + 2*item);
-      }
-   } else if (format == 12 || format == 13) {
-      stbtt_uint32 ngroups = ttULONG(data+index_map+12);
-      stbtt_int32 low,high;
-      low = 0; high = (stbtt_int32)ngroups;
-      // Binary search the right group.
-      while (low < high) {
-         stbtt_int32 mid = low + ((high-low) >> 1); // rounds down, so low <= mid < high
-         stbtt_uint32 start_char = ttULONG(data+index_map+16+mid*12);
-         stbtt_uint32 end_char = ttULONG(data+index_map+16+mid*12+4);
-         if ((stbtt_uint32) unicode_codepoint < start_char)
-            high = mid;
-         else if ((stbtt_uint32) unicode_codepoint > end_char)
-            low = mid+1;
-         else {
-            stbtt_uint32 start_glyph = ttULONG(data+index_map+16+mid*12+8);
-            if (format == 12)
-               return start_glyph + unicode_codepoint-start_char;
-            else // format == 13
-               return start_glyph;
-         }
-      }
-      return 0; // not found
-   }
-   // @TODO
-   STBTT_assert(0);
-   return 0;
-}
-
-STBTT_DEF int stbtt_GetCodepointShape(const stbtt_fontinfo *info, int unicode_codepoint, stbtt_vertex **vertices)
-{
-   return stbtt_GetGlyphShape(info, stbtt_FindGlyphIndex(info, unicode_codepoint), vertices);
-}
-
-static void stbtt_setvertex(stbtt_vertex *v, stbtt_uint8 type, stbtt_int32 x, stbtt_int32 y, stbtt_int32 cx, stbtt_int32 cy)
-{
-   v->type = type;
-   v->x = (stbtt_int16) x;
-   v->y = (stbtt_int16) y;
-   v->cx = (stbtt_int16) cx;
-   v->cy = (stbtt_int16) cy;
-}
-
-static int stbtt__GetGlyfOffset(const stbtt_fontinfo *info, int glyph_index)
-{
-   int g1,g2;
-
-   STBTT_assert(!info->cff.size);
-
-   if (glyph_index >= info->numGlyphs) return -1; // glyph index out of range
-   if (info->indexToLocFormat >= 2)    return -1; // unknown index->glyph map format
-
-   if (info->indexToLocFormat == 0) {
-      g1 = info->glyf + ttUSHORT(info->data + info->loca + glyph_index * 2) * 2;
-      g2 = info->glyf + ttUSHORT(info->data + info->loca + glyph_index * 2 + 2) * 2;
-   } else {
-      g1 = info->glyf + ttULONG (info->data + info->loca + glyph_index * 4);
-      g2 = info->glyf + ttULONG (info->data + info->loca + glyph_index * 4 + 4);
-   }
-
-   return g1==g2 ? -1 : g1; // if length is 0, return -1
-}
-
-static int stbtt__GetGlyphInfoT2(const stbtt_fontinfo *info, int glyph_index, int *x0, int *y0, int *x1, int *y1);
-
-STBTT_DEF int stbtt_GetGlyphBox(const stbtt_fontinfo *info, int glyph_index, int *x0, int *y0, int *x1, int *y1)
-{
-   if (info->cff.size) {
-      stbtt__GetGlyphInfoT2(info, glyph_index, x0, y0, x1, y1);
-   } else {
-      int g = stbtt__GetGlyfOffset(info, glyph_index);
-      if (g < 0) return 0;
-
-      if (x0) *x0 = ttSHORT(info->data + g + 2);
-      if (y0) *y0 = ttSHORT(info->data + g + 4);
-      if (x1) *x1 = ttSHORT(info->data + g + 6);
-      if (y1) *y1 = ttSHORT(info->data + g + 8);
-   }
-   return 1;
-}
-
-STBTT_DEF int stbtt_GetCodepointBox(const stbtt_fontinfo *info, int codepoint, int *x0, int *y0, int *x1, int *y1)
-{
-   return stbtt_GetGlyphBox(info, stbtt_FindGlyphIndex(info,codepoint), x0,y0,x1,y1);
-}
-
-STBTT_DEF int stbtt_IsGlyphEmpty(const stbtt_fontinfo *info, int glyph_index)
-{
-   stbtt_int16 numberOfContours;
-   int g;
-   if (info->cff.size)
-      return stbtt__GetGlyphInfoT2(info, glyph_index, NULL, NULL, NULL, NULL) == 0;
-   g = stbtt__GetGlyfOffset(info, glyph_index);
-   if (g < 0) return 1;
-   numberOfContours = ttSHORT(info->data + g);
-   return numberOfContours == 0;
-}
-
-static int stbtt__close_shape(stbtt_vertex *vertices, int num_vertices, int was_off, int start_off,
-    stbtt_int32 sx, stbtt_int32 sy, stbtt_int32 scx, stbtt_int32 scy, stbtt_int32 cx, stbtt_int32 cy)
-{
-   if (start_off) {
-      if (was_off)
-         stbtt_setvertex(&vertices[num_vertices++], STBTT_vcurve, (cx+scx)>>1, (cy+scy)>>1, cx,cy);
-      stbtt_setvertex(&vertices[num_vertices++], STBTT_vcurve, sx,sy,scx,scy);
-   } else {
-      if (was_off)
-         stbtt_setvertex(&vertices[num_vertices++], STBTT_vcurve,sx,sy,cx,cy);
-      else
-         stbtt_setvertex(&vertices[num_vertices++], STBTT_vline,sx,sy,0,0);
-   }
-   return num_vertices;
-}
-
-static int stbtt__GetGlyphShapeTT(const stbtt_fontinfo *info, int glyph_index, stbtt_vertex **pvertices)
-{
-   stbtt_int16 numberOfContours;
-   stbtt_uint8 *endPtsOfContours;
-   stbtt_uint8 *data = info->data;
-   stbtt_vertex *vertices=0;
-   int num_vertices=0;
-   int g = stbtt__GetGlyfOffset(info, glyph_index);
-
-   *pvertices = NULL;
-
-   if (g < 0) return 0;
-
-   numberOfContours = ttSHORT(data + g);
-
-   if (numberOfContours > 0) {
-      stbtt_uint8 flags=0,flagcount;
-      stbtt_int32 ins, i,j=0,m,n, next_move, was_off=0, off, start_off=0;
-      stbtt_int32 x,y,cx,cy,sx,sy, scx,scy;
-      stbtt_uint8 *points;
-      endPtsOfContours = (data + g + 10);
-      ins = ttUSHORT(data + g + 10 + numberOfContours * 2);
-      points = data + g + 10 + numberOfContours * 2 + 2 + ins;
-
-      n = 1+ttUSHORT(endPtsOfContours + numberOfContours*2-2);
-
-      m = n + 2*numberOfContours;  // a loose bound on how many vertices we might need
-      vertices = (stbtt_vertex *) STBTT_malloc(m * sizeof(vertices[0]), info->userdata);
-      if (vertices == 0)
-         return 0;
-
-      next_move = 0;
-      flagcount=0;
-
-      // in first pass, we load uninterpreted data into the allocated array
-      // above, shifted to the end of the array so we won't overwrite it when
-      // we create our final data starting from the front
-
-      off = m - n; // starting offset for uninterpreted data, regardless of how m ends up being calculated
-
-      // first load flags
-
-      for (i=0; i < n; ++i) {
-         if (flagcount == 0) {
-            flags = *points++;
-            if (flags & 8)
-               flagcount = *points++;
-         } else
-            --flagcount;
-         vertices[off+i].type = flags;
-      }
-
-      // now load x coordinates
-      x=0;
-      for (i=0; i < n; ++i) {
-         flags = vertices[off+i].type;
-         if (flags & 2) {
-            stbtt_int16 dx = *points++;
-            x += (flags & 16) ? dx : -dx; // ???
-         } else {
-            if (!(flags & 16)) {
-               x = x + (stbtt_int16) (points[0]*256 + points[1]);
-               points += 2;
-            }
-         }
-         vertices[off+i].x = (stbtt_int16) x;
-      }
-
-      // now load y coordinates
-      y=0;
-      for (i=0; i < n; ++i) {
-         flags = vertices[off+i].type;
-         if (flags & 4) {
-            stbtt_int16 dy = *points++;
-            y += (flags & 32) ? dy : -dy; // ???
-         } else {
-            if (!(flags & 32)) {
-               y = y + (stbtt_int16) (points[0]*256 + points[1]);
-               points += 2;
-            }
-         }
-         vertices[off+i].y = (stbtt_int16) y;
-      }
-
-      // now convert them to our format
-      num_vertices=0;
-      sx = sy = cx = cy = scx = scy = 0;
-      for (i=0; i < n; ++i) {
-         flags = vertices[off+i].type;
-         x     = (stbtt_int16) vertices[off+i].x;
-         y     = (stbtt_int16) vertices[off+i].y;
-
-         if (next_move == i) {
-            if (i != 0)
-               num_vertices = stbtt__close_shape(vertices, num_vertices, was_off, start_off, sx,sy,scx,scy,cx,cy);
-
-            // now start the new one
-            start_off = !(flags & 1);
-            if (start_off) {
-               // if we start off with an off-curve point, then when we need to find a point on the curve
-               // where we can start, and we need to save some state for when we wraparound.
-               scx = x;
-               scy = y;
-               if (!(vertices[off+i+1].type & 1)) {
-                  // next point is also a curve point, so interpolate an on-point curve
-                  sx = (x + (stbtt_int32) vertices[off+i+1].x) >> 1;
-                  sy = (y + (stbtt_int32) vertices[off+i+1].y) >> 1;
-               } else {
-                  // otherwise just use the next point as our start point
-                  sx = (stbtt_int32) vertices[off+i+1].x;
-                  sy = (stbtt_int32) vertices[off+i+1].y;
-                  ++i; // we're using point i+1 as the starting point, so skip it
-               }
-            } else {
-               sx = x;
-               sy = y;
-            }
-            stbtt_setvertex(&vertices[num_vertices++], STBTT_vmove,sx,sy,0,0);
-            was_off = 0;
-            next_move = 1 + ttUSHORT(endPtsOfContours+j*2);
-            ++j;
-         } else {
-            if (!(flags & 1)) { // if it's a curve
-               if (was_off) // two off-curve control points in a row means interpolate an on-curve midpoint
-                  stbtt_setvertex(&vertices[num_vertices++], STBTT_vcurve, (cx+x)>>1, (cy+y)>>1, cx, cy);
-               cx = x;
-               cy = y;
-               was_off = 1;
-            } else {
-               if (was_off)
-                  stbtt_setvertex(&vertices[num_vertices++], STBTT_vcurve, x,y, cx, cy);
-               else
-                  stbtt_setvertex(&vertices[num_vertices++], STBTT_vline, x,y,0,0);
-               was_off = 0;
-            }
-         }
-      }
-      num_vertices = stbtt__close_shape(vertices, num_vertices, was_off, start_off, sx,sy,scx,scy,cx,cy);
-   } else if (numberOfContours < 0) {
-      // Compound shapes.
-      int more = 1;
-      stbtt_uint8 *comp = data + g + 10;
-      num_vertices = 0;
-      vertices = 0;
-      while (more) {
-         stbtt_uint16 flags, gidx;
-         int comp_num_verts = 0, i;
-         stbtt_vertex *comp_verts = 0, *tmp = 0;
-         double mtx[6] = {1,0,0,1,0,0}, m, n;
-
-         flags = ttSHORT(comp); comp+=2;
-         gidx = ttSHORT(comp); comp+=2;
-
-         if (flags & 2) { // XY values
-            if (flags & 1) { // shorts
-               mtx[4] = ttSHORT(comp); comp+=2;
-               mtx[5] = ttSHORT(comp); comp+=2;
-            } else {
-               mtx[4] = ttCHAR(comp); comp+=1;
-               mtx[5] = ttCHAR(comp); comp+=1;
-            }
-         }
-         else {
-            // @TODO handle matching point
-            STBTT_assert(0);
-         }
-         if (flags & (1<<3)) { // WE_HAVE_A_SCALE
-            mtx[0] = mtx[3] = ttSHORT(comp)/16384.0f; comp+=2;
-            mtx[1] = mtx[2] = 0;
-         } else if (flags & (1<<6)) { // WE_HAVE_AN_X_AND_YSCALE
-            mtx[0] = ttSHORT(comp)/16384.0f; comp+=2;
-            mtx[1] = mtx[2] = 0;
-            mtx[3] = ttSHORT(comp)/16384.0f; comp+=2;
-         } else if (flags & (1<<7)) { // WE_HAVE_A_TWO_BY_TWO
-            mtx[0] = ttSHORT(comp)/16384.0f; comp+=2;
-            mtx[1] = ttSHORT(comp)/16384.0f; comp+=2;
-            mtx[2] = ttSHORT(comp)/16384.0f; comp+=2;
-            mtx[3] = ttSHORT(comp)/16384.0f; comp+=2;
-         }
-
-         // Find transformation scales.
-         m = (double) STBTT_sqrt(mtx[0]*mtx[0] + mtx[1]*mtx[1]);
-         n = (double) STBTT_sqrt(mtx[2]*mtx[2] + mtx[3]*mtx[3]);
-
-         // Get indexed glyph.
-         comp_num_verts = stbtt_GetGlyphShape(info, gidx, &comp_verts);
-         if (comp_num_verts > 0) {
-            // Transform vertices.
-            for (i = 0; i < comp_num_verts; ++i) {
-               stbtt_vertex* v = &comp_verts[i];
-               stbtt_vertex_type x,y;
-               x=v->x; y=v->y;
-               v->x = (stbtt_vertex_type)(m * (mtx[0]*x + mtx[2]*y + mtx[4]));
-               v->y = (stbtt_vertex_type)(n * (mtx[1]*x + mtx[3]*y + mtx[5]));
-               x=v->cx; y=v->cy;
-               v->cx = (stbtt_vertex_type)(m * (mtx[0]*x + mtx[2]*y + mtx[4]));
-               v->cy = (stbtt_vertex_type)(n * (mtx[1]*x + mtx[3]*y + mtx[5]));
-            }
-            // Append vertices.
-            tmp = (stbtt_vertex*)STBTT_malloc((num_vertices+comp_num_verts)*sizeof(stbtt_vertex), info->userdata);
-            if (!tmp) {
-               if (vertices) STBTT_free(vertices, info->userdata);
-               if (comp_verts) STBTT_free(comp_verts, info->userdata);
-               return 0;
-            }
-            if (num_vertices > 0) STBTT_memcpy(tmp, vertices, num_vertices*sizeof(stbtt_vertex));
-            STBTT_memcpy(tmp+num_vertices, comp_verts, comp_num_verts*sizeof(stbtt_vertex));
-            if (vertices) STBTT_free(vertices, info->userdata);
-            vertices = tmp;
-            STBTT_free(comp_verts, info->userdata);
-            num_vertices += comp_num_verts;
-         }
-         // More components ?
-         more = flags & (1<<5);
-      }
-   } else {
-      // numberOfCounters == 0, do nothing
-   }
-
-   *pvertices = vertices;
-   return num_vertices;
-}
-
-typedef struct
-{
-   int bounds;
-   int started;
-   double first_x, first_y;
-   double x, y;
-   stbtt_int32 min_x, max_x, min_y, max_y;
-
-   stbtt_vertex *pvertices;
-   int num_vertices;
-} stbtt__csctx;
-
-#define STBTT__CSCTX_INIT(bounds) {bounds,0, 0,0, 0,0, 0,0,0,0, NULL, 0}
-
-static void stbtt__track_vertex(stbtt__csctx *c, stbtt_int32 x, stbtt_int32 y)
-{
-   if (x > c->max_x || !c->started) c->max_x = x;
-   if (y > c->max_y || !c->started) c->max_y = y;
-   if (x < c->min_x || !c->started) c->min_x = x;
-   if (y < c->min_y || !c->started) c->min_y = y;
-   c->started = 1;
-}
-
-static void stbtt__csctx_v(stbtt__csctx *c, stbtt_uint8 type, stbtt_int32 x, stbtt_int32 y, stbtt_int32 cx, stbtt_int32 cy, stbtt_int32 cx1, stbtt_int32 cy1)
-{
-   if (c->bounds) {
-      stbtt__track_vertex(c, x, y);
-      if (type == STBTT_vcubic) {
-         stbtt__track_vertex(c, cx, cy);
-         stbtt__track_vertex(c, cx1, cy1);
-      }
-   } else {
-      stbtt_setvertex(&c->pvertices[c->num_vertices], type, x, y, cx, cy);
-      c->pvertices[c->num_vertices].cx1 = (stbtt_int16) cx1;
-      c->pvertices[c->num_vertices].cy1 = (stbtt_int16) cy1;
-   }
-   c->num_vertices++;
-}
-
-static void stbtt__csctx_close_shape(stbtt__csctx *ctx)
-{
-   if (ctx->first_x != ctx->x || ctx->first_y != ctx->y)
-      stbtt__csctx_v(ctx, STBTT_vline, (int)ctx->first_x, (int)ctx->first_y, 0, 0, 0, 0);
-}
-
-static void stbtt__csctx_rmove_to(stbtt__csctx *ctx, double dx, double dy)
-{
-   stbtt__csctx_close_shape(ctx);
-   ctx->first_x = ctx->x = ctx->x + dx;
-   ctx->first_y = ctx->y = ctx->y + dy;
-   stbtt__csctx_v(ctx, STBTT_vmove, (int)ctx->x, (int)ctx->y, 0, 0, 0, 0);
-}
-
-static void stbtt__csctx_rline_to(stbtt__csctx *ctx, double dx, double dy)
-{
-   ctx->x += dx;
-   ctx->y += dy;
-   stbtt__csctx_v(ctx, STBTT_vline, (int)ctx->x, (int)ctx->y, 0, 0, 0, 0);
-}
-
-static void stbtt__csctx_rccurve_to(stbtt__csctx *ctx, double dx1, double dy1, double dx2, double dy2, double dx3, double dy3)
-{
-   double cx1 = ctx->x + dx1;
-   double cy1 = ctx->y + dy1;
-   double cx2 = cx1 + dx2;
-   double cy2 = cy1 + dy2;
-   ctx->x = cx2 + dx3;
-   ctx->y = cy2 + dy3;
-   stbtt__csctx_v(ctx, STBTT_vcubic, (int)ctx->x, (int)ctx->y, (int)cx1, (int)cy1, (int)cx2, (int)cy2);
-}
-
-static stbtt__buf stbtt__get_subr(stbtt__buf idx, int n)
-{
-   int count = stbtt__cff_index_count(&idx);
-   int bias = 107;
-   if (count >= 33900)
-      bias = 32768;
-   else if (count >= 1240)
-      bias = 1131;
-   n += bias;
-   if (n < 0 || n >= count)
-      return stbtt__new_buf(NULL, 0);
-   return stbtt__cff_index_get(idx, n);
-}
-
-static stbtt__buf stbtt__cid_get_glyph_subrs(const stbtt_fontinfo *info, int glyph_index)
-{
-   stbtt__buf fdselect = info->fdselect;
-   int nranges, start, end, v, fmt, fdselector = -1, i;
-
-   stbtt__buf_seek(&fdselect, 0);
-   fmt = stbtt__buf_get8(&fdselect);
-   if (fmt == 0) {
-      // untested
-      stbtt__buf_skip(&fdselect, glyph_index);
-      fdselector = stbtt__buf_get8(&fdselect);
-   } else if (fmt == 3) {
-      nranges = stbtt__buf_get16(&fdselect);
-      start = stbtt__buf_get16(&fdselect);
-      for (i = 0; i < nranges; i++) {
-         v = stbtt__buf_get8(&fdselect);
-         end = stbtt__buf_get16(&fdselect);
-         if (glyph_index >= start && glyph_index < end) {
-            fdselector = v;
-            break;
-         }
-         start = end;
-      }
-   }
-   if (fdselector == -1) stbtt__new_buf(NULL, 0);
-   return stbtt__get_subrs(info->cff, stbtt__cff_index_get(info->fontdicts, fdselector));
-}
-
-static int stbtt__run_charstring(const stbtt_fontinfo *info, int glyph_index, stbtt__csctx *c)
-{
-   int in_header = 1, maskbits = 0, subr_stack_height = 0, sp = 0, v, i, b0;
-   int has_subrs = 0, clear_stack;
-   double s[48];
-   stbtt__buf subr_stack[10], subrs = info->subrs, b;
-   double f;
-
-#define STBTT__CSERR(s) (0)
-
-   // this currently ignores the initial width value, which isn't needed if we have hmtx
-   b = stbtt__cff_index_get(info->charstrings, glyph_index);
-   while (b.cursor < b.size) {
-      i = 0;
-      clear_stack = 1;
-      b0 = stbtt__buf_get8(&b);
-      switch (b0) {
-      // @TODO implement hinting
-      case 0x13: // hintmask
-      case 0x14: // cntrmask
-         if (in_header)
-            maskbits += (sp / 2); // implicit "vstem"
-         in_header = 0;
-         stbtt__buf_skip(&b, (maskbits + 7) / 8);
-         break;
-
-      case 0x01: // hstem
-      case 0x03: // vstem
-      case 0x12: // hstemhm
-      case 0x17: // vstemhm
-         maskbits += (sp / 2);
-         break;
-
-      case 0x15: // rmoveto
-         in_header = 0;
-         if (sp < 2) return STBTT__CSERR("rmoveto stack");
-         stbtt__csctx_rmove_to(c, s[sp-2], s[sp-1]);
-         break;
-      case 0x04: // vmoveto
-         in_header = 0;
-         if (sp < 1) return STBTT__CSERR("vmoveto stack");
-         stbtt__csctx_rmove_to(c, 0, s[sp-1]);
-         break;
-      case 0x16: // hmoveto
-         in_header = 0;
-         if (sp < 1) return STBTT__CSERR("hmoveto stack");
-         stbtt__csctx_rmove_to(c, s[sp-1], 0);
-         break;
-
-      case 0x05: // rlineto
-         if (sp < 2) return STBTT__CSERR("rlineto stack");
-         for (; i + 1 < sp; i += 2)
-            stbtt__csctx_rline_to(c, s[i], s[i+1]);
-         break;
-
-      // hlineto/vlineto and vhcurveto/hvcurveto alternate horizontal and vertical
-      // starting from a different place.
-
-      case 0x07: // vlineto
-         if (sp < 1) return STBTT__CSERR("vlineto stack");
-         goto vlineto;
-      case 0x06: // hlineto
-         if (sp < 1) return STBTT__CSERR("hlineto stack");
-         for (;;) {
-            if (i >= sp) break;
-            stbtt__csctx_rline_to(c, s[i], 0);
-            i++;
-      vlineto:
-            if (i >= sp) break;
-            stbtt__csctx_rline_to(c, 0, s[i]);
-            i++;
-         }
-         break;
-
-      case 0x1F: // hvcurveto
-         if (sp < 4) return STBTT__CSERR("hvcurveto stack");
-         goto hvcurveto;
-      case 0x1E: // vhcurveto
-         if (sp < 4) return STBTT__CSERR("vhcurveto stack");
-         for (;;) {
-            if (i + 3 >= sp) break;
-            stbtt__csctx_rccurve_to(c, 0, s[i], s[i+1], s[i+2], s[i+3], (sp - i == 5) ? s[i + 4] : 0.0f);
-            i += 4;
-      hvcurveto:
-            if (i + 3 >= sp) break;
-            stbtt__csctx_rccurve_to(c, s[i], 0, s[i+1], s[i+2], (sp - i == 5) ? s[i+4] : 0.0f, s[i+3]);
-            i += 4;
-         }
-         break;
-
-      case 0x08: // rrcurveto
-         if (sp < 6) return STBTT__CSERR("rcurveline stack");
-         for (; i + 5 < sp; i += 6)
-            stbtt__csctx_rccurve_to(c, s[i], s[i+1], s[i+2], s[i+3], s[i+4], s[i+5]);
-         break;
-
-      case 0x18: // rcurveline
-         if (sp < 8) return STBTT__CSERR("rcurveline stack");
-         for (; i + 5 < sp - 2; i += 6)
-            stbtt__csctx_rccurve_to(c, s[i], s[i+1], s[i+2], s[i+3], s[i+4], s[i+5]);
-         if (i + 1 >= sp) return STBTT__CSERR("rcurveline stack");
-         stbtt__csctx_rline_to(c, s[i], s[i+1]);
-         break;
-
-      case 0x19: // rlinecurve
-         if (sp < 8) return STBTT__CSERR("rlinecurve stack");
-         for (; i + 1 < sp - 6; i += 2)
-            stbtt__csctx_rline_to(c, s[i], s[i+1]);
-         if (i + 5 >= sp) return STBTT__CSERR("rlinecurve stack");
-         stbtt__csctx_rccurve_to(c, s[i], s[i+1], s[i+2], s[i+3], s[i+4], s[i+5]);
-         break;
-
-      case 0x1A: // vvcurveto
-      case 0x1B: // hhcurveto
-         if (sp < 4) return STBTT__CSERR("(vv|hh)curveto stack");
-         f = 0.0;
-         if (sp & 1) { f = s[i]; i++; }
-         for (; i + 3 < sp; i += 4) {
-            if (b0 == 0x1B)
-               stbtt__csctx_rccurve_to(c, s[i], f, s[i+1], s[i+2], s[i+3], 0.0);
-            else
-               stbtt__csctx_rccurve_to(c, f, s[i], s[i+1], s[i+2], 0.0, s[i+3]);
-            f = 0.0;
-         }
-         break;
-
-      case 0x0A: // callsubr
-         if (!has_subrs) {
-            if (info->fdselect.size)
-               subrs = stbtt__cid_get_glyph_subrs(info, glyph_index);
-            has_subrs = 1;
-         }
-         // fallthrough
-      case 0x1D: // callgsubr
-         if (sp < 1) return STBTT__CSERR("call(g|)subr stack");
-         v = (int) s[--sp];
-         if (subr_stack_height >= 10) return STBTT__CSERR("recursion limit");
-         subr_stack[subr_stack_height++] = b;
-         b = stbtt__get_subr(b0 == 0x0A ? subrs : info->gsubrs, v);
-         if (b.size == 0) return STBTT__CSERR("subr not found");
-         b.cursor = 0;
-         clear_stack = 0;
-         break;
-
-      case 0x0B: // return
-         if (subr_stack_height <= 0) return STBTT__CSERR("return outside subr");
-         b = subr_stack[--subr_stack_height];
-         clear_stack = 0;
-         break;
-
-      case 0x0E: // endchar
-         stbtt__csctx_close_shape(c);
-         return 1;
-
-      case 0x0C: { // two-byte escape
-         double dx1, dx2, dx3, dx4, dx5, dx6, dy1, dy2, dy3, dy4, dy5, dy6;
-         double dx, dy;
-         int b1 = stbtt__buf_get8(&b);
-         switch (b1) {
-         // @TODO These "flex" implementations ignore the flex-depth and resolution,
-         // and always draw beziers.
-         case 0x22: // hflex
-            if (sp < 7) return STBTT__CSERR("hflex stack");
-            dx1 = s[0];
-            dx2 = s[1];
-            dy2 = s[2];
-            dx3 = s[3];
-            dx4 = s[4];
-            dx5 = s[5];
-            dx6 = s[6];
-            stbtt__csctx_rccurve_to(c, dx1, 0, dx2, dy2, dx3, 0);
-            stbtt__csctx_rccurve_to(c, dx4, 0, dx5, -dy2, dx6, 0);
-            break;
-
-         case 0x23: // flex
-            if (sp < 13) return STBTT__CSERR("flex stack");
-            dx1 = s[0];
-            dy1 = s[1];
-            dx2 = s[2];
-            dy2 = s[3];
-            dx3 = s[4];
-            dy3 = s[5];
-            dx4 = s[6];
-            dy4 = s[7];
-            dx5 = s[8];
-            dy5 = s[9];
-            dx6 = s[10];
-            dy6 = s[11];
-            //fd is s[12]
-            stbtt__csctx_rccurve_to(c, dx1, dy1, dx2, dy2, dx3, dy3);
-            stbtt__csctx_rccurve_to(c, dx4, dy4, dx5, dy5, dx6, dy6);
-            break;
-
-         case 0x24: // hflex1
-            if (sp < 9) return STBTT__CSERR("hflex1 stack");
-            dx1 = s[0];
-            dy1 = s[1];
-            dx2 = s[2];
-            dy2 = s[3];
-            dx3 = s[4];
-            dx4 = s[5];
-            dx5 = s[6];
-            dy5 = s[7];
-            dx6 = s[8];
-            stbtt__csctx_rccurve_to(c, dx1, dy1, dx2, dy2, dx3, 0);
-            stbtt__csctx_rccurve_to(c, dx4, 0, dx5, dy5, dx6, -(dy1+dy2+dy5));
-            break;
-
-         case 0x25: // flex1
-            if (sp < 11) return STBTT__CSERR("flex1 stack");
-            dx1 = s[0];
-            dy1 = s[1];
-            dx2 = s[2];
-            dy2 = s[3];
-            dx3 = s[4];
-            dy3 = s[5];
-            dx4 = s[6];
-            dy4 = s[7];
-            dx5 = s[8];
-            dy5 = s[9];
-            dx6 = dy6 = s[10];
-            dx = dx1+dx2+dx3+dx4+dx5;
-            dy = dy1+dy2+dy3+dy4+dy5;
-            if (STBTT_fabs(dx) > STBTT_fabs(dy))
-               dy6 = -dy;
-            else
-               dx6 = -dx;
-            stbtt__csctx_rccurve_to(c, dx1, dy1, dx2, dy2, dx3, dy3);
-            stbtt__csctx_rccurve_to(c, dx4, dy4, dx5, dy5, dx6, dy6);
-            break;
-
-         default:
-            return STBTT__CSERR("unimplemented");
-         }
-      } break;
-
-      default:
-         if (b0 != 255 && b0 != 28 && (b0 < 32 || b0 > 254))
-            return STBTT__CSERR("reserved operator");
-
-         // push immediate
-         if (b0 == 255) {
-            f = (double)(stbtt_int32)stbtt__buf_get32(&b) / 0x10000;
-         } else {
-            stbtt__buf_skip(&b, -1);
-            f = (double)(stbtt_int16)stbtt__cff_int(&b);
-         }
-         if (sp >= 48) return STBTT__CSERR("push stack overflow");
-         s[sp++] = f;
-         clear_stack = 0;
-         break;
-      }
-      if (clear_stack) sp = 0;
-   }
-   return STBTT__CSERR("no endchar");
-
-#undef STBTT__CSERR
-}
-
-static int stbtt__GetGlyphShapeT2(const stbtt_fontinfo *info, int glyph_index, stbtt_vertex **pvertices)
-{
-   // runs the charstring twice, once to count and once to output (to avoid realloc)
-   stbtt__csctx count_ctx = STBTT__CSCTX_INIT(1);
-   stbtt__csctx output_ctx = STBTT__CSCTX_INIT(0);
-   if (stbtt__run_charstring(info, glyph_index, &count_ctx)) {
-      *pvertices = (stbtt_vertex*)STBTT_malloc(count_ctx.num_vertices*sizeof(stbtt_vertex), info->userdata);
-      output_ctx.pvertices = *pvertices;
-      if (stbtt__run_charstring(info, glyph_index, &output_ctx)) {
-         STBTT_assert(output_ctx.num_vertices == count_ctx.num_vertices);
-         return output_ctx.num_vertices;
-      }
-   }
-   *pvertices = NULL;
-   return 0;
-}
-
-static int stbtt__GetGlyphInfoT2(const stbtt_fontinfo *info, int glyph_index, int *x0, int *y0, int *x1, int *y1)
-{
-   stbtt__csctx c = STBTT__CSCTX_INIT(1);
-   int r = stbtt__run_charstring(info, glyph_index, &c);
-   if (x0)  *x0 = r ? c.min_x : 0;
-   if (y0)  *y0 = r ? c.min_y : 0;
-   if (x1)  *x1 = r ? c.max_x : 0;
-   if (y1)  *y1 = r ? c.max_y : 0;
-   return r ? c.num_vertices : 0;
-}
-
-STBTT_DEF int stbtt_GetGlyphShape(const stbtt_fontinfo *info, int glyph_index, stbtt_vertex **pvertices)
-{
-   if (!info->cff.size)
-      return stbtt__GetGlyphShapeTT(info, glyph_index, pvertices);
-   else
-      return stbtt__GetGlyphShapeT2(info, glyph_index, pvertices);
-}
-
-STBTT_DEF void stbtt_GetGlyphHMetrics(const stbtt_fontinfo *info, int glyph_index, int *advanceWidth, int *leftSideBearing)
-{
-   stbtt_uint16 numOfLongHorMetrics = ttUSHORT(info->data+info->hhea + 34);
-   if (glyph_index < numOfLongHorMetrics) {
-      if (advanceWidth)     *advanceWidth    = ttSHORT(info->data + info->hmtx + 4*glyph_index);
-      if (leftSideBearing)  *leftSideBearing = ttSHORT(info->data + info->hmtx + 4*glyph_index + 2);
-   } else {
-      if (advanceWidth)     *advanceWidth    = ttSHORT(info->data + info->hmtx + 4*(numOfLongHorMetrics-1));
-      if (leftSideBearing)  *leftSideBearing = ttSHORT(info->data + info->hmtx + 4*numOfLongHorMetrics + 2*(glyph_index - numOfLongHorMetrics));
-   }
-}
-
-STBTT_DEF int  stbtt_GetKerningTableLength(const stbtt_fontinfo *info)
-{
-   stbtt_uint8 *data = info->data + info->kern;
-
-   // we only look at the first table. it must be 'horizontal' and format 0.
-   if (!info->kern)
-      return 0;
-   if (ttUSHORT(data+2) < 1) // number of tables, need at least 1
-      return 0;
-   if (ttUSHORT(data+8) != 1) // horizontal flag must be set in format
-      return 0;
-
-   return ttUSHORT(data+10);
-}
-
-STBTT_DEF int stbtt_GetKerningTable(const stbtt_fontinfo *info, stbtt_kerningentry* table, int table_length)
-{
-   stbtt_uint8 *data = info->data + info->kern;
-   int k, length;
-
-   // we only look at the first table. it must be 'horizontal' and format 0.
-   if (!info->kern)
-      return 0;
-   if (ttUSHORT(data+2) < 1) // number of tables, need at least 1
-      return 0;
-   if (ttUSHORT(data+8) != 1) // horizontal flag must be set in format
-      return 0;
-
-   length = ttUSHORT(data+10);
-   if (table_length < length)
-      length = table_length;
-
-   for (k = 0; k < length; k++)
-   {
-      table[k].glyph1 = ttUSHORT(data+18+(k*6));
-      table[k].glyph2 = ttUSHORT(data+20+(k*6));
-      table[k].advance = ttSHORT(data+22+(k*6));
-   }
-
-   return length;
-}
-
-static int  stbtt__GetGlyphKernInfoAdvance(const stbtt_fontinfo *info, int glyph1, int glyph2)
-{
-   stbtt_uint8 *data = info->data + info->kern;
-   stbtt_uint32 needle, straw;
-   int l, r, m;
-
-   // we only look at the first table. it must be 'horizontal' and format 0.
-   if (!info->kern)
-      return 0;
-   if (ttUSHORT(data+2) < 1) // number of tables, need at least 1
-      return 0;
-   if (ttUSHORT(data+8) != 1) // horizontal flag must be set in format
-      return 0;
-
-   l = 0;
-   r = ttUSHORT(data+10) - 1;
-   needle = glyph1 << 16 | glyph2;
-   while (l <= r) {
-      m = (l + r) >> 1;
-      straw = ttULONG(data+18+(m*6)); // note: unaligned read
-      if (needle < straw)
-         r = m - 1;
-      else if (needle > straw)
-         l = m + 1;
-      else
-         return ttSHORT(data+22+(m*6));
-   }
-   return 0;
-}
-
-static stbtt_int32  stbtt__GetCoverageIndex(stbtt_uint8 *coverageTable, int glyph)
-{
-    stbtt_uint16 coverageFormat = ttUSHORT(coverageTable);
-    switch(coverageFormat) {
-        case 1: {
-            stbtt_uint16 glyphCount = ttUSHORT(coverageTable + 2);
-
-            // Binary search.
-            stbtt_int32 l=0, r=glyphCount-1, m;
-            int straw, needle=glyph;
-            while (l <= r) {
-                stbtt_uint8 *glyphArray = coverageTable + 4;
-                stbtt_uint16 glyphID;
-                m = (l + r) >> 1;
-                glyphID = ttUSHORT(glyphArray + 2 * m);
-                straw = glyphID;
-                if (needle < straw)
-                    r = m - 1;
-                else if (needle > straw)
-                    l = m + 1;
-                else {
-                     return m;
-                }
-            }
-        } break;
-
-        case 2: {
-            stbtt_uint16 rangeCount = ttUSHORT(coverageTable + 2);
-            stbtt_uint8 *rangeArray = coverageTable + 4;
-
-            // Binary search.
-            stbtt_int32 l=0, r=rangeCount-1, m;
-            int strawStart, strawEnd, needle=glyph;
-            while (l <= r) {
-                stbtt_uint8 *rangeRecord;
-                m = (l + r) >> 1;
-                rangeRecord = rangeArray + 6 * m;
-                strawStart = ttUSHORT(rangeRecord);
-                strawEnd = ttUSHORT(rangeRecord + 2);
-                if (needle < strawStart)
-                    r = m - 1;
-                else if (needle > strawEnd)
-                    l = m + 1;
-                else {
-                    stbtt_uint16 startCoverageIndex = ttUSHORT(rangeRecord + 4);
-                    return startCoverageIndex + glyph - strawStart;
-                }
-            }
-        } break;
-
-        default: {
-            // There are no other cases.
-            STBTT_assert(0);
-        } break;
-    }
-
-    return -1;
-}
-
-static stbtt_int32  stbtt__GetGlyphClass(stbtt_uint8 *classDefTable, int glyph)
-{
-    stbtt_uint16 classDefFormat = ttUSHORT(classDefTable);
-    switch(classDefFormat)
-    {
-        case 1: {
-            stbtt_uint16 startGlyphID = ttUSHORT(classDefTable + 2);
-            stbtt_uint16 glyphCount = ttUSHORT(classDefTable + 4);
-            stbtt_uint8 *classDef1ValueArray = classDefTable + 6;
-
-            if (glyph >= startGlyphID && glyph < startGlyphID + glyphCount)
-                return (stbtt_int32)ttUSHORT(classDef1ValueArray + 2 * (glyph - startGlyphID));
-
-            classDefTable = classDef1ValueArray + 2 * glyphCount;
-        } break;
-
-        case 2: {
-            stbtt_uint16 classRangeCount = ttUSHORT(classDefTable + 2);
-            stbtt_uint8 *classRangeRecords = classDefTable + 4;
-
-            // Binary search.
-            stbtt_int32 l=0, r=classRangeCount-1, m;
-            int strawStart, strawEnd, needle=glyph;
-            while (l <= r) {
-                stbtt_uint8 *classRangeRecord;
-                m = (l + r) >> 1;
-                classRangeRecord = classRangeRecords + 6 * m;
-                strawStart = ttUSHORT(classRangeRecord);
-                strawEnd = ttUSHORT(classRangeRecord + 2);
-                if (needle < strawStart)
-                    r = m - 1;
-                else if (needle > strawEnd)
-                    l = m + 1;
-                else
-                    return (stbtt_int32)ttUSHORT(classRangeRecord + 4);
-            }
-
-            classDefTable = classRangeRecords + 6 * classRangeCount;
-        } break;
-
-        default: {
-            // There are no other cases.
-            STBTT_assert(0);
-        } break;
-    }
-
-    return -1;
-}
-
-// Define to STBTT_assert(x) if you want to break on unimplemented formats.
-#define STBTT_GPOS_TODO_assert(x)
-
-static stbtt_int32  stbtt__GetGlyphGPOSInfoAdvance(const stbtt_fontinfo *info, int glyph1, int glyph2)
-{
-    stbtt_uint16 lookupListOffset;
-    stbtt_uint8 *lookupList;
-    stbtt_uint16 lookupCount;
-    stbtt_uint8 *data;
-    stbtt_int32 i;
-
-    if (!info->gpos) return 0;
-
-    data = info->data + info->gpos;
-
-    if (ttUSHORT(data+0) != 1) return 0; // Major version 1
-    if (ttUSHORT(data+2) != 0) return 0; // Minor version 0
-
-    lookupListOffset = ttUSHORT(data+8);
-    lookupList = data + lookupListOffset;
-    lookupCount = ttUSHORT(lookupList);
-
-    for (i=0; i<lookupCount; ++i) {
-        stbtt_uint16 lookupOffset = ttUSHORT(lookupList + 2 + 2 * i);
-        stbtt_uint8 *lookupTable = lookupList + lookupOffset;
-
-        stbtt_uint16 lookupType = ttUSHORT(lookupTable);
-        stbtt_uint16 subTableCount = ttUSHORT(lookupTable + 4);
-        stbtt_uint8 *subTableOffsets = lookupTable + 6;
-        switch(lookupType) {
-            case 2: { // Pair Adjustment Positioning Subtable
-                stbtt_int32 sti;
-                for (sti=0; sti<subTableCount; sti++) {
-                    stbtt_uint16 subtableOffset = ttUSHORT(subTableOffsets + 2 * sti);
-                    stbtt_uint8 *table = lookupTable + subtableOffset;
-                    stbtt_uint16 posFormat = ttUSHORT(table);
-                    stbtt_uint16 coverageOffset = ttUSHORT(table + 2);
-                    stbtt_int32 coverageIndex = stbtt__GetCoverageIndex(table + coverageOffset, glyph1);
-                    if (coverageIndex == -1) continue;
-
-                    switch (posFormat) {
-                        case 1: {
-                            stbtt_int32 l, r, m;
-                            int straw, needle;
-                            stbtt_uint16 valueFormat1 = ttUSHORT(table + 4);
-                            stbtt_uint16 valueFormat2 = ttUSHORT(table + 6);
-                            stbtt_int32 valueRecordPairSizeInBytes = 2;
-                            stbtt_uint16 pairSetCount = ttUSHORT(table + 8);
-                            stbtt_uint16 pairPosOffset = ttUSHORT(table + 10 + 2 * coverageIndex);
-                            stbtt_uint8 *pairValueTable = table + pairPosOffset;
-                            stbtt_uint16 pairValueCount = ttUSHORT(pairValueTable);
-                            stbtt_uint8 *pairValueArray = pairValueTable + 2;
-                            // TODO: Support more formats.
-                            STBTT_GPOS_TODO_assert(valueFormat1 == 4);
-                            if (valueFormat1 != 4) return 0;
-                            STBTT_GPOS_TODO_assert(valueFormat2 == 0);
-                            if (valueFormat2 != 0) return 0;
-
-                            STBTT_assert(coverageIndex < pairSetCount);
-                            STBTT__NOTUSED(pairSetCount);
-
-                            needle=glyph2;
-                            r=pairValueCount-1;
-                            l=0;
-
-                            // Binary search.
-                            while (l <= r) {
-                                stbtt_uint16 secondGlyph;
-                                stbtt_uint8 *pairValue;
-                                m = (l + r) >> 1;
-                                pairValue = pairValueArray + (2 + valueRecordPairSizeInBytes) * m;
-                                secondGlyph = ttUSHORT(pairValue);
-                                straw = secondGlyph;
-                                if (needle < straw)
-                                    r = m - 1;
-                                else if (needle > straw)
-                                    l = m + 1;
-                                else {
-                                    stbtt_int16 xAdvance = ttSHORT(pairValue + 2);
-                                    return xAdvance;
-                                }
-                            }
-                        } break;
-
-                        case 2: {
-                            stbtt_uint16 valueFormat1 = ttUSHORT(table + 4);
-                            stbtt_uint16 valueFormat2 = ttUSHORT(table + 6);
-
-                            stbtt_uint16 classDef1Offset = ttUSHORT(table + 8);
-                            stbtt_uint16 classDef2Offset = ttUSHORT(table + 10);
-                            int glyph1class = stbtt__GetGlyphClass(table + classDef1Offset, glyph1);
-                            int glyph2class = stbtt__GetGlyphClass(table + classDef2Offset, glyph2);
-
-                            stbtt_uint16 class1Count = ttUSHORT(table + 12);
-                            stbtt_uint16 class2Count = ttUSHORT(table + 14);
-                            STBTT_assert(glyph1class < class1Count);
-                            STBTT_assert(glyph2class < class2Count);
-
-                            // TODO: Support more formats.
-                            STBTT_GPOS_TODO_assert(valueFormat1 == 4);
-                            if (valueFormat1 != 4) return 0;
-                            STBTT_GPOS_TODO_assert(valueFormat2 == 0);
-                            if (valueFormat2 != 0) return 0;
-
-                            if (glyph1class >= 0 && glyph1class < class1Count && glyph2class >= 0 && glyph2class < class2Count) {
-                                stbtt_uint8 *class1Records = table + 16;
-                                stbtt_uint8 *class2Records = class1Records + 2 * (glyph1class * class2Count);
-                                stbtt_int16 xAdvance = ttSHORT(class2Records + 2 * glyph2class);
-                                return xAdvance;
-                            }
-                        } break;
-
-                        default: {
-                            // There are no other cases.
-                            STBTT_assert(0);
-                            break;
-                        };
-                    }
-                }
-                break;
-            };
-
-            default:
-                // TODO: Implement other stuff.
-                break;
-        }
-    }
-
-    return 0;
-}
-
-STBTT_DEF int  stbtt_GetGlyphKernAdvance(const stbtt_fontinfo *info, int g1, int g2)
-{
-   int xAdvance = 0;
-
-   if (info->gpos)
-      xAdvance += stbtt__GetGlyphGPOSInfoAdvance(info, g1, g2);
-   else if (info->kern)
-      xAdvance += stbtt__GetGlyphKernInfoAdvance(info, g1, g2);
-
-   return xAdvance;
-}
-
-STBTT_DEF int  stbtt_GetCodepointKernAdvance(const stbtt_fontinfo *info, int ch1, int ch2)
-{
-   if (!info->kern && !info->gpos) // if no kerning table, don't waste time looking up both codepoint->glyphs
-      return 0;
-   return stbtt_GetGlyphKernAdvance(info, stbtt_FindGlyphIndex(info,ch1), stbtt_FindGlyphIndex(info,ch2));
-}
-
-STBTT_DEF void stbtt_GetCodepointHMetrics(const stbtt_fontinfo *info, int codepoint, int *advanceWidth, int *leftSideBearing)
-{
-   stbtt_GetGlyphHMetrics(info, stbtt_FindGlyphIndex(info,codepoint), advanceWidth, leftSideBearing);
-}
-
-STBTT_DEF void stbtt_GetFontVMetrics(const stbtt_fontinfo *info, int *ascent, int *descent, int *lineGap)
-{
-   if (ascent ) *ascent  = ttSHORT(info->data+info->hhea + 4);
-   if (descent) *descent = ttSHORT(info->data+info->hhea + 6);
-   if (lineGap) *lineGap = ttSHORT(info->data+info->hhea + 8);
-}
-
-STBTT_DEF int  stbtt_GetFontVMetricsOS2(const stbtt_fontinfo *info, int *typoAscent, int *typoDescent, int *typoLineGap)
-{
-   int tab = stbtt__find_table(info->data, info->fontstart, "OS/2");
-   if (!tab)
-      return 0;
-   if (typoAscent ) *typoAscent  = ttSHORT(info->data+tab + 68);
-   if (typoDescent) *typoDescent = ttSHORT(info->data+tab + 70);
-   if (typoLineGap) *typoLineGap = ttSHORT(info->data+tab + 72);
-   return 1;
-}
-
-STBTT_DEF void stbtt_GetFontBoundingBox(const stbtt_fontinfo *info, int *x0, int *y0, int *x1, int *y1)
-{
-   *x0 = ttSHORT(info->data + info->head + 36);
-   *y0 = ttSHORT(info->data + info->head + 38);
-   *x1 = ttSHORT(info->data + info->head + 40);
-   *y1 = ttSHORT(info->data + info->head + 42);
-}
-
-STBTT_DEF double stbtt_ScaleForPixelHeight(const stbtt_fontinfo *info, double height)
-{
-   int fheight = ttSHORT(info->data + info->hhea + 4) - ttSHORT(info->data + info->hhea + 6);
-   return (double) height / fheight;
-}
-
-STBTT_DEF double stbtt_ScaleForMappingEmToPixels(const stbtt_fontinfo *info, double pixels)
-{
-   int unitsPerEm = ttUSHORT(info->data + info->head + 18);
-   return pixels / unitsPerEm;
-}
-
-STBTT_DEF void stbtt_FreeShape(const stbtt_fontinfo *info, stbtt_vertex *v)
-{
-   STBTT_free(v, info->userdata);
-}
-
-STBTT_DEF stbtt_uint8 *stbtt_FindSVGDoc(const stbtt_fontinfo *info, int gl)
-{
-   int i;
-   stbtt_uint8 *data = info->data;
-   stbtt_uint8 *svg_doc_list = data + stbtt__get_svg((stbtt_fontinfo *) info);
-
-   int numEntries = ttUSHORT(svg_doc_list);
-   stbtt_uint8 *svg_docs = svg_doc_list + 2;
-
-   for(i=0; i<numEntries; i++) {
-      stbtt_uint8 *svg_doc = svg_docs + (12 * i);
-      if ((gl >= ttUSHORT(svg_doc)) && (gl <= ttUSHORT(svg_doc + 2)))
-         return svg_doc;
-   }
-   return 0;
-}
-
-STBTT_DEF int stbtt_GetGlyphSVG(const stbtt_fontinfo *info, int gl, const char **svg)
-{
-   stbtt_uint8 *data = info->data;
-   stbtt_uint8 *svg_doc;
-
-   if (info->svg == 0)
-      return 0;
-
-   svg_doc = stbtt_FindSVGDoc(info, gl);
-   if (svg_doc != NULL) {
-      *svg = (char *) data + info->svg + ttULONG(svg_doc + 4);
-      return ttULONG(svg_doc + 8);
-   } else {
-      return 0;
-   }
-}
-
-STBTT_DEF int stbtt_GetCodepointSVG(const stbtt_fontinfo *info, int unicode_codepoint, const char **svg)
-{
-   return stbtt_GetGlyphSVG(info, stbtt_FindGlyphIndex(info, unicode_codepoint), svg);
-}
-
-//////////////////////////////////////////////////////////////////////////////
-//
-// antialiasing software rasterizer
-//
-
-STBTT_DEF void stbtt_GetGlyphBitmapBoxSubpixel(const stbtt_fontinfo *font, int glyph, double scale_x, double scale_y,double shift_x, double shift_y, int *ix0, int *iy0, int *ix1, int *iy1)
-{
-   int x0=0,y0=0,x1,y1; // =0 suppresses compiler warning
-   if (!stbtt_GetGlyphBox(font, glyph, &x0,&y0,&x1,&y1)) {
-      // e.g. space character
-      if (ix0) *ix0 = 0;
-      if (iy0) *iy0 = 0;
-      if (ix1) *ix1 = 0;
-      if (iy1) *iy1 = 0;
-   } else {
-      // move to integral bboxes (treating pixels as little squares, what pixels get touched)?
-      if (ix0) *ix0 = STBTT_ifloor( x0 * scale_x + shift_x);
-      if (iy0) *iy0 = STBTT_ifloor(-y1 * scale_y + shift_y);
-      if (ix1) *ix1 = STBTT_iceil ( x1 * scale_x + shift_x);
-      if (iy1) *iy1 = STBTT_iceil (-y0 * scale_y + shift_y);
-   }
-}
-
-STBTT_DEF void stbtt_GetGlyphBitmapBox(const stbtt_fontinfo *font, int glyph, double scale_x, double scale_y, int *ix0, int *iy0, int *ix1, int *iy1)
-{
-   stbtt_GetGlyphBitmapBoxSubpixel(font, glyph, scale_x, scale_y,0.0f,0.0f, ix0, iy0, ix1, iy1);
-}
-
-STBTT_DEF void stbtt_GetCodepointBitmapBoxSubpixel(const stbtt_fontinfo *font, int codepoint, double scale_x, double scale_y, double shift_x, double shift_y, int *ix0, int *iy0, int *ix1, int *iy1)
-{
-   stbtt_GetGlyphBitmapBoxSubpixel(font, stbtt_FindGlyphIndex(font,codepoint), scale_x, scale_y,shift_x,shift_y, ix0,iy0,ix1,iy1);
-}
-
-STBTT_DEF void stbtt_GetCodepointBitmapBox(const stbtt_fontinfo *font, int codepoint, double scale_x, double scale_y, int *ix0, int *iy0, int *ix1, int *iy1)
-{
-   stbtt_GetCodepointBitmapBoxSubpixel(font, codepoint, scale_x, scale_y,0.0f,0.0f, ix0,iy0,ix1,iy1);
-}
-
-//////////////////////////////////////////////////////////////////////////////
-//
-//  Rasterizer
-
-typedef struct stbtt__hheap_chunk
-{
-   struct stbtt__hheap_chunk *next;
-} stbtt__hheap_chunk;
-
-typedef struct stbtt__hheap
-{
-   struct stbtt__hheap_chunk *head;
-   void   *first_free;
-   int    num_remaining_in_head_chunk;
-} stbtt__hheap;
-
-static void *stbtt__hheap_alloc(stbtt__hheap *hh, size_t size, void *userdata)
-{
-   if (hh->first_free) {
-      void *p = hh->first_free;
-      hh->first_free = * (void **) p;
-      return p;
-   } else {
-      if (hh->num_remaining_in_head_chunk == 0) {
-         int count = (size < 32 ? 2000 : size < 128 ? 800 : 100);
-         stbtt__hheap_chunk *c = (stbtt__hheap_chunk *) STBTT_malloc(sizeof(stbtt__hheap_chunk) + size * count, userdata);
-         if (c == NULL)
-            return NULL;
-         c->next = hh->head;
-         hh->head = c;
-         hh->num_remaining_in_head_chunk = count;
-      }
-      --hh->num_remaining_in_head_chunk;
-      return (char *) (hh->head) + sizeof(stbtt__hheap_chunk) + size * hh->num_remaining_in_head_chunk;
-   }
-}
-
-static void stbtt__hheap_free(stbtt__hheap *hh, void *p)
-{
-   *(void **) p = hh->first_free;
-   hh->first_free = p;
-}
-
-static void stbtt__hheap_cleanup(stbtt__hheap *hh, void *userdata)
-{
-   stbtt__hheap_chunk *c = hh->head;
-   while (c) {
-      stbtt__hheap_chunk *n = c->next;
-      STBTT_free(c, userdata);
-      c = n;
-   }
-}
-
-typedef struct stbtt__edge {
-   double x0,y0, x1,y1;
-   int invert;
-} stbtt__edge;
-
-
-typedef struct stbtt__active_edge
-{
-   struct stbtt__active_edge *next;
-   #if STBTT_RASTERIZER_VERSION==1
-   int x,dx;
-   double ey;
-   int direction;
-   #elif STBTT_RASTERIZER_VERSION==2
-   double fx,fdx,fdy;
-   double direction;
-   double sy;
-   double ey;
-   #else
-   #error "Unrecognized value of STBTT_RASTERIZER_VERSION"
-   #endif
-} stbtt__active_edge;
-
-#if STBTT_RASTERIZER_VERSION == 1
-#define STBTT_FIXSHIFT   10
-#define STBTT_FIX        (1 << STBTT_FIXSHIFT)
-#define STBTT_FIXMASK    (STBTT_FIX-1)
-
-static stbtt__active_edge *stbtt__new_active(stbtt__hheap *hh, stbtt__edge *e, int off_x, double start_point, void *userdata)
-{
-   stbtt__active_edge *z = (stbtt__active_edge *) stbtt__hheap_alloc(hh, sizeof(*z), userdata);
-   double dxdy = (e->x1 - e->x0) / (e->y1 - e->y0);
-   STBTT_assert(z != NULL);
-   if (!z) return z;
-
-   // round dx down to avoid overshooting
-   if (dxdy < 0)
-      z->dx = -STBTT_ifloor(STBTT_FIX * -dxdy);
-   else
-      z->dx = STBTT_ifloor(STBTT_FIX * dxdy);
-
-   z->x = STBTT_ifloor(STBTT_FIX * e->x0 + z->dx * (start_point - e->y0)); // use z->dx so when we offset later it's by the same amount
-   z->x -= off_x * STBTT_FIX;
-
-   z->ey = e->y1;
-   z->next = 0;
-   z->direction = e->invert ? 1 : -1;
-   return z;
-}
-#elif STBTT_RASTERIZER_VERSION == 2
-static stbtt__active_edge *stbtt__new_active(stbtt__hheap *hh, stbtt__edge *e, int off_x, double start_point, void *userdata)
-{
-   stbtt__active_edge *z = (stbtt__active_edge *) stbtt__hheap_alloc(hh, sizeof(*z), userdata);
-   double dxdy = (e->x1 - e->x0) / (e->y1 - e->y0);
-   STBTT_assert(z != NULL);
-   //STBTT_assert(e->y0 <= start_point);
-   if (!z) return z;
-   z->fdx = dxdy;
-   z->fdy = dxdy != 0.0f ? (1.0f/dxdy) : 0.0f;
-   z->fx = e->x0 + dxdy * (start_point - e->y0);
-   z->fx -= off_x;
-   z->direction = e->invert ? 1.0f : -1.0f;
-   z->sy = e->y0;
-   z->ey = e->y1;
-   z->next = 0;
-   return z;
-}
-#else
-#error "Unrecognized value of STBTT_RASTERIZER_VERSION"
-#endif
-
-#if STBTT_RASTERIZER_VERSION == 1
-// note: this routine clips fills that extend off the edges... ideally this
-// wouldn't happen, but it could happen if the truetype glyph bounding boxes
-// are wrong, or if the user supplies a too-small bitmap
-static void stbtt__fill_active_edges(unsigned char *scanline, int len, stbtt__active_edge *e, int max_weight)
-{
-   // non-zero winding fill
-   int x0=0, w=0;
-
-   while (e) {
-      if (w == 0) {
-         // if we're currently at zero, we need to record the edge start point
-         x0 = e->x; w += e->direction;
-      } else {
-         int x1 = e->x; w += e->direction;
-         // if we went to zero, we need to draw
-         if (w == 0) {
-            int i = x0 >> STBTT_FIXSHIFT;
-            int j = x1 >> STBTT_FIXSHIFT;
-
-            if (i < len && j >= 0) {
-               if (i == j) {
-                  // x0,x1 are the same pixel, so compute combined coverage
-                  scanline[i] = scanline[i] + (stbtt_uint8) ((x1 - x0) * max_weight >> STBTT_FIXSHIFT);
-               } else {
-                  if (i >= 0) // add antialiasing for x0
-                     scanline[i] = scanline[i] + (stbtt_uint8) (((STBTT_FIX - (x0 & STBTT_FIXMASK)) * max_weight) >> STBTT_FIXSHIFT);
-                  else
-                     i = -1; // clip
-
-                  if (j < len) // add antialiasing for x1
-                     scanline[j] = scanline[j] + (stbtt_uint8) (((x1 & STBTT_FIXMASK) * max_weight) >> STBTT_FIXSHIFT);
-                  else
-                     j = len; // clip
-
-                  for (++i; i < j; ++i) // fill pixels between x0 and x1
-                     scanline[i] = scanline[i] + (stbtt_uint8) max_weight;
-               }
-            }
-         }
-      }
-
-      e = e->next;
-   }
-}
-
-static void stbtt__rasterize_sorted_edges(stbtt__bitmap *result, stbtt__edge *e, int n, int vsubsample, int off_x, int off_y, void *userdata)
-{
-   stbtt__hheap hh = { 0, 0, 0 };
-   stbtt__active_edge *active = NULL;
-   int y,j=0;
-   int max_weight = (255 / vsubsample);  // weight per vertical scanline
-   int s; // vertical subsample index
-   unsigned char scanline_data[512], *scanline;
-
-   if (result->w > 512)
-      scanline = (unsigned char *) STBTT_malloc(result->w, userdata);
-   else
-      scanline = scanline_data;
-
-   y = off_y * vsubsample;
-   e[n].y0 = (off_y + result->h) * (double) vsubsample + 1;
-
-   while (j < result->h) {
-      STBTT_memset(scanline, 0, result->w);
-      for (s=0; s < vsubsample; ++s) {
-         // find center of pixel for this scanline
-         double scan_y = y + 0.5f;
-         stbtt__active_edge **step = &active;
-
-         // update all active edges;
-         // remove all active edges that terminate before the center of this scanline
-         while (*step) {
-            stbtt__active_edge * z = *step;
-            if (z->ey <= scan_y) {
-               *step = z->next; // delete from list
-               STBTT_assert(z->direction);
-               z->direction = 0;
-               stbtt__hheap_free(&hh, z);
-            } else {
-               z->x += z->dx; // advance to position for current scanline
-               step = &((*step)->next); // advance through list
-            }
-         }
-
-         // resort the list if needed
-         for(;;) {
-            int changed=0;
-            step = &active;
-            while (*step && (*step)->next) {
-               if ((*step)->x > (*step)->next->x) {
-                  stbtt__active_edge *t = *step;
-                  stbtt__active_edge *q = t->next;
-
-                  t->next = q->next;
-                  q->next = t;
-                  *step = q;
-                  changed = 1;
-               }
-               step = &(*step)->next;
-            }
-            if (!changed) break;
-         }
-
-         // insert all edges that start before the center of this scanline -- omit ones that also end on this scanline
-         while (e->y0 <= scan_y) {
-            if (e->y1 > scan_y) {
-               stbtt__active_edge *z = stbtt__new_active(&hh, e, off_x, scan_y, userdata);
-               if (z != NULL) {
-                  // find insertion point
-                  if (active == NULL)
-                     active = z;
-                  else if (z->x < active->x) {
-                     // insert at front
-                     z->next = active;
-                     active = z;
-                  } else {
-                     // find thing to insert AFTER
-                     stbtt__active_edge *p = active;
-                     while (p->next && p->next->x < z->x)
-                        p = p->next;
-                     // at this point, p->next->x is NOT < z->x
-                     z->next = p->next;
-                     p->next = z;
-                  }
-               }
-            }
-            ++e;
-         }
-
-         // now process all active edges in XOR fashion
-         if (active)
-            stbtt__fill_active_edges(scanline, result->w, active, max_weight);
-
-         ++y;
-      }
-      STBTT_memcpy(result->pixels + j * result->stride, scanline, result->w);
-      ++j;
-   }
-
-   stbtt__hheap_cleanup(&hh, userdata);
-
-   if (scanline != scanline_data)
-      STBTT_free(scanline, userdata);
-}
-
-#elif STBTT_RASTERIZER_VERSION == 2
-
-// the edge passed in here does not cross the vertical line at x or the vertical line at x+1
-// (i.e. it has already been clipped to those)
-static void stbtt__handle_clipped_edge(double *scanline, int x, stbtt__active_edge *e, double x0, double y0, double x1, double y1)
-{
-   if (y0 == y1) return;
-   STBTT_assert(y0 < y1);
-   STBTT_assert(e->sy <= e->ey);
-   if (y0 > e->ey) return;
-   if (y1 < e->sy) return;
-   if (y0 < e->sy) {
-      x0 += (x1-x0) * (e->sy - y0) / (y1-y0);
-      y0 = e->sy;
-   }
-   if (y1 > e->ey) {
-      x1 += (x1-x0) * (e->ey - y1) / (y1-y0);
-      y1 = e->ey;
-   }
-
-   if (x0 == x)
-      STBTT_assert(x1 <= x+1);
-   else if (x0 == x+1)
-      STBTT_assert(x1 >= x);
-   else if (x0 <= x)
-      STBTT_assert(x1 <= x);
-   else if (x0 >= x+1)
-      STBTT_assert(x1 >= x+1);
-   else
-      STBTT_assert(x1 >= x && x1 <= x+1);
-
-   if (x0 <= x && x1 <= x)
-      scanline[x] += e->direction * (y1-y0);
-   else if (x0 >= x+1 && x1 >= x+1)
-      ;
-   else {
-      STBTT_assert(x0 >= x && x0 <= x+1 && x1 >= x && x1 <= x+1);
-      scanline[x] += e->direction * (y1-y0) * (1-((x0-x)+(x1-x))/2); // coverage = 1 - average x position
-   }
-}
-
-static void stbtt__fill_active_edges_new(double *scanline, double *scanline_fill, int len, stbtt__active_edge *e, double y_top)
-{
-   double y_bottom = y_top+1;
-
-   while (e) {
-      // brute force every pixel
-
-      // compute intersection points with top & bottom
-      STBTT_assert(e->ey >= y_top);
-
-      if (e->fdx == 0) {
-         double x0 = e->fx;
-         if (x0 < len) {
-            if (x0 >= 0) {
-               stbtt__handle_clipped_edge(scanline,(int) x0,e, x0,y_top, x0,y_bottom);
-               stbtt__handle_clipped_edge(scanline_fill-1,(int) x0+1,e, x0,y_top, x0,y_bottom);
-            } else {
-               stbtt__handle_clipped_edge(scanline_fill-1,0,e, x0,y_top, x0,y_bottom);
-            }
-         }
-      } else {
-         double x0 = e->fx;
-         double dx = e->fdx;
-         double xb = x0 + dx;
-         double x_top, x_bottom;
-         double sy0,sy1;
-         double dy = e->fdy;
-         STBTT_assert(e->sy <= y_bottom && e->ey >= y_top);
-
-         // compute endpoints of line segment clipped to this scanline (if the
-         // line segment starts on this scanline. x0 is the intersection of the
-         // line with y_top, but that may be off the line segment.
-         if (e->sy > y_top) {
-            x_top = x0 + dx * (e->sy - y_top);
-            sy0 = e->sy;
-         } else {
-            x_top = x0;
-            sy0 = y_top;
-         }
-         if (e->ey < y_bottom) {
-            x_bottom = x0 + dx * (e->ey - y_top);
-            sy1 = e->ey;
-         } else {
-            x_bottom = xb;
-            sy1 = y_bottom;
-         }
-
-         if (x_top >= 0 && x_bottom >= 0 && x_top < len && x_bottom < len) {
-            // from here on, we don't have to range check x values
-
-            if ((int) x_top == (int) x_bottom) {
-               double height;
-               // simple case, only spans one pixel
-               int x = (int) x_top;
-               height = sy1 - sy0;
-               STBTT_assert(x >= 0 && x < len);
-               scanline[x] += e->direction * (1-((x_top - x) + (x_bottom-x))/2)  * height;
-               scanline_fill[x] += e->direction * height; // everything right of this pixel is filled
-            } else {
-               int x,x1,x2;
-               double y_crossing, step, sign, area;
-               // covers 2+ pixels
-               if (x_top > x_bottom) {
-                  // flip scanline vertically; signed area is the same
-                  double t;
-                  sy0 = y_bottom - (sy0 - y_top);
-                  sy1 = y_bottom - (sy1 - y_top);
-                  t = sy0, sy0 = sy1, sy1 = t;
-                  t = x_bottom, x_bottom = x_top, x_top = t;
-                  dx = -dx;
-                  dy = -dy;
-                  t = x0, x0 = xb, xb = t;
-               }
-
-               x1 = (int) x_top;
-               x2 = (int) x_bottom;
-               // compute intersection with y axis at x1+1
-               y_crossing = (x1+1 - x0) * dy + y_top;
-
-               sign = e->direction;
-               // area of the rectangle covered from y0..y_crossing
-               area = sign * (y_crossing-sy0);
-               // area of the triangle (x_top,y0), (x+1,y0), (x+1,y_crossing)
-               scanline[x1] += area * (1-((x_top - x1)+(x1+1-x1))/2);
-
-               step = sign * dy;
-               for (x = x1+1; x < x2; ++x) {
-                  scanline[x] += area + step/2;
-                  area += step;
-               }
-               y_crossing += dy * (x2 - (x1+1));
-
-               STBTT_assert(STBTT_fabs(area) <= 1.01f);
-
-               scanline[x2] += area + sign * (1-((x2-x2)+(x_bottom-x2))/2) * (sy1-y_crossing);
-
-               scanline_fill[x2] += sign * (sy1-sy0);
-            }
-         } else {
-            // if edge goes outside of box we're drawing, we require
-            // clipping logic. since this does not match the intended use
-            // of this library, we use a different, very slow brute
-            // force implementation
-            int x;
-            for (x=0; x < len; ++x) {
-               // cases:
-               //
-               // there can be up to two intersections with the pixel. any intersection
-               // with left or right edges can be handled by splitting into two (or three)
-               // regions. intersections with top & bottom do not necessitate case-wise logic.
-               //
-               // the old way of doing this found the intersections with the left & right edges,
-               // then used some simple logic to produce up to three segments in sorted order
-               // from top-to-bottom. however, this had a problem: if an x edge was epsilon
-               // across the x border, then the corresponding y position might not be distinct
-               // from the other y segment, and it might ignored as an empty segment. to avoid
-               // that, we need to explicitly produce segments based on x positions.
-
-               // rename variables to clearly-defined pairs
-               double y0 = y_top;
-               double x1 = (double) (x);
-               double x2 = (double) (x+1);
-               double x3 = xb;
-               double y3 = y_bottom;
-
-               // x = e->x + e->dx * (y-y_top)
-               // (y-y_top) = (x - e->x) / e->dx
-               // y = (x - e->x) / e->dx + y_top
-               double y1 = (x - x0) / dx + y_top;
-               double y2 = (x+1 - x0) / dx + y_top;
-
-               if (x0 < x1 && x3 > x2) {         // three segments descending down-right
-                  stbtt__handle_clipped_edge(scanline,x,e, x0,y0, x1,y1);
-                  stbtt__handle_clipped_edge(scanline,x,e, x1,y1, x2,y2);
-                  stbtt__handle_clipped_edge(scanline,x,e, x2,y2, x3,y3);
-               } else if (x3 < x1 && x0 > x2) {  // three segments descending down-left
-                  stbtt__handle_clipped_edge(scanline,x,e, x0,y0, x2,y2);
-                  stbtt__handle_clipped_edge(scanline,x,e, x2,y2, x1,y1);
-                  stbtt__handle_clipped_edge(scanline,x,e, x1,y1, x3,y3);
-               } else if (x0 < x1 && x3 > x1) {  // two segments across x, down-right
-                  stbtt__handle_clipped_edge(scanline,x,e, x0,y0, x1,y1);
-                  stbtt__handle_clipped_edge(scanline,x,e, x1,y1, x3,y3);
-               } else if (x3 < x1 && x0 > x1) {  // two segments across x, down-left
-                  stbtt__handle_clipped_edge(scanline,x,e, x0,y0, x1,y1);
-                  stbtt__handle_clipped_edge(scanline,x,e, x1,y1, x3,y3);
-               } else if (x0 < x2 && x3 > x2) {  // two segments across x+1, down-right
-                  stbtt__handle_clipped_edge(scanline,x,e, x0,y0, x2,y2);
-                  stbtt__handle_clipped_edge(scanline,x,e, x2,y2, x3,y3);
-               } else if (x3 < x2 && x0 > x2) {  // two segments across x+1, down-left
-                  stbtt__handle_clipped_edge(scanline,x,e, x0,y0, x2,y2);
-                  stbtt__handle_clipped_edge(scanline,x,e, x2,y2, x3,y3);
-               } else {  // one segment
-                  stbtt__handle_clipped_edge(scanline,x,e, x0,y0, x3,y3);
-               }
-            }
-         }
-      }
-      e = e->next;
-   }
-}
-
-// directly AA rasterize edges w/o supersampling
-static void stbtt__rasterize_sorted_edges(stbtt__bitmap *result, stbtt__edge *e, int n, int vsubsample, int off_x, int off_y, void *userdata)
-{
-   stbtt__hheap hh = { 0, 0, 0 };
-   stbtt__active_edge *active = NULL;
-   int y,j=0, i;
-   double scanline_data[129], *scanline, *scanline2;
-
-   STBTT__NOTUSED(vsubsample);
-
-   if (result->w > 64)
-      scanline = (double *) STBTT_malloc((result->w*2+1) * sizeof(double), userdata);
-   else
-      scanline = scanline_data;
-
-   scanline2 = scanline + result->w;
-
-   y = off_y;
-   e[n].y0 = (double) (off_y + result->h) + 1;
-
-   while (j < result->h) {
-      // find center of pixel for this scanline
-      double scan_y_top    = y + 0.0f;
-      double scan_y_bottom = y + 1.0f;
-      stbtt__active_edge **step = &active;
-
-      STBTT_memset(scanline , 0, result->w*sizeof(scanline[0]));
-      STBTT_memset(scanline2, 0, (result->w+1)*sizeof(scanline[0]));
-
-      // update all active edges;
-      // remove all active edges that terminate before the top of this scanline
-      while (*step) {
-         stbtt__active_edge * z = *step;
-         if (z->ey <= scan_y_top) {
-            *step = z->next; // delete from list
-            STBTT_assert(z->direction);
-            z->direction = 0;
-            stbtt__hheap_free(&hh, z);
-         } else {
-            step = &((*step)->next); // advance through list
-         }
-      }
-
-      // insert all edges that start before the bottom of this scanline
-      while (e->y0 <= scan_y_bottom) {
-         if (e->y0 != e->y1) {
-            stbtt__active_edge *z = stbtt__new_active(&hh, e, off_x, scan_y_top, userdata);
-            if (z != NULL) {
-               if (j == 0 && off_y != 0) {
-                  if (z->ey < scan_y_top) {
-                     // this can happen due to subpixel positioning and some kind of fp rounding error i think
-                     z->ey = scan_y_top;
-                  }
-               }
-               STBTT_assert(z->ey >= scan_y_top); // if we get really unlucky a tiny bit of an edge can be out of bounds
-               // insert at front
-               z->next = active;
-               active = z;
-            }
-         }
-         ++e;
-      }
-
-      // now process all active edges
-      if (active)
-         stbtt__fill_active_edges_new(scanline, scanline2+1, result->w, active, scan_y_top);
-
-      {
-         double sum = 0;
-         for (i=0; i < result->w; ++i) {
-            double k;
-            int m;
-            sum += scanline2[i];
-            k = scanline[i] + sum;
-            k = (double) STBTT_fabs(k)*255 + 0.5f;
-            m = (int) k;
-            if (m > 255) m = 255;
-            result->pixels[j*result->stride + i] = (unsigned char) m;
-         }
-      }
-      // advance all the edges
-      step = &active;
-      while (*step) {
-         stbtt__active_edge *z = *step;
-         z->fx += z->fdx; // advance to position for current scanline
-         step = &((*step)->next); // advance through list
-      }
-
-      ++y;
-      ++j;
-   }
-
-   stbtt__hheap_cleanup(&hh, userdata);
-
-   if (scanline != scanline_data)
-      STBTT_free(scanline, userdata);
-}
-#else
-#error "Unrecognized value of STBTT_RASTERIZER_VERSION"
-#endif
-
-#define STBTT__COMPARE(a,b)  ((a)->y0 < (b)->y0)
-
-static void stbtt__sort_edges_ins_sort(stbtt__edge *p, int n)
-{
-   int i,j;
-   for (i=1; i < n; ++i) {
-      stbtt__edge t = p[i], *a = &t;
-      j = i;
-      while (j > 0) {
-         stbtt__edge *b = &p[j-1];
-         int c = STBTT__COMPARE(a,b);
-         if (!c) break;
-         p[j] = p[j-1];
-         --j;
-      }
-      if (i != j)
-         p[j] = t;
-   }
-}
-
-static void stbtt__sort_edges_quicksort(stbtt__edge *p, int n)
-{
-   /* threshold for transitioning to insertion sort */
-   while (n > 12) {
-      stbtt__edge t;
-      int c01,c12,c,m,i,j;
-
-      /* compute median of three */
-      m = n >> 1;
-      c01 = STBTT__COMPARE(&p[0],&p[m]);
-      c12 = STBTT__COMPARE(&p[m],&p[n-1]);
-      /* if 0 >= mid >= end, or 0 < mid < end, then use mid */
-      if (c01 != c12) {
-         /* otherwise, we'll need to swap something else to middle */
-         int z;
-         c = STBTT__COMPARE(&p[0],&p[n-1]);
-         /* 0>mid && mid<n:  0>n => n; 0<n => 0 */
-         /* 0<mid && mid>n:  0>n => 0; 0<n => n */
-         z = (c == c12) ? 0 : n-1;
-         t = p[z];
-         p[z] = p[m];
-         p[m] = t;
-      }
-      /* now p[m] is the median-of-three */
-      /* swap it to the beginning so it won't move around */
-      t = p[0];
-      p[0] = p[m];
-      p[m] = t;
-
-      /* partition loop */
-      i=1;
-      j=n-1;
-      for(;;) {
-         /* handling of equality is crucial here */
-         /* for sentinels & efficiency with duplicates */
-         for (;;++i) {
-            if (!STBTT__COMPARE(&p[i], &p[0])) break;
-         }
-         for (;;--j) {
-            if (!STBTT__COMPARE(&p[0], &p[j])) break;
-         }
-         /* make sure we haven't crossed */
-         if (i >= j) break;
-         t = p[i];
-         p[i] = p[j];
-         p[j] = t;
-
-         ++i;
-         --j;
-      }
-      /* recurse on smaller side, iterate on larger */
-      if (j < (n-i)) {
-         stbtt__sort_edges_quicksort(p,j);
-         p = p+i;
-         n = n-i;
-      } else {
-         stbtt__sort_edges_quicksort(p+i, n-i);
-         n = j;
-      }
-   }
-}
-
-static void stbtt__sort_edges(stbtt__edge *p, int n)
-{
-   stbtt__sort_edges_quicksort(p, n);
-   stbtt__sort_edges_ins_sort(p, n);
-}
-
-typedef struct
-{
-   double x,y;
-} stbtt__point;
-
-static void stbtt__rasterize(stbtt__bitmap *result, stbtt__point *pts, int *wcount, int windings, double scale_x, double scale_y, double shift_x, double shift_y, int off_x, int off_y, int invert, void *userdata)
-{
-   double y_scale_inv = invert ? -scale_y : scale_y;
-   stbtt__edge *e;
-   int n,i,j,k,m;
-#if STBTT_RASTERIZER_VERSION == 1
-   int vsubsample = result->h < 8 ? 15 : 5;
-#elif STBTT_RASTERIZER_VERSION == 2
-   int vsubsample = 1;
-#else
-   #error "Unrecognized value of STBTT_RASTERIZER_VERSION"
-#endif
-   // vsubsample should divide 255 evenly; otherwise we won't reach full opacity
-
-   // now we have to blow out the windings into explicit edge lists
-   n = 0;
-   for (i=0; i < windings; ++i)
-      n += wcount[i];
-
-   e = (stbtt__edge *) STBTT_malloc(sizeof(*e) * (n+1), userdata); // add an extra one as a sentinel
-   if (e == 0) return;
-   n = 0;
-
-   m=0;
-   for (i=0; i < windings; ++i) {
-      stbtt__point *p = pts + m;
-      m += wcount[i];
-      j = wcount[i]-1;
-      for (k=0; k < wcount[i]; j=k++) {
-         int a=k,b=j;
-         // skip the edge if horizontal
-         if (p[j].y == p[k].y)
-            continue;
-         // add edge from j to k to the list
-         e[n].invert = 0;
-         if (invert ? p[j].y > p[k].y : p[j].y < p[k].y) {
-            e[n].invert = 1;
-            a=j,b=k;
-         }
-         e[n].x0 = p[a].x * scale_x + shift_x;
-         e[n].y0 = (p[a].y * y_scale_inv + shift_y) * vsubsample;
-         e[n].x1 = p[b].x * scale_x + shift_x;
-         e[n].y1 = (p[b].y * y_scale_inv + shift_y) * vsubsample;
-         ++n;
-      }
-   }
-
-   // now sort the edges by their highest point (should snap to integer, and then by x)
-   //STBTT_sort(e, n, sizeof(e[0]), stbtt__edge_compare);
-   stbtt__sort_edges(e, n);
-
-   // now, traverse the scanlines and find the intersections on each scanline, use xor winding rule
-   stbtt__rasterize_sorted_edges(result, e, n, vsubsample, off_x, off_y, userdata);
-
-   STBTT_free(e, userdata);
-}
-
-static void stbtt__add_point(stbtt__point *points, int n, double x, double y)
-{
-   if (!points) return; // during first pass, it's unallocated
-   points[n].x = x;
-   points[n].y = y;
-}
-
-// tessellate until threshold p is happy... @TODO warped to compensate for non-linear stretching
-static int stbtt__tesselate_curve(stbtt__point *points, int *num_points, double x0, double y0, double x1, double y1, double x2, double y2, double objspace_flatness_squared, int n)
-{
-   // midpoint
-   double mx = (x0 + 2*x1 + x2)/4;
-   double my = (y0 + 2*y1 + y2)/4;
-   // versus directly drawn line
-   double dx = (x0+x2)/2 - mx;
-   double dy = (y0+y2)/2 - my;
-   if (n > 16) // 65536 segments on one curve better be enough!
-      return 1;
-   if (dx*dx+dy*dy > objspace_flatness_squared) { // half-pixel error allowed... need to be smaller if AA
-      stbtt__tesselate_curve(points, num_points, x0,y0, (x0+x1)/2.0f,(y0+y1)/2.0f, mx,my, objspace_flatness_squared,n+1);
-      stbtt__tesselate_curve(points, num_points, mx,my, (x1+x2)/2.0f,(y1+y2)/2.0f, x2,y2, objspace_flatness_squared,n+1);
-   } else {
-      stbtt__add_point(points, *num_points,x2,y2);
-      *num_points = *num_points+1;
-   }
-   return 1;
-}
-
-static void stbtt__tesselate_cubic(stbtt__point *points, int *num_points, double x0, double y0, double x1, double y1, double x2, double y2, double x3, double y3, double objspace_flatness_squared, int n)
-{
-   // @TODO this "flatness" calculation is just made-up nonsense that seems to work well enough
-   double dx0 = x1-x0;
-   double dy0 = y1-y0;
-   double dx1 = x2-x1;
-   double dy1 = y2-y1;
-   double dx2 = x3-x2;
-   double dy2 = y3-y2;
-   double dx = x3-x0;
-   double dy = y3-y0;
-   double longlen = (double) (STBTT_sqrt(dx0*dx0+dy0*dy0)+STBTT_sqrt(dx1*dx1+dy1*dy1)+STBTT_sqrt(dx2*dx2+dy2*dy2));
-   double shortlen = (double) STBTT_sqrt(dx*dx+dy*dy);
-   double flatness_squared = longlen*longlen-shortlen*shortlen;
-
-   if (n > 16) // 65536 segments on one curve better be enough!
-      return;
-
-   if (flatness_squared > objspace_flatness_squared) {
-      double x01 = (x0+x1)/2;
-      double y01 = (y0+y1)/2;
-      double x12 = (x1+x2)/2;
-      double y12 = (y1+y2)/2;
-      double x23 = (x2+x3)/2;
-      double y23 = (y2+y3)/2;
-
-      double xa = (x01+x12)/2;
-      double ya = (y01+y12)/2;
-      double xb = (x12+x23)/2;
-      double yb = (y12+y23)/2;
-
-      double mx = (xa+xb)/2;
-      double my = (ya+yb)/2;
-
-      stbtt__tesselate_cubic(points, num_points, x0,y0, x01,y01, xa,ya, mx,my, objspace_flatness_squared,n+1);
-      stbtt__tesselate_cubic(points, num_points, mx,my, xb,yb, x23,y23, x3,y3, objspace_flatness_squared,n+1);
-   } else {
-      stbtt__add_point(points, *num_points,x3,y3);
-      *num_points = *num_points+1;
-   }
-}
-
-// returns number of contours
-static stbtt__point *stbtt_FlattenCurves(stbtt_vertex *vertices, int num_verts, double objspace_flatness, int **contour_lengths, int *num_contours, void *userdata)
-{
-   stbtt__point *points=0;
-   int num_points=0;
-
-   double objspace_flatness_squared = objspace_flatness * objspace_flatness;
-   int i,n=0,start=0, pass;
-
-   // count how many "moves" there are to get the contour count
-   for (i=0; i < num_verts; ++i)
-      if (vertices[i].type == STBTT_vmove)
-         ++n;
-
-   *num_contours = n;
-   if (n == 0) return 0;
-
-   *contour_lengths = (int *) STBTT_malloc(sizeof(**contour_lengths) * n, userdata);
-
-   if (*contour_lengths == 0) {
-      *num_contours = 0;
-      return 0;
-   }
-
-   // make two passes through the points so we don't need to realloc
-   for (pass=0; pass < 2; ++pass) {
-      double x=0,y=0;
-      if (pass == 1) {
-         points = (stbtt__point *) STBTT_malloc(num_points * sizeof(points[0]), userdata);
-         if (points == NULL) goto error;
-      }
-      num_points = 0;
-      n= -1;
-      for (i=0; i < num_verts; ++i) {
-         switch (vertices[i].type) {
-            case STBTT_vmove:
-               // start the next contour
-               if (n >= 0)
-                  (*contour_lengths)[n] = num_points - start;
-               ++n;
-               start = num_points;
-
-               x = vertices[i].x, y = vertices[i].y;
-               stbtt__add_point(points, num_points++, x,y);
-               break;
-            case STBTT_vline:
-               x = vertices[i].x, y = vertices[i].y;
-               stbtt__add_point(points, num_points++, x, y);
-               break;
-            case STBTT_vcurve:
-               stbtt__tesselate_curve(points, &num_points, x,y,
-                                        vertices[i].cx, vertices[i].cy,
-                                        vertices[i].x,  vertices[i].y,
-                                        objspace_flatness_squared, 0);
-               x = vertices[i].x, y = vertices[i].y;
-               break;
-            case STBTT_vcubic:
-               stbtt__tesselate_cubic(points, &num_points, x,y,
-                                        vertices[i].cx, vertices[i].cy,
-                                        vertices[i].cx1, vertices[i].cy1,
-                                        vertices[i].x,  vertices[i].y,
-                                        objspace_flatness_squared, 0);
-               x = vertices[i].x, y = vertices[i].y;
-               break;
-         }
-      }
-      (*contour_lengths)[n] = num_points - start;
-   }
-
-   return points;
-error:
-   STBTT_free(points, userdata);
-   STBTT_free(*contour_lengths, userdata);
-   *contour_lengths = 0;
-   *num_contours = 0;
-   return NULL;
-}
-
-STBTT_DEF void stbtt_Rasterize(stbtt__bitmap *result, double flatness_in_pixels, stbtt_vertex *vertices, int num_verts, double scale_x, double scale_y, double shift_x, double shift_y, int x_off, int y_off, int invert, void *userdata)
-{
-   double scale            = scale_x > scale_y ? scale_y : scale_x;
-   int winding_count      = 0;
-   int *winding_lengths   = NULL;
-   stbtt__point *windings = stbtt_FlattenCurves(vertices, num_verts, flatness_in_pixels / scale, &winding_lengths, &winding_count, userdata);
-   if (windings) {
-      stbtt__rasterize(result, windings, winding_lengths, winding_count, scale_x, scale_y, shift_x, shift_y, x_off, y_off, invert, userdata);
-      STBTT_free(winding_lengths, userdata);
-      STBTT_free(windings, userdata);
-   }
-}
-
-STBTT_DEF void stbtt_FreeBitmap(unsigned char *bitmap, void *userdata)
-{
-   STBTT_free(bitmap, userdata);
-}
-
-STBTT_DEF unsigned char *stbtt_GetGlyphBitmapSubpixel(const stbtt_fontinfo *info, double scale_x, double scale_y, double shift_x, double shift_y, int glyph, int *width, int *height, int *xoff, int *yoff)
-{
-   int ix0,iy0,ix1,iy1;
-   stbtt__bitmap gbm;
-   stbtt_vertex *vertices;
-   int num_verts = stbtt_GetGlyphShape(info, glyph, &vertices);
-
-   if (scale_x == 0) scale_x = scale_y;
-   if (scale_y == 0) {
-      if (scale_x == 0) {
-         STBTT_free(vertices, info->userdata);
-         return NULL;
-      }
-      scale_y = scale_x;
-   }
-
-   stbtt_GetGlyphBitmapBoxSubpixel(info, glyph, scale_x, scale_y, shift_x, shift_y, &ix0,&iy0,&ix1,&iy1);
-
-   // now we get the size
-   gbm.w = (ix1 - ix0);
-   gbm.h = (iy1 - iy0);
-   gbm.pixels = NULL; // in case we error
-
-   if (width ) *width  = gbm.w;
-   if (height) *height = gbm.h;
-   if (xoff  ) *xoff   = ix0;
-   if (yoff  ) *yoff   = iy0;
-
-   if (gbm.w && gbm.h) {
-      gbm.pixels = (unsigned char *) STBTT_malloc(gbm.w * gbm.h, info->userdata);
-      if (gbm.pixels) {
-         gbm.stride = gbm.w;
-
-         stbtt_Rasterize(&gbm, 0.35f, vertices, num_verts, scale_x, scale_y, shift_x, shift_y, ix0, iy0, 1, info->userdata);
-      }
-   }
-   STBTT_free(vertices, info->userdata);
-   return gbm.pixels;
-}
-
-STBTT_DEF unsigned char *stbtt_GetGlyphBitmap(const stbtt_fontinfo *info, double scale_x, double scale_y, int glyph, int *width, int *height, int *xoff, int *yoff)
-{
-   return stbtt_GetGlyphBitmapSubpixel(info, scale_x, scale_y, 0.0f, 0.0f, glyph, width, height, xoff, yoff);
-}
-
-STBTT_DEF void stbtt_MakeGlyphBitmapSubpixel(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, double scale_x, double scale_y, double shift_x, double shift_y, int glyph)
-{
-   int ix0,iy0;
-   stbtt_vertex *vertices;
-   int num_verts = stbtt_GetGlyphShape(info, glyph, &vertices);
-   stbtt__bitmap gbm;
-
-   stbtt_GetGlyphBitmapBoxSubpixel(info, glyph, scale_x, scale_y, shift_x, shift_y, &ix0,&iy0,0,0);
-   gbm.pixels = output;
-   gbm.w = out_w;
-   gbm.h = out_h;
-   gbm.stride = out_stride;
-
-   if (gbm.w && gbm.h)
-      stbtt_Rasterize(&gbm, 0.35f, vertices, num_verts, scale_x, scale_y, shift_x, shift_y, ix0,iy0, 1, info->userdata);
-
-   STBTT_free(vertices, info->userdata);
-}
-
-STBTT_DEF void stbtt_MakeGlyphBitmap(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, double scale_x, double scale_y, int glyph)
-{
-   stbtt_MakeGlyphBitmapSubpixel(info, output, out_w, out_h, out_stride, scale_x, scale_y, 0.0f,0.0f, glyph);
-}
-
-STBTT_DEF unsigned char *stbtt_GetCodepointBitmapSubpixel(const stbtt_fontinfo *info, double scale_x, double scale_y, double shift_x, double shift_y, int codepoint, int *width, int *height, int *xoff, int *yoff)
-{
-   return stbtt_GetGlyphBitmapSubpixel(info, scale_x, scale_y,shift_x,shift_y, stbtt_FindGlyphIndex(info,codepoint), width,height,xoff,yoff);
-}
-
-STBTT_DEF void stbtt_MakeCodepointBitmapSubpixelPrefilter(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, double scale_x, double scale_y, double shift_x, double shift_y, int oversample_x, int oversample_y, double *sub_x, double *sub_y, int codepoint)
-{
-   stbtt_MakeGlyphBitmapSubpixelPrefilter(info, output, out_w, out_h, out_stride, scale_x, scale_y, shift_x, shift_y, oversample_x, oversample_y, sub_x, sub_y, stbtt_FindGlyphIndex(info,codepoint));
-}
-
-STBTT_DEF void stbtt_MakeCodepointBitmapSubpixel(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, double scale_x, double scale_y, double shift_x, double shift_y, int codepoint)
-{
-   stbtt_MakeGlyphBitmapSubpixel(info, output, out_w, out_h, out_stride, scale_x, scale_y, shift_x, shift_y, stbtt_FindGlyphIndex(info,codepoint));
-}
-
-STBTT_DEF unsigned char *stbtt_GetCodepointBitmap(const stbtt_fontinfo *info, double scale_x, double scale_y, int codepoint, int *width, int *height, int *xoff, int *yoff)
-{
-   return stbtt_GetCodepointBitmapSubpixel(info, scale_x, scale_y, 0.0f,0.0f, codepoint, width,height,xoff,yoff);
-}
-
-STBTT_DEF void stbtt_MakeCodepointBitmap(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, double scale_x, double scale_y, int codepoint)
-{
-   stbtt_MakeCodepointBitmapSubpixel(info, output, out_w, out_h, out_stride, scale_x, scale_y, 0.0f,0.0f, codepoint);
-}
-
-//////////////////////////////////////////////////////////////////////////////
-//
-// bitmap baking
-//
-// This is SUPER-CRAPPY packing to keep source code small
-
-static int stbtt_BakeFontBitmap_internal(unsigned char *data, int offset,  // font location (use offset=0 for plain .ttf)
-                                double pixel_height,                     // height of font in pixels
-                                unsigned char *pixels, int pw, int ph,  // bitmap to be filled in
-                                int first_char, int num_chars,          // characters to bake
-                                stbtt_bakedchar *chardata)
-{
-   double scale;
-   int x,y,bottom_y, i;
-   stbtt_fontinfo f;
-   f.userdata = NULL;
-   if (!stbtt_InitFont(&f, data, offset))
-      return -1;
-   STBTT_memset(pixels, 0, pw*ph); // background of 0 around pixels
-   x=y=1;
-   bottom_y = 1;
-
-   scale = stbtt_ScaleForPixelHeight(&f, pixel_height);
-
-   for (i=0; i < num_chars; ++i) {
-      int advance, lsb, x0,y0,x1,y1,gw,gh;
-      int g = stbtt_FindGlyphIndex(&f, first_char + i);
-      stbtt_GetGlyphHMetrics(&f, g, &advance, &lsb);
-      stbtt_GetGlyphBitmapBox(&f, g, scale,scale, &x0,&y0,&x1,&y1);
-      gw = x1-x0;
-      gh = y1-y0;
-      if (x + gw + 1 >= pw)
-         y = bottom_y, x = 1; // advance to next row
-      if (y + gh + 1 >= ph) // check if it fits vertically AFTER potentially moving to next row
-         return -i;
-      STBTT_assert(x+gw < pw);
-      STBTT_assert(y+gh < ph);
-      stbtt_MakeGlyphBitmap(&f, pixels+x+y*pw, gw,gh,pw, scale,scale, g);
-      chardata[i].x0 = (stbtt_int16) x;
-      chardata[i].y0 = (stbtt_int16) y;
-      chardata[i].x1 = (stbtt_int16) (x + gw);
-      chardata[i].y1 = (stbtt_int16) (y + gh);
-      chardata[i].xadvance = scale * advance;
-      chardata[i].xoff     = (double) x0;
-      chardata[i].yoff     = (double) y0;
-      x = x + gw + 1;
-      if (y+gh+1 > bottom_y)
-         bottom_y = y+gh+1;
-   }
-   return bottom_y;
-}
-
-STBTT_DEF void stbtt_GetBakedQuad(const stbtt_bakedchar *chardata, int pw, int ph, int char_index, double *xpos, double *ypos, stbtt_aligned_quad *q, int opengl_fillrule)
-{
-   double d3d_bias = opengl_fillrule ? 0 : -0.5f;
-   double ipw = 1.0f / pw, iph = 1.0f / ph;
-   const stbtt_bakedchar *b = chardata + char_index;
-   int round_x = STBTT_ifloor((*xpos + b->xoff) + 0.5f);
-   int round_y = STBTT_ifloor((*ypos + b->yoff) + 0.5f);
-
-   q->x0 = round_x + d3d_bias;
-   q->y0 = round_y + d3d_bias;
-   q->x1 = round_x + b->x1 - b->x0 + d3d_bias;
-   q->y1 = round_y + b->y1 - b->y0 + d3d_bias;
-
-   q->s0 = b->x0 * ipw;
-   q->t0 = b->y0 * iph;
-   q->s1 = b->x1 * ipw;
-   q->t1 = b->y1 * iph;
-
-   *xpos += b->xadvance;
-}
-
-//////////////////////////////////////////////////////////////////////////////
-//
-// rectangle packing replacement routines if you don't have stb_rect_pack.h
-//
-
-#ifndef STB_RECT_PACK_VERSION
-
-typedef int stbrp_coord;
-
-////////////////////////////////////////////////////////////////////////////////////
-//                                                                                //
-//                                                                                //
-// COMPILER WARNING ?!?!?                                                         //
-//                                                                                //
-//                                                                                //
-// if you get a compile warning due to these symbols being defined more than      //
-// once, move #include "stb_rect_pack.h" before #include "stb_truetype.h"         //
-//                                                                                //
-////////////////////////////////////////////////////////////////////////////////////
-
-typedef struct
-{
-   int width,height;
-   int x,y,bottom_y;
-} stbrp_context;
-
-typedef struct
-{
-   unsigned char x;
-} stbrp_node;
-
-struct stbrp_rect
-{
-   stbrp_coord x,y;
-   int id,w,h,was_packed;
-};
-
-static void stbrp_init_target(stbrp_context *con, int pw, int ph, stbrp_node *nodes, int num_nodes)
-{
-   con->width  = pw;
-   con->height = ph;
-   con->x = 0;
-   con->y = 0;
-   con->bottom_y = 0;
-   STBTT__NOTUSED(nodes);
-   STBTT__NOTUSED(num_nodes);
-}
-
-static void stbrp_pack_rects(stbrp_context *con, stbrp_rect *rects, int num_rects)
-{
-   int i;
-   for (i=0; i < num_rects; ++i) {
-      if (con->x + rects[i].w > con->width) {
-         con->x = 0;
-         con->y = con->bottom_y;
-      }
-      if (con->y + rects[i].h > con->height)
-         break;
-      rects[i].x = con->x;
-      rects[i].y = con->y;
-      rects[i].was_packed = 1;
-      con->x += rects[i].w;
-      if (con->y + rects[i].h > con->bottom_y)
-         con->bottom_y = con->y + rects[i].h;
-   }
-   for (   ; i < num_rects; ++i)
-      rects[i].was_packed = 0;
-}
-#endif
-
-//////////////////////////////////////////////////////////////////////////////
-//
-// bitmap baking
-//
-// This is SUPER-AWESOME (tm Ryan Gordon) packing using stb_rect_pack.h. If
-// stb_rect_pack.h isn't available, it uses the BakeFontBitmap strategy.
-
-STBTT_DEF int stbtt_PackBegin(stbtt_pack_context *spc, unsigned char *pixels, int pw, int ph, int stride_in_bytes, int padding, void *alloc_context)
-{
-   stbrp_context *context = (stbrp_context *) STBTT_malloc(sizeof(*context)            ,alloc_context);
-   int            num_nodes = pw - padding;
-   stbrp_node    *nodes   = (stbrp_node    *) STBTT_malloc(sizeof(*nodes  ) * num_nodes,alloc_context);
-
-   if (context == NULL || nodes == NULL) {
-      if (context != NULL) STBTT_free(context, alloc_context);
-      if (nodes   != NULL) STBTT_free(nodes  , alloc_context);
-      return 0;
-   }
-
-   spc->user_allocator_context = alloc_context;
-   spc->width = pw;
-   spc->height = ph;
-   spc->pixels = pixels;
-   spc->pack_info = context;
-   spc->nodes = nodes;
-   spc->padding = padding;
-   spc->stride_in_bytes = stride_in_bytes != 0 ? stride_in_bytes : pw;
-   spc->h_oversample = 1;
-   spc->v_oversample = 1;
-   spc->skip_missing = 0;
-
-   stbrp_init_target(context, pw-padding, ph-padding, nodes, num_nodes);
-
-   if (pixels)
-      STBTT_memset(pixels, 0, pw*ph); // background of 0 around pixels
-
-   return 1;
-}
-
-STBTT_DEF void stbtt_PackEnd  (stbtt_pack_context *spc)
-{
-   STBTT_free(spc->nodes    , spc->user_allocator_context);
-   STBTT_free(spc->pack_info, spc->user_allocator_context);
-}
-
-STBTT_DEF void stbtt_PackSetOversampling(stbtt_pack_context *spc, unsigned int h_oversample, unsigned int v_oversample)
-{
-   STBTT_assert(h_oversample <= STBTT_MAX_OVERSAMPLE);
-   STBTT_assert(v_oversample <= STBTT_MAX_OVERSAMPLE);
-   if (h_oversample <= STBTT_MAX_OVERSAMPLE)
-      spc->h_oversample = h_oversample;
-   if (v_oversample <= STBTT_MAX_OVERSAMPLE)
-      spc->v_oversample = v_oversample;
-}
-
-STBTT_DEF void stbtt_PackSetSkipMissingCodepoints(stbtt_pack_context *spc, int skip)
-{
-   spc->skip_missing = skip;
-}
-
-#define STBTT__OVER_MASK  (STBTT_MAX_OVERSAMPLE-1)
-
-static void stbtt__h_prefilter(unsigned char *pixels, int w, int h, int stride_in_bytes, unsigned int kernel_width)
-{
-   unsigned char buffer[STBTT_MAX_OVERSAMPLE];
-   int safe_w = w - kernel_width;
-   int j;
-   STBTT_memset(buffer, 0, STBTT_MAX_OVERSAMPLE); // suppress bogus warning from VS2013 -analyze
-   for (j=0; j < h; ++j) {
-      int i;
-      unsigned int total;
-      STBTT_memset(buffer, 0, kernel_width);
-
-      total = 0;
-
-      // make kernel_width a constant in common cases so compiler can optimize out the divide
-      switch (kernel_width) {
-         case 2:
-            for (i=0; i <= safe_w; ++i) {
-               total += pixels[i] - buffer[i & STBTT__OVER_MASK];
-               buffer[(i+kernel_width) & STBTT__OVER_MASK] = pixels[i];
-               pixels[i] = (unsigned char) (total / 2);
-            }
-            break;
-         case 3:
-            for (i=0; i <= safe_w; ++i) {
-               total += pixels[i] - buffer[i & STBTT__OVER_MASK];
-               buffer[(i+kernel_width) & STBTT__OVER_MASK] = pixels[i];
-               pixels[i] = (unsigned char) (total / 3);
-            }
-            break;
-         case 4:
-            for (i=0; i <= safe_w; ++i) {
-               total += pixels[i] - buffer[i & STBTT__OVER_MASK];
-               buffer[(i+kernel_width) & STBTT__OVER_MASK] = pixels[i];
-               pixels[i] = (unsigned char) (total / 4);
-            }
-            break;
-         case 5:
-            for (i=0; i <= safe_w; ++i) {
-               total += pixels[i] - buffer[i & STBTT__OVER_MASK];
-               buffer[(i+kernel_width) & STBTT__OVER_MASK] = pixels[i];
-               pixels[i] = (unsigned char) (total / 5);
-            }
-            break;
-         default:
-            for (i=0; i <= safe_w; ++i) {
-               total += pixels[i] - buffer[i & STBTT__OVER_MASK];
-               buffer[(i+kernel_width) & STBTT__OVER_MASK] = pixels[i];
-               pixels[i] = (unsigned char) (total / kernel_width);
-            }
-            break;
-      }
-
-      for (; i < w; ++i) {
-         STBTT_assert(pixels[i] == 0);
-         total -= buffer[i & STBTT__OVER_MASK];
-         pixels[i] = (unsigned char) (total / kernel_width);
-      }
-
-      pixels += stride_in_bytes;
-   }
-}
-
-static void stbtt__v_prefilter(unsigned char *pixels, int w, int h, int stride_in_bytes, unsigned int kernel_width)
-{
-   unsigned char buffer[STBTT_MAX_OVERSAMPLE];
-   int safe_h = h - kernel_width;
-   int j;
-   STBTT_memset(buffer, 0, STBTT_MAX_OVERSAMPLE); // suppress bogus warning from VS2013 -analyze
-   for (j=0; j < w; ++j) {
-      int i;
-      unsigned int total;
-      STBTT_memset(buffer, 0, kernel_width);
-
-      total = 0;
-
-      // make kernel_width a constant in common cases so compiler can optimize out the divide
-      switch (kernel_width) {
-         case 2:
-            for (i=0; i <= safe_h; ++i) {
-               total += pixels[i*stride_in_bytes] - buffer[i & STBTT__OVER_MASK];
-               buffer[(i+kernel_width) & STBTT__OVER_MASK] = pixels[i*stride_in_bytes];
-               pixels[i*stride_in_bytes] = (unsigned char) (total / 2);
-            }
-            break;
-         case 3:
-            for (i=0; i <= safe_h; ++i) {
-               total += pixels[i*stride_in_bytes] - buffer[i & STBTT__OVER_MASK];
-               buffer[(i+kernel_width) & STBTT__OVER_MASK] = pixels[i*stride_in_bytes];
-               pixels[i*stride_in_bytes] = (unsigned char) (total / 3);
-            }
-            break;
-         case 4:
-            for (i=0; i <= safe_h; ++i) {
-               total += pixels[i*stride_in_bytes] - buffer[i & STBTT__OVER_MASK];
-               buffer[(i+kernel_width) & STBTT__OVER_MASK] = pixels[i*stride_in_bytes];
-               pixels[i*stride_in_bytes] = (unsigned char) (total / 4);
-            }
-            break;
-         case 5:
-            for (i=0; i <= safe_h; ++i) {
-               total += pixels[i*stride_in_bytes] - buffer[i & STBTT__OVER_MASK];
-               buffer[(i+kernel_width) & STBTT__OVER_MASK] = pixels[i*stride_in_bytes];
-               pixels[i*stride_in_bytes] = (unsigned char) (total / 5);
-            }
-            break;
-         default:
-            for (i=0; i <= safe_h; ++i) {
-               total += pixels[i*stride_in_bytes] - buffer[i & STBTT__OVER_MASK];
-               buffer[(i+kernel_width) & STBTT__OVER_MASK] = pixels[i*stride_in_bytes];
-               pixels[i*stride_in_bytes] = (unsigned char) (total / kernel_width);
-            }
-            break;
-      }
-
-      for (; i < h; ++i) {
-         STBTT_assert(pixels[i*stride_in_bytes] == 0);
-         total -= buffer[i & STBTT__OVER_MASK];
-         pixels[i*stride_in_bytes] = (unsigned char) (total / kernel_width);
-      }
-
-      pixels += 1;
-   }
-}
-
-static double stbtt__oversample_shift(int oversample)
-{
-   if (!oversample)
-      return 0.0f;
-
-   // The prefilter is a box filter of width "oversample",
-   // which shifts phase by (oversample - 1)/2 pixels in
-   // oversampled space. We want to shift in the opposite
-   // direction to counter this.
-   return (double)-(oversample - 1) / (2.0f * (double)oversample);
-}
-
-// rects array must be big enough to accommodate all characters in the given ranges
-STBTT_DEF int stbtt_PackFontRangesGatherRects(stbtt_pack_context *spc, const stbtt_fontinfo *info, stbtt_pack_range *ranges, int num_ranges, stbrp_rect *rects)
-{
-   int i,j,k;
-   int missing_glyph_added = 0;
-
-   k=0;
-   for (i=0; i < num_ranges; ++i) {
-      double fh = ranges[i].font_size;
-      double scale = fh > 0 ? stbtt_ScaleForPixelHeight(info, fh) : stbtt_ScaleForMappingEmToPixels(info, -fh);
-      ranges[i].h_oversample = (unsigned char) spc->h_oversample;
-      ranges[i].v_oversample = (unsigned char) spc->v_oversample;
-      for (j=0; j < ranges[i].num_chars; ++j) {
-         int x0,y0,x1,y1;
-         int codepoint = ranges[i].array_of_unicode_codepoints == NULL ? ranges[i].first_unicode_codepoint_in_range + j : ranges[i].array_of_unicode_codepoints[j];
-         int glyph = stbtt_FindGlyphIndex(info, codepoint);
-         if (glyph == 0 && (spc->skip_missing || missing_glyph_added)) {
-            rects[k].w = rects[k].h = 0;
-         } else {
-            stbtt_GetGlyphBitmapBoxSubpixel(info,glyph,
-                                            scale * spc->h_oversample,
-                                            scale * spc->v_oversample,
-                                            0,0,
-                                            &x0,&y0,&x1,&y1);
-            rects[k].w = (stbrp_coord) (x1-x0 + spc->padding + spc->h_oversample-1);
-            rects[k].h = (stbrp_coord) (y1-y0 + spc->padding + spc->v_oversample-1);
-            if (glyph == 0)
-               missing_glyph_added = 1;
-         }
-         ++k;
-      }
-   }
-
-   return k;
-}
-
-STBTT_DEF void stbtt_MakeGlyphBitmapSubpixelPrefilter(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, double scale_x, double scale_y, double shift_x, double shift_y, int prefilter_x, int prefilter_y, double *sub_x, double *sub_y, int glyph)
-{
-   stbtt_MakeGlyphBitmapSubpixel(info,
-                                 output,
-                                 out_w - (prefilter_x - 1),
-                                 out_h - (prefilter_y - 1),
-                                 out_stride,
-                                 scale_x,
-                                 scale_y,
-                                 shift_x,
-                                 shift_y,
-                                 glyph);
-
-   if (prefilter_x > 1)
-      stbtt__h_prefilter(output, out_w, out_h, out_stride, prefilter_x);
-
-   if (prefilter_y > 1)
-      stbtt__v_prefilter(output, out_w, out_h, out_stride, prefilter_y);
-
-   *sub_x = stbtt__oversample_shift(prefilter_x);
-   *sub_y = stbtt__oversample_shift(prefilter_y);
-}
-
-// rects array must be big enough to accommodate all characters in the given ranges
-STBTT_DEF int stbtt_PackFontRangesRenderIntoRects(stbtt_pack_context *spc, const stbtt_fontinfo *info, stbtt_pack_range *ranges, int num_ranges, stbrp_rect *rects)
-{
-   int i,j,k, missing_glyph = -1, return_value = 1;
-
-   // save current values
-   int old_h_over = spc->h_oversample;
-   int old_v_over = spc->v_oversample;
-
-   k = 0;
-   for (i=0; i < num_ranges; ++i) {
-      double fh = ranges[i].font_size;
-      double scale = fh > 0 ? stbtt_ScaleForPixelHeight(info, fh) : stbtt_ScaleForMappingEmToPixels(info, -fh);
-      double recip_h,recip_v,sub_x,sub_y;
-      spc->h_oversample = ranges[i].h_oversample;
-      spc->v_oversample = ranges[i].v_oversample;
-      recip_h = 1.0f / spc->h_oversample;
-      recip_v = 1.0f / spc->v_oversample;
-      sub_x = stbtt__oversample_shift(spc->h_oversample);
-      sub_y = stbtt__oversample_shift(spc->v_oversample);
-      for (j=0; j < ranges[i].num_chars; ++j) {
-         stbrp_rect *r = &rects[k];
-         if (r->was_packed && r->w != 0 && r->h != 0) {
-            stbtt_packedchar *bc = &ranges[i].chardata_for_range[j];
-            int advance, lsb, x0,y0,x1,y1;
-            int codepoint = ranges[i].array_of_unicode_codepoints == NULL ? ranges[i].first_unicode_codepoint_in_range + j : ranges[i].array_of_unicode_codepoints[j];
-            int glyph = stbtt_FindGlyphIndex(info, codepoint);
-            stbrp_coord pad = (stbrp_coord) spc->padding;
-
-            // pad on left and top
-            r->x += pad;
-            r->y += pad;
-            r->w -= pad;
-            r->h -= pad;
-            stbtt_GetGlyphHMetrics(info, glyph, &advance, &lsb);
-            stbtt_GetGlyphBitmapBox(info, glyph,
-                                    scale * spc->h_oversample,
-                                    scale * spc->v_oversample,
-                                    &x0,&y0,&x1,&y1);
-            stbtt_MakeGlyphBitmapSubpixel(info,
-                                          spc->pixels + r->x + r->y*spc->stride_in_bytes,
-                                          r->w - spc->h_oversample+1,
-                                          r->h - spc->v_oversample+1,
-                                          spc->stride_in_bytes,
-                                          scale * spc->h_oversample,
-                                          scale * spc->v_oversample,
-                                          0,0,
-                                          glyph);
-
-            if (spc->h_oversample > 1)
-               stbtt__h_prefilter(spc->pixels + r->x + r->y*spc->stride_in_bytes,
-                                  r->w, r->h, spc->stride_in_bytes,
-                                  spc->h_oversample);
-
-            if (spc->v_oversample > 1)
-               stbtt__v_prefilter(spc->pixels + r->x + r->y*spc->stride_in_bytes,
-                                  r->w, r->h, spc->stride_in_bytes,
-                                  spc->v_oversample);
-
-            bc->x0       = (stbtt_int16)  r->x;
-            bc->y0       = (stbtt_int16)  r->y;
-            bc->x1       = (stbtt_int16) (r->x + r->w);
-            bc->y1       = (stbtt_int16) (r->y + r->h);
-            bc->xadvance =                scale * advance;
-            bc->xoff     =       (double)  x0 * recip_h + sub_x;
-            bc->yoff     =       (double)  y0 * recip_v + sub_y;
-            bc->xoff2    =                (x0 + r->w) * recip_h + sub_x;
-            bc->yoff2    =                (y0 + r->h) * recip_v + sub_y;
-
-            if (glyph == 0)
-               missing_glyph = j;
-         } else if (spc->skip_missing) {
-            return_value = 0;
-         } else if (r->was_packed && r->w == 0 && r->h == 0 && missing_glyph >= 0) {
-            ranges[i].chardata_for_range[j] = ranges[i].chardata_for_range[missing_glyph];
-         } else {
-            return_value = 0; // if any fail, report failure
-         }
-
-         ++k;
-      }
-   }
-
-   // restore original values
-   spc->h_oversample = old_h_over;
-   spc->v_oversample = old_v_over;
-
-   return return_value;
-}
-
-STBTT_DEF void stbtt_PackFontRangesPackRects(stbtt_pack_context *spc, stbrp_rect *rects, int num_rects)
-{
-   stbrp_pack_rects((stbrp_context *) spc->pack_info, rects, num_rects);
-}
-
-STBTT_DEF int stbtt_PackFontRanges(stbtt_pack_context *spc, const unsigned char *fontdata, int font_index, stbtt_pack_range *ranges, int num_ranges)
-{
-   stbtt_fontinfo info;
-   int i,j,n, return_value = 1;
-   //stbrp_context *context = (stbrp_context *) spc->pack_info;
-   stbrp_rect    *rects;
-
-   // flag all characters as NOT packed
-   for (i=0; i < num_ranges; ++i)
-      for (j=0; j < ranges[i].num_chars; ++j)
-         ranges[i].chardata_for_range[j].x0 =
-         ranges[i].chardata_for_range[j].y0 =
-         ranges[i].chardata_for_range[j].x1 =
-         ranges[i].chardata_for_range[j].y1 = 0;
-
-   n = 0;
-   for (i=0; i < num_ranges; ++i)
-      n += ranges[i].num_chars;
-
-   rects = (stbrp_rect *) STBTT_malloc(sizeof(*rects) * n, spc->user_allocator_context);
-   if (rects == NULL)
-      return 0;
-
-   info.userdata = spc->user_allocator_context;
-   stbtt_InitFont(&info, fontdata, stbtt_GetFontOffsetForIndex(fontdata,font_index));
-
-   n = stbtt_PackFontRangesGatherRects(spc, &info, ranges, num_ranges, rects);
-
-   stbtt_PackFontRangesPackRects(spc, rects, n);
-
-   return_value = stbtt_PackFontRangesRenderIntoRects(spc, &info, ranges, num_ranges, rects);
-
-   STBTT_free(rects, spc->user_allocator_context);
-   return return_value;
-}
-
-STBTT_DEF int stbtt_PackFontRange(stbtt_pack_context *spc, const unsigned char *fontdata, int font_index, double font_size,
-            int first_unicode_codepoint_in_range, int num_chars_in_range, stbtt_packedchar *chardata_for_range)
-{
-   stbtt_pack_range range;
-   range.first_unicode_codepoint_in_range = first_unicode_codepoint_in_range;
-   range.array_of_unicode_codepoints = NULL;
-   range.num_chars                   = num_chars_in_range;
-   range.chardata_for_range          = chardata_for_range;
-   range.font_size                   = font_size;
-   return stbtt_PackFontRanges(spc, fontdata, font_index, &range, 1);
-}
-
-STBTT_DEF void stbtt_GetScaledFontVMetrics(const unsigned char *fontdata, int index, double size, double *ascent, double *descent, double *lineGap)
-{
-   int i_ascent, i_descent, i_lineGap;
-   double scale;
-   stbtt_fontinfo info;
-   stbtt_InitFont(&info, fontdata, stbtt_GetFontOffsetForIndex(fontdata, index));
-   scale = size > 0 ? stbtt_ScaleForPixelHeight(&info, size) : stbtt_ScaleForMappingEmToPixels(&info, -size);
-   stbtt_GetFontVMetrics(&info, &i_ascent, &i_descent, &i_lineGap);
-   *ascent  = (double) i_ascent  * scale;
-   *descent = (double) i_descent * scale;
-   *lineGap = (double) i_lineGap * scale;
-}
-
-STBTT_DEF void stbtt_GetPackedQuad(const stbtt_packedchar *chardata, int pw, int ph, int char_index, double *xpos, double *ypos, stbtt_aligned_quad *q, int align_to_integer)
-{
-   double ipw = 1.0f / pw, iph = 1.0f / ph;
-   const stbtt_packedchar *b = chardata + char_index;
-
-   if (align_to_integer) {
-      double x = (double) STBTT_ifloor((*xpos + b->xoff) + 0.5f);
-      double y = (double) STBTT_ifloor((*ypos + b->yoff) + 0.5f);
-      q->x0 = x;
-      q->y0 = y;
-      q->x1 = x + b->xoff2 - b->xoff;
-      q->y1 = y + b->yoff2 - b->yoff;
-   } else {
-      q->x0 = *xpos + b->xoff;
-      q->y0 = *ypos + b->yoff;
-      q->x1 = *xpos + b->xoff2;
-      q->y1 = *ypos + b->yoff2;
-   }
-
-   q->s0 = b->x0 * ipw;
-   q->t0 = b->y0 * iph;
-   q->s1 = b->x1 * ipw;
-   q->t1 = b->y1 * iph;
-
-   *xpos += b->xadvance;
-}
-
-//////////////////////////////////////////////////////////////////////////////
-//
-// sdf computation
-//
-
-#define STBTT_min(a,b)  ((a) < (b) ? (a) : (b))
-#define STBTT_max(a,b)  ((a) < (b) ? (b) : (a))
-
-static int stbtt__ray_intersect_bezier(double orig[2], double ray[2], double q0[2], double q1[2], double q2[2], double hits[2][2])
-{
-   double q0perp = q0[1]*ray[0] - q0[0]*ray[1];
-   double q1perp = q1[1]*ray[0] - q1[0]*ray[1];
-   double q2perp = q2[1]*ray[0] - q2[0]*ray[1];
-   double roperp = orig[1]*ray[0] - orig[0]*ray[1];
-
-   double a = q0perp - 2*q1perp + q2perp;
-   double b = q1perp - q0perp;
-   double c = q0perp - roperp;
-
-   double s0 = 0., s1 = 0.;
-   int num_s = 0;
-
-   if (a != 0.0) {
-      double discr = b*b - a*c;
-      if (discr > 0.0) {
-         double rcpna = -1 / a;
-         double d = (double) STBTT_sqrt(discr);
-         s0 = (b+d) * rcpna;
-         s1 = (b-d) * rcpna;
-         if (s0 >= 0.0 && s0 <= 1.0)
-            num_s = 1;
-         if (d > 0.0 && s1 >= 0.0 && s1 <= 1.0) {
-            if (num_s == 0) s0 = s1;
-            ++num_s;
-         }
-      }
-   } else {
-      // 2*b*s + c = 0
-      // s = -c / (2*b)
-      s0 = c / (-2 * b);
-      if (s0 >= 0.0 && s0 <= 1.0)
-         num_s = 1;
-   }
-
-   if (num_s == 0)
-      return 0;
-   else {
-      double rcp_len2 = 1 / (ray[0]*ray[0] + ray[1]*ray[1]);
-      double rayn_x = ray[0] * rcp_len2, rayn_y = ray[1] * rcp_len2;
-
-      double q0d =   q0[0]*rayn_x +   q0[1]*rayn_y;
-      double q1d =   q1[0]*rayn_x +   q1[1]*rayn_y;
-      double q2d =   q2[0]*rayn_x +   q2[1]*rayn_y;
-      double rod = orig[0]*rayn_x + orig[1]*rayn_y;
-
-      double q10d = q1d - q0d;
-      double q20d = q2d - q0d;
-      double q0rd = q0d - rod;
-
-      hits[0][0] = q0rd + s0*(2.0f - 2.0f*s0)*q10d + s0*s0*q20d;
-      hits[0][1] = a*s0+b;
-
-      if (num_s > 1) {
-         hits[1][0] = q0rd + s1*(2.0f - 2.0f*s1)*q10d + s1*s1*q20d;
-         hits[1][1] = a*s1+b;
-         return 2;
-      } else {
-         return 1;
-      }
-   }
-}
-
-static int equal(double *a, double *b)
-{
-   return (a[0] == b[0] && a[1] == b[1]);
-}
-
-static int stbtt__compute_crossings_x(double x, double y, int nverts, stbtt_vertex *verts)
-{
-   int i;
-   double orig[2], ray[2] = { 1, 0 };
-   double y_frac;
-   int winding = 0;
-
-   orig[0] = x;
-   orig[1] = y;
-
-   // make sure y never passes through a vertex of the shape
-   y_frac = (double) STBTT_fmod(y, 1.0f);
-   if (y_frac < 0.01f)
-      y += 0.01f;
-   else if (y_frac > 0.99f)
-      y -= 0.01f;
-   orig[1] = y;
-
-   // test a ray from (-infinity,y) to (x,y)
-   for (i=0; i < nverts; ++i) {
-      if (verts[i].type == STBTT_vline) {
-         int x0 = (int) verts[i-1].x, y0 = (int) verts[i-1].y;
-         int x1 = (int) verts[i  ].x, y1 = (int) verts[i  ].y;
-         if (y > STBTT_min(y0,y1) && y < STBTT_max(y0,y1) && x > STBTT_min(x0,x1)) {
-            double x_inter = (y - y0) / (y1 - y0) * (x1-x0) + x0;
-            if (x_inter < x)
-               winding += (y0 < y1) ? 1 : -1;
-         }
-      }
-      if (verts[i].type == STBTT_vcurve) {
-         int x0 = (int) verts[i-1].x , y0 = (int) verts[i-1].y ;
-         int x1 = (int) verts[i  ].cx, y1 = (int) verts[i  ].cy;
-         int x2 = (int) verts[i  ].x , y2 = (int) verts[i  ].y ;
-         int ax = STBTT_min(x0,STBTT_min(x1,x2)), ay = STBTT_min(y0,STBTT_min(y1,y2));
-         int by = STBTT_max(y0,STBTT_max(y1,y2));
-         if (y > ay && y < by && x > ax) {
-            double q0[2],q1[2],q2[2];
-            double hits[2][2];
-            q0[0] = (double)x0;
-            q0[1] = (double)y0;
-            q1[0] = (double)x1;
-            q1[1] = (double)y1;
-            q2[0] = (double)x2;
-            q2[1] = (double)y2;
-            if (equal(q0,q1) || equal(q1,q2)) {
-               x0 = (int)verts[i-1].x;
-               y0 = (int)verts[i-1].y;
-               x1 = (int)verts[i  ].x;
-               y1 = (int)verts[i  ].y;
-               if (y > STBTT_min(y0,y1) && y < STBTT_max(y0,y1) && x > STBTT_min(x0,x1)) {
-                  double x_inter = (y - y0) / (y1 - y0) * (x1-x0) + x0;
-                  if (x_inter < x)
-                     winding += (y0 < y1) ? 1 : -1;
-               }
-            } else {
-               int num_hits = stbtt__ray_intersect_bezier(orig, ray, q0, q1, q2, hits);
-               if (num_hits >= 1)
-                  if (hits[0][0] < 0)
-                     winding += (hits[0][1] < 0 ? -1 : 1);
-               if (num_hits >= 2)
-                  if (hits[1][0] < 0)
-                     winding += (hits[1][1] < 0 ? -1 : 1);
-            }
-         }
-      }
-   }
-   return winding;
-}
-
-static double stbtt__cuberoot( double x )
-{
-   if (x<0)
-      return -(double) STBTT_pow(-x,1.0f/3.0f);
-   else
-      return  (double) STBTT_pow( x,1.0f/3.0f);
-}
-
-// x^3 + c*x^2 + b*x + a = 0
-static int stbtt__solve_cubic(double a, double b, double c, double* r)
-{
-	double s = -a / 3;
-	double p = b - a*a / 3;
-	double q = a * (2*a*a - 9*b) / 27 + c;
-   double p3 = p*p*p;
-	double d = q*q + 4*p3 / 27;
-	if (d >= 0) {
-		double z = (double) STBTT_sqrt(d);
-		double u = (-q + z) / 2;
-		double v = (-q - z) / 2;
-		u = stbtt__cuberoot(u);
-		v = stbtt__cuberoot(v);
-		r[0] = s + u + v;
-		return 1;
-	} else {
-	   double u = (double) STBTT_sqrt(-p/3);
-	   double v = (double) STBTT_acos(-STBTT_sqrt(-27/p3) * q / 2) / 3; // p3 must be negative, since d is negative
-	   double m = (double) STBTT_cos(v);
-      double n = (double) STBTT_cos(v-3.141592/2)*1.732050808f;
-	   r[0] = s + u * 2 * m;
-	   r[1] = s - u * (m + n);
-	   r[2] = s - u * (m - n);
-
-      //STBTT_assert( STBTT_fabs(((r[0]+a)*r[0]+b)*r[0]+c) < 0.05f);  // these asserts may not be safe at all scales, though they're in bezier t parameter units so maybe?
-      //STBTT_assert( STBTT_fabs(((r[1]+a)*r[1]+b)*r[1]+c) < 0.05f);
-      //STBTT_assert( STBTT_fabs(((r[2]+a)*r[2]+b)*r[2]+c) < 0.05f);
-   	return 3;
-   }
-}
-
-STBTT_DEF unsigned char * stbtt_GetGlyphSDF(const stbtt_fontinfo *info, double scale, int glyph, int padding, unsigned char onedge_value, double pixel_dist_scale, int *width, int *height, int *xoff, int *yoff)
-{
-   double scale_x = scale, scale_y = scale;
-   int ix0,iy0,ix1,iy1;
-   int w,h;
-   unsigned char *data;
-
-   if (scale == 0) return NULL;
-
-   stbtt_GetGlyphBitmapBoxSubpixel(info, glyph, scale, scale, 0.0f,0.0f, &ix0,&iy0,&ix1,&iy1);
-
-   // if empty, return NULL
-   if (ix0 == ix1 || iy0 == iy1)
-      return NULL;
-
-   ix0 -= padding;
-   iy0 -= padding;
-   ix1 += padding;
-   iy1 += padding;
-
-   w = (ix1 - ix0);
-   h = (iy1 - iy0);
-
-   if (width ) *width  = w;
-   if (height) *height = h;
-   if (xoff  ) *xoff   = ix0;
-   if (yoff  ) *yoff   = iy0;
-
-   // invert for y-downwards bitmaps
-   scale_y = -scale_y;
-
-   {
-      int x,y,i,j;
-      double *precompute;
-      stbtt_vertex *verts;
-      int num_verts = stbtt_GetGlyphShape(info, glyph, &verts);
-      data = (unsigned char *) STBTT_malloc(w * h, info->userdata);
-      precompute = (double *) STBTT_malloc(num_verts * sizeof(double), info->userdata);
-
-      for (i=0,j=num_verts-1; i < num_verts; j=i++) {
-         if (verts[i].type == STBTT_vline) {
-            double x0 = verts[i].x*scale_x, y0 = verts[i].y*scale_y;
-            double x1 = verts[j].x*scale_x, y1 = verts[j].y*scale_y;
-            double dist = (double) STBTT_sqrt((x1-x0)*(x1-x0) + (y1-y0)*(y1-y0));
-            precompute[i] = (dist == 0) ? 0.0f : 1.0f / dist;
-         } else if (verts[i].type == STBTT_vcurve) {
-            double x2 = verts[j].x *scale_x, y2 = verts[j].y *scale_y;
-            double x1 = verts[i].cx*scale_x, y1 = verts[i].cy*scale_y;
-            double x0 = verts[i].x *scale_x, y0 = verts[i].y *scale_y;
-            double bx = x0 - 2*x1 + x2, by = y0 - 2*y1 + y2;
-            double len2 = bx*bx + by*by;
-            if (len2 != 0.0f)
-               precompute[i] = 1.0f / (bx*bx + by*by);
-            else
-               precompute[i] = 0.0f;
-         } else
-            precompute[i] = 0.0f;
-      }
-
-      for (y=iy0; y < iy1; ++y) {
-         for (x=ix0; x < ix1; ++x) {
-            double val;
-            double min_dist = 999999.0f;
-            double sx = (double) x + 0.5f;
-            double sy = (double) y + 0.5f;
-            double x_gspace = (sx / scale_x);
-            double y_gspace = (sy / scale_y);
-
-            int winding = stbtt__compute_crossings_x(x_gspace, y_gspace, num_verts, verts); // @OPTIMIZE: this could just be a rasterization, but needs to be line vs. non-tesselated curves so a new path
-
-            for (i=0; i < num_verts; ++i) {
-               double x0 = verts[i].x*scale_x, y0 = verts[i].y*scale_y;
-
-               // check against every point here rather than inside line/curve primitives -- @TODO: wrong if multiple 'moves' in a row produce a garbage point, and given culling, probably more efficient to do within line/curve
-               double dist2 = (x0-sx)*(x0-sx) + (y0-sy)*(y0-sy);
-               if (dist2 < min_dist*min_dist)
-                  min_dist = (double) STBTT_sqrt(dist2);
-
-               if (verts[i].type == STBTT_vline) {
-                  double x1 = verts[i-1].x*scale_x, y1 = verts[i-1].y*scale_y;
-
-                  // coarse culling against bbox
-                  //if (sx > STBTT_min(x0,x1)-min_dist && sx < STBTT_max(x0,x1)+min_dist &&
-                  //    sy > STBTT_min(y0,y1)-min_dist && sy < STBTT_max(y0,y1)+min_dist)
-                  double dist = (double) STBTT_fabs((x1-x0)*(y0-sy) - (y1-y0)*(x0-sx)) * precompute[i];
-                  STBTT_assert(i != 0);
-                  if (dist < min_dist) {
-                     // check position along line
-                     // x' = x0 + t*(x1-x0), y' = y0 + t*(y1-y0)
-                     // minimize (x'-sx)*(x'-sx)+(y'-sy)*(y'-sy)
-                     double dx = x1-x0, dy = y1-y0;
-                     double px = x0-sx, py = y0-sy;
-                     // minimize (px+t*dx)^2 + (py+t*dy)^2 = px*px + 2*px*dx*t + t^2*dx*dx + py*py + 2*py*dy*t + t^2*dy*dy
-                     // derivative: 2*px*dx + 2*py*dy + (2*dx*dx+2*dy*dy)*t, set to 0 and solve
-                     double t = -(px*dx + py*dy) / (dx*dx + dy*dy);
-                     if (t >= 0.0f && t <= 1.0f)
-                        min_dist = dist;
-                  }
-               } else if (verts[i].type == STBTT_vcurve) {
-                  double x2 = verts[i-1].x *scale_x, y2 = verts[i-1].y *scale_y;
-                  double x1 = verts[i  ].cx*scale_x, y1 = verts[i  ].cy*scale_y;
-                  double box_x0 = STBTT_min(STBTT_min(x0,x1),x2);
-                  double box_y0 = STBTT_min(STBTT_min(y0,y1),y2);
-                  double box_x1 = STBTT_max(STBTT_max(x0,x1),x2);
-                  double box_y1 = STBTT_max(STBTT_max(y0,y1),y2);
-                  // coarse culling against bbox to avoid computing cubic unnecessarily
-                  if (sx > box_x0-min_dist && sx < box_x1+min_dist && sy > box_y0-min_dist && sy < box_y1+min_dist) {
-                     int num=0;
-                     double ax = x1-x0, ay = y1-y0;
-                     double bx = x0 - 2*x1 + x2, by = y0 - 2*y1 + y2;
-                     double mx = x0 - sx, my = y0 - sy;
-                     double res[3]={0},px,py,t,it;
-                     double a_inv = precompute[i];
-                     if (a_inv == 0.0) { // if a_inv is 0, it's 2nd degree so use quadratic formula
-                        double a = 3*(ax*bx + ay*by);
-                        double b = 2*(ax*ax + ay*ay) + (mx*bx+my*by);
-                        double c = mx*ax+my*ay;
-                        if (a == 0.0) { // if a is 0, it's linear
-                           if (b != 0.0) {
-                              res[num++] = -c/b;
-                           }
-                        } else {
-                           double discriminant = b*b - 4*a*c;
-                           if (discriminant < 0)
-                              num = 0;
-                           else {
-                              double root = (double) STBTT_sqrt(discriminant);
-                              res[0] = (-b - root)/(2*a);
-                              res[1] = (-b + root)/(2*a);
-                              num = 2; // don't bother distinguishing 1-solution case, as code below will still work
-                           }
-                        }
-                     } else {
-                        double b = 3*(ax*bx + ay*by) * a_inv; // could precompute this as it doesn't depend on sample point
-                        double c = (2*(ax*ax + ay*ay) + (mx*bx+my*by)) * a_inv;
-                        double d = (mx*ax+my*ay) * a_inv;
-                        num = stbtt__solve_cubic(b, c, d, res);
-                     }
-                     if (num >= 1 && res[0] >= 0.0f && res[0] <= 1.0f) {
-                        t = res[0], it = 1.0f - t;
-                        px = it*it*x0 + 2*t*it*x1 + t*t*x2;
-                        py = it*it*y0 + 2*t*it*y1 + t*t*y2;
-                        dist2 = (px-sx)*(px-sx) + (py-sy)*(py-sy);
-                        if (dist2 < min_dist * min_dist)
-                           min_dist = (double) STBTT_sqrt(dist2);
-                     }
-                     if (num >= 2 && res[1] >= 0.0f && res[1] <= 1.0f) {
-                        t = res[1], it = 1.0f - t;
-                        px = it*it*x0 + 2*t*it*x1 + t*t*x2;
-                        py = it*it*y0 + 2*t*it*y1 + t*t*y2;
-                        dist2 = (px-sx)*(px-sx) + (py-sy)*(py-sy);
-                        if (dist2 < min_dist * min_dist)
-                           min_dist = (double) STBTT_sqrt(dist2);
-                     }
-                     if (num >= 3 && res[2] >= 0.0f && res[2] <= 1.0f) {
-                        t = res[2], it = 1.0f - t;
-                        px = it*it*x0 + 2*t*it*x1 + t*t*x2;
-                        py = it*it*y0 + 2*t*it*y1 + t*t*y2;
-                        dist2 = (px-sx)*(px-sx) + (py-sy)*(py-sy);
-                        if (dist2 < min_dist * min_dist)
-                           min_dist = (double) STBTT_sqrt(dist2);
-                     }
-                  }
-               }
-            }
-            if (winding == 0)
-               min_dist = -min_dist;  // if outside the shape, value is negative
-            val = onedge_value + pixel_dist_scale * min_dist;
-            if (val < 0)
-               val = 0;
-            else if (val > 255)
-               val = 255;
-            data[(y-iy0)*w+(x-ix0)] = (unsigned char) val;
-         }
-      }
-      STBTT_free(precompute, info->userdata);
-      STBTT_free(verts, info->userdata);
-   }
-   return data;
-}
-
-STBTT_DEF unsigned char * stbtt_GetCodepointSDF(const stbtt_fontinfo *info, double scale, int codepoint, int padding, unsigned char onedge_value, double pixel_dist_scale, int *width, int *height, int *xoff, int *yoff)
-{
-   return stbtt_GetGlyphSDF(info, scale, stbtt_FindGlyphIndex(info, codepoint), padding, onedge_value, pixel_dist_scale, width, height, xoff, yoff);
-}
-
-STBTT_DEF void stbtt_FreeSDF(unsigned char *bitmap, void *userdata)
-{
-   STBTT_free(bitmap, userdata);
-}
-
-//////////////////////////////////////////////////////////////////////////////
-//
-// font name matching -- recommended not to use this
-//
-
-// check if a utf8 string contains a prefix which is the utf16 string; if so return length of matching utf8 string
-static stbtt_int32 stbtt__CompareUTF8toUTF16_bigendian_prefix(stbtt_uint8 *s1, stbtt_int32 len1, stbtt_uint8 *s2, stbtt_int32 len2)
-{
-   stbtt_int32 i=0;
-
-   // convert utf16 to utf8 and compare the results while converting
-   while (len2) {
-      stbtt_uint16 ch = s2[0]*256 + s2[1];
-      if (ch < 0x80) {
-         if (i >= len1) return -1;
-         if (s1[i++] != ch) return -1;
-      } else if (ch < 0x800) {
-         if (i+1 >= len1) return -1;
-         if (s1[i++] != 0xc0 + (ch >> 6)) return -1;
-         if (s1[i++] != 0x80 + (ch & 0x3f)) return -1;
-      } else if (ch >= 0xd800 && ch < 0xdc00) {
-         stbtt_uint32 c;
-         stbtt_uint16 ch2 = s2[2]*256 + s2[3];
-         if (i+3 >= len1) return -1;
-         c = ((ch - 0xd800) << 10) + (ch2 - 0xdc00) + 0x10000;
-         if (s1[i++] != 0xf0 + (c >> 18)) return -1;
-         if (s1[i++] != 0x80 + ((c >> 12) & 0x3f)) return -1;
-         if (s1[i++] != 0x80 + ((c >>  6) & 0x3f)) return -1;
-         if (s1[i++] != 0x80 + ((c      ) & 0x3f)) return -1;
-         s2 += 2; // plus another 2 below
-         len2 -= 2;
-      } else if (ch >= 0xdc00 && ch < 0xe000) {
-         return -1;
-      } else {
-         if (i+2 >= len1) return -1;
-         if (s1[i++] != 0xe0 + (ch >> 12)) return -1;
-         if (s1[i++] != 0x80 + ((ch >> 6) & 0x3f)) return -1;
-         if (s1[i++] != 0x80 + ((ch     ) & 0x3f)) return -1;
-      }
-      s2 += 2;
-      len2 -= 2;
-   }
-   return i;
-}
-
-static int stbtt_CompareUTF8toUTF16_bigendian_internal(char *s1, int len1, char *s2, int len2)
-{
-   return len1 == stbtt__CompareUTF8toUTF16_bigendian_prefix((stbtt_uint8*) s1, len1, (stbtt_uint8*) s2, len2);
-}
-
-// returns results in whatever encoding you request... but note that 2-byte encodings
-// will be BIG-ENDIAN... use stbtt_CompareUTF8toUTF16_bigendian() to compare
-STBTT_DEF const char *stbtt_GetFontNameString(const stbtt_fontinfo *font, int *length, int platformID, int encodingID, int languageID, int nameID)
-{
-   stbtt_int32 i,count,stringOffset;
-   stbtt_uint8 *fc = font->data;
-   stbtt_uint32 offset = font->fontstart;
-   stbtt_uint32 nm = stbtt__find_table(fc, offset, "name");
-   if (!nm) return NULL;
-
-   count = ttUSHORT(fc+nm+2);
-   stringOffset = nm + ttUSHORT(fc+nm+4);
-   for (i=0; i < count; ++i) {
-      stbtt_uint32 loc = nm + 6 + 12 * i;
-      if (platformID == ttUSHORT(fc+loc+0) && encodingID == ttUSHORT(fc+loc+2)
-          && languageID == ttUSHORT(fc+loc+4) && nameID == ttUSHORT(fc+loc+6)) {
-         *length = ttUSHORT(fc+loc+8);
-         return (const char *) (fc+stringOffset+ttUSHORT(fc+loc+10));
-      }
-   }
-   return NULL;
-}
-
-static int stbtt__matchpair(stbtt_uint8 *fc, stbtt_uint32 nm, stbtt_uint8 *name, stbtt_int32 nlen, stbtt_int32 target_id, stbtt_int32 next_id)
-{
-   stbtt_int32 i;
-   stbtt_int32 count = ttUSHORT(fc+nm+2);
-   stbtt_int32 stringOffset = nm + ttUSHORT(fc+nm+4);
-
-   for (i=0; i < count; ++i) {
-      stbtt_uint32 loc = nm + 6 + 12 * i;
-      stbtt_int32 id = ttUSHORT(fc+loc+6);
-      if (id == target_id) {
-         // find the encoding
-         stbtt_int32 platform = ttUSHORT(fc+loc+0), encoding = ttUSHORT(fc+loc+2), language = ttUSHORT(fc+loc+4);
-
-         // is this a Unicode encoding?
-         if (platform == 0 || (platform == 3 && encoding == 1) || (platform == 3 && encoding == 10)) {
-            stbtt_int32 slen = ttUSHORT(fc+loc+8);
-            stbtt_int32 off = ttUSHORT(fc+loc+10);
-
-            // check if there's a prefix match
-            stbtt_int32 matchlen = stbtt__CompareUTF8toUTF16_bigendian_prefix(name, nlen, fc+stringOffset+off,slen);
-            if (matchlen >= 0) {
-               // check for target_id+1 immediately following, with same encoding & language
-               if (i+1 < count && ttUSHORT(fc+loc+12+6) == next_id && ttUSHORT(fc+loc+12) == platform && ttUSHORT(fc+loc+12+2) == encoding && ttUSHORT(fc+loc+12+4) == language) {
-                  slen = ttUSHORT(fc+loc+12+8);
-                  off = ttUSHORT(fc+loc+12+10);
-                  if (slen == 0) {
-                     if (matchlen == nlen)
-                        return 1;
-                  } else if (matchlen < nlen && name[matchlen] == ' ') {
-                     ++matchlen;
-                     if (stbtt_CompareUTF8toUTF16_bigendian_internal((char*) (name+matchlen), nlen-matchlen, (char*)(fc+stringOffset+off),slen))
-                        return 1;
-                  }
-               } else {
-                  // if nothing immediately following
-                  if (matchlen == nlen)
-                     return 1;
-               }
-            }
-         }
-
-         // @TODO handle other encodings
-      }
-   }
-   return 0;
-}
-
-static int stbtt__matches(stbtt_uint8 *fc, stbtt_uint32 offset, stbtt_uint8 *name, stbtt_int32 flags)
-{
-   stbtt_int32 nlen = (stbtt_int32) STBTT_strlen((char *) name);
-   stbtt_uint32 nm,hd;
-   if (!stbtt__isfont(fc+offset)) return 0;
-
-   // check italics/bold/underline flags in macStyle...
-   if (flags) {
-      hd = stbtt__find_table(fc, offset, "head");
-      if ((ttUSHORT(fc+hd+44) & 7) != (flags & 7)) return 0;
-   }
-
-   nm = stbtt__find_table(fc, offset, "name");
-   if (!nm) return 0;
-
-   if (flags) {
-      // if we checked the macStyle flags, then just check the family and ignore the subfamily
-      if (stbtt__matchpair(fc, nm, name, nlen, 16, -1))  return 1;
-      if (stbtt__matchpair(fc, nm, name, nlen,  1, -1))  return 1;
-      if (stbtt__matchpair(fc, nm, name, nlen,  3, -1))  return 1;
-   } else {
-      if (stbtt__matchpair(fc, nm, name, nlen, 16, 17))  return 1;
-      if (stbtt__matchpair(fc, nm, name, nlen,  1,  2))  return 1;
-      if (stbtt__matchpair(fc, nm, name, nlen,  3, -1))  return 1;
-   }
-
-   return 0;
-}
-
-static int stbtt_FindMatchingFont_internal(unsigned char *font_collection, char *name_utf8, stbtt_int32 flags)
-{
-   stbtt_int32 i;
-   for (i=0;;++i) {
-      stbtt_int32 off = stbtt_GetFontOffsetForIndex(font_collection, i);
-      if (off < 0) return off;
-      if (stbtt__matches((stbtt_uint8 *) font_collection, off, (stbtt_uint8*) name_utf8, flags))
-         return off;
-   }
-}
-
-#if defined(__GNUC__) || defined(__clang__)
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wcast-qual"
-#endif
-
-STBTT_DEF int stbtt_BakeFontBitmap(const unsigned char *data, int offset,
-                                double pixel_height, unsigned char *pixels, int pw, int ph,
-                                int first_char, int num_chars, stbtt_bakedchar *chardata)
-{
-   return stbtt_BakeFontBitmap_internal((unsigned char *) data, offset, pixel_height, pixels, pw, ph, first_char, num_chars, chardata);
-}
-
-STBTT_DEF int stbtt_GetFontOffsetForIndex(const unsigned char *data, int index)
-{
-   return stbtt_GetFontOffsetForIndex_internal((unsigned char *) data, index);
-}
-
-STBTT_DEF int stbtt_GetNumberOfFonts(const unsigned char *data)
-{
-   return stbtt_GetNumberOfFonts_internal((unsigned char *) data);
-}
-
-STBTT_DEF int stbtt_InitFont(stbtt_fontinfo *info, const unsigned char *data, int offset)
-{
-   return stbtt_InitFont_internal(info, (unsigned char *) data, offset);
-}
-
-STBTT_DEF int stbtt_FindMatchingFont(const unsigned char *fontdata, const char *name, int flags)
-{
-   return stbtt_FindMatchingFont_internal((unsigned char *) fontdata, (char *) name, flags);
-}
-
-STBTT_DEF int stbtt_CompareUTF8toUTF16_bigendian(const char *s1, int len1, const char *s2, int len2)
-{
-   return stbtt_CompareUTF8toUTF16_bigendian_internal((char *) s1, len1, (char *) s2, len2);
-}
-
-#if defined(__GNUC__) || defined(__clang__)
-#pragma GCC diagnostic pop
-#endif
-
-#endif // STB_TRUETYPE_IMPLEMENTATION
-
-
-// FULL VERSION HISTORY
-//
-//   1.19 (2018-02-11) OpenType GPOS kerning (horizontal only), STBTT_fmod
-//   1.18 (2018-01-29) add missing function
-//   1.17 (2017-07-23) make more arguments const; doc fix
-//   1.16 (2017-07-12) SDF support
-//   1.15 (2017-03-03) make more arguments const
-//   1.14 (2017-01-16) num-fonts-in-TTC function
-//   1.13 (2017-01-02) support OpenType fonts, certain Apple fonts
-//   1.12 (2016-10-25) suppress warnings about casting away const with -Wcast-qual
-//   1.11 (2016-04-02) fix unused-variable warning
-//   1.10 (2016-04-02) allow user-defined fabs() replacement
-//                     fix memory leak if fontsize=0.0
-//                     fix warning from duplicate typedef
-//   1.09 (2016-01-16) warning fix; avoid crash on outofmem; use alloc userdata for PackFontRanges
-//   1.08 (2015-09-13) document stbtt_Rasterize(); fixes for vertical & horizontal edges
-//   1.07 (2015-08-01) allow PackFontRanges to accept arrays of sparse codepoints;
-//                     allow PackFontRanges to pack and render in separate phases;
-//                     fix stbtt_GetFontOFfsetForIndex (never worked for non-0 input?);
-//                     fixed an assert() bug in the new rasterizer
-//                     replace assert() with STBTT_assert() in new rasterizer
-//   1.06 (2015-07-14) performance improvements (~35% faster on x86 and x64 on test machine)
-//                     also more precise AA rasterizer, except if shapes overlap
-//                     remove need for STBTT_sort
-//   1.05 (2015-04-15) fix misplaced definitions for STBTT_STATIC
-//   1.04 (2015-04-15) typo in example
-//   1.03 (2015-04-12) STBTT_STATIC, fix memory leak in new packing, various fixes
-//   1.02 (2014-12-10) fix various warnings & compile issues w/ stb_rect_pack, C++
-//   1.01 (2014-12-08) fix subpixel position when oversampling to exactly match
-//                        non-oversampled; STBTT_POINT_SIZE for packed case only
-//   1.00 (2014-12-06) add new PackBegin etc. API, w/ support for oversampling
-//   0.99 (2014-09-18) fix multiple bugs with subpixel rendering (ryg)
-//   0.9  (2014-08-07) support certain mac/iOS fonts without an MS platformID
-//   0.8b (2014-07-07) fix a warning
-//   0.8  (2014-05-25) fix a few more warnings
-//   0.7  (2013-09-25) bugfix: subpixel glyph bug fixed in 0.5 had come back
-//   0.6c (2012-07-24) improve documentation
-//   0.6b (2012-07-20) fix a few more warnings
-//   0.6  (2012-07-17) fix warnings; added stbtt_ScaleForMappingEmToPixels,
-//                        stbtt_GetFontBoundingBox, stbtt_IsGlyphEmpty
-//   0.5  (2011-12-09) bugfixes:
-//                        subpixel glyph renderer computed wrong bounding box
-//                        first vertex of shape can be off-curve (FreeSans)
-//   0.4b (2011-12-03) fixed an error in the font baking example
-//   0.4  (2011-12-01) kerning, subpixel rendering (tor)
-//                    bugfixes for:
-//                        codepoint-to-glyph conversion using table fmt=12
-//                        codepoint-to-glyph conversion using table fmt=4
-//                        stbtt_GetBakedQuad with non-square texture (Zer)
-//                    updated Hello World! sample to use kerning and subpixel
-//                    fixed some warnings
-//   0.3  (2009-06-24) cmap fmt=12, compound shapes (MM)
-//                    userdata, malloc-from-userdata, non-zero fill (stb)
-//   0.2  (2009-03-11) Fix unsigned/signed char warnings
-//   0.1  (2009-03-09) First public release
-//
-
-/*
-------------------------------------------------------------------------------
-This software is available under 2 licenses -- choose whichever you prefer.
-------------------------------------------------------------------------------
-ALTERNATIVE A - MIT License
-Copyright (c) 2017 Sean Barrett
-Permission is hereby granted, free of charge, to any person obtaining a copy of
-this software and associated documentation files (the "Software"), to deal in
-the Software without restriction, including without limitation the rights to
-use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
-of the Software, and to permit persons to whom the Software is furnished to do
-so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-------------------------------------------------------------------------------
-ALTERNATIVE B - Public Domain (www.unlicense.org)
-This is free and unencumbered software released into the public domain.
-Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
-software, either in source code form or as a compiled binary, for any purpose,
-commercial or non-commercial, and by any means.
-In jurisdictions that recognize copyright laws, the author or authors of this
-software dedicate any and all copyright interest in the software to the public
-domain. We make this dedication for the benefit of the public at large and to
-the detriment of our heirs and successors. We intend this dedication to be an
-overt act of relinquishment in perpetuity of all present and future rights to
-this software under copyright law.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------------
-*/
+// stb_truetype.h - v1.24 - public domain
+// authored from 2009-2020 by Sean Barrett / RAD Game Tools
+//
+// =======================================================================
+//
+//    NO SECURITY GUARANTEE -- DO NOT USE THIS ON UNTRUSTED FONT FILES
+//
+// This library does no range checking of the offsets found in the file,
+// meaning an attacker can use it to read arbitrary memory.
+//
+// =======================================================================
+//
+//   This library processes TrueType files:
+//        parse files
+//        extract glyph metrics
+//        extract glyph shapes
+//        render glyphs to one-channel bitmaps with antialiasing (box filter)
+//        render glyphs to one-channel SDF bitmaps (signed-distance field/function)
+//
+//   Todo:
+//        non-MS cmaps
+//        crashproof on bad data
+//        hinting? (no longer patented)
+//        cleartype-style AA?
+//        optimize: use simple memory allocator for intermediates
+//        optimize: build edge-list directly from curves
+//        optimize: rasterize directly from curves?
+//
+// ADDITIONAL CONTRIBUTORS
+//
+//   Mikko Mononen: compound shape support, more cmap formats
+//   Tor Andersson: kerning, subpixel rendering
+//   Dougall Johnson: OpenType / Type 2 font handling
+//   Daniel Ribeiro Maciel: basic GPOS-based kerning
+//
+//   Misc other:
+//       Ryan Gordon
+//       Simon Glass
+//       github:IntellectualKitty
+//       Imanol Celaya
+//       Daniel Ribeiro Maciel
+//
+//   Bug/warning reports/fixes:
+//       "Zer" on mollyrocket       Fabian "ryg" Giesen   github:NiLuJe
+//       Cass Everitt               Martins Mozeiko       github:aloucks
+//       stoiko (Haemimont Games)   Cap Petschulat        github:oyvindjam
+//       Brian Hook                 Omar Cornut           github:vassvik
+//       Walter van Niftrik         Ryan Griege
+//       David Gow                  Peter LaValle
+//       David Given                Sergey Popov
+//       Ivan-Assen Ivanov          Giumo X. Clanjor
+//       Anthony Pesch              Higor Euripedes
+//       Johan Duparc               Thomas Fields
+//       Hou Qiming                 Derek Vinyard
+//       Rob Loach                  Cort Stratton
+//       Kenney Phillis Jr.         Brian Costabile
+//       Ken Voskuil (kaesve)
+//
+// VERSION HISTORY
+//
+//   1.24 (2020-02-05) fix warning
+//   1.23 (2020-02-02) query SVG data for glyphs; query whole kerning table (but only kern not GPOS)
+//   1.22 (2019-08-11) minimize missing-glyph duplication; fix kerning if both 'GPOS' and 'kern' are defined
+//   1.21 (2019-02-25) fix warning
+//   1.20 (2019-02-07) PackFontRange skips missing codepoints; GetScaleFontVMetrics()
+//   1.19 (2018-02-11) GPOS kerning, STBTT_fmod
+//   1.18 (2018-01-29) add missing function
+//   1.17 (2017-07-23) make more arguments const; doc fix
+//   1.16 (2017-07-12) SDF support
+//   1.15 (2017-03-03) make more arguments const
+//   1.14 (2017-01-16) num-fonts-in-TTC function
+//   1.13 (2017-01-02) support OpenType fonts, certain Apple fonts
+//   1.12 (2016-10-25) suppress warnings about casting away const with -Wcast-qual
+//   1.11 (2016-04-02) fix unused-variable warning
+//   1.10 (2016-04-02) user-defined fabs(); rare memory leak; remove duplicate typedef
+//   1.09 (2016-01-16) warning fix; avoid crash on outofmem; use allocation userdata properly
+//   1.08 (2015-09-13) document stbtt_Rasterize(); fixes for vertical & horizontal edges
+//   1.07 (2015-08-01) allow PackFontRanges to accept arrays of sparse codepoints;
+//                     variant PackFontRanges to pack and render in separate phases;
+//                     fix stbtt_GetFontOFfsetForIndex (never worked for non-0 input?);
+//                     fixed an assert() bug in the new rasterizer
+//                     replace assert() with STBTT_assert() in new rasterizer
+//
+//   Full history can be found at the end of this file.
+//
+// LICENSE
+//
+//   See end of file for license information.
+//
+// USAGE
+//
+//   Include this file in whatever places need to refer to it. In ONE C/C++
+//   file, write:
+//      #define STB_TRUETYPE_IMPLEMENTATION
+//   before the #include of this file. This expands out the actual
+//   implementation into that C/C++ file.
+//
+//   To make the implementation private to the file that generates the implementation,
+//      #define STBTT_STATIC
+//
+//   Simple 3D API (don't ship this, but it's fine for tools and quick start)
+//           stbtt_BakeFontBitmap()               -- bake a font to a bitmap for use as texture
+//           stbtt_GetBakedQuad()                 -- compute quad to draw for a given char
+//
+//   Improved 3D API (more shippable):
+//           #include "stb_rect_pack.h"           -- optional, but you really want it
+//           stbtt_PackBegin()
+//           stbtt_PackSetOversampling()          -- for improved quality on small fonts
+//           stbtt_PackFontRanges()               -- pack and renders
+//           stbtt_PackEnd()
+//           stbtt_GetPackedQuad()
+//
+//   "Load" a font file from a memory buffer (you have to keep the buffer loaded)
+//           stbtt_InitFont()
+//           stbtt_GetFontOffsetForIndex()        -- indexing for TTC font collections
+//           stbtt_GetNumberOfFonts()             -- number of fonts for TTC font collections
+//
+//   Render a unicode codepoint to a bitmap
+//           stbtt_GetCodepointBitmap()           -- allocates and returns a bitmap
+//           stbtt_MakeCodepointBitmap()          -- renders into bitmap you provide
+//           stbtt_GetCodepointBitmapBox()        -- how big the bitmap must be
+//
+//   Character advance/positioning
+//           stbtt_GetCodepointHMetrics()
+//           stbtt_GetFontVMetrics()
+//           stbtt_GetFontVMetricsOS2()
+//           stbtt_GetCodepointKernAdvance()
+//
+//   Starting with version 1.06, the rasterizer was replaced with a new,
+//   faster and generally-more-precise rasterizer. The new rasterizer more
+//   accurately measures pixel coverage for anti-aliasing, except in the case
+//   where multiple shapes overlap, in which case it overestimates the AA pixel
+//   coverage. Thus, anti-aliasing of intersecting shapes may look wrong. If
+//   this turns out to be a problem, you can re-enable the old rasterizer with
+//        #define STBTT_RASTERIZER_VERSION 1
+//   which will incur about a 15% speed hit.
+//
+// ADDITIONAL DOCUMENTATION
+//
+//   Immediately after this block comment are a series of sample programs.
+//
+//   After the sample programs is the "header file" section. This section
+//   includes documentation for each API function.
+//
+//   Some important concepts to understand to use this library:
+//
+//      Codepoint
+//         Characters are defined by unicode codepoints, e.g. 65 is
+//         uppercase A, 231 is lowercase c with a cedilla, 0x7e30 is
+//         the hiragana for "ma".
+//
+//      Glyph
+//         A visual character shape (every codepoint is rendered as
+//         some glyph)
+//
+//      Glyph index
+//         A font-specific integer ID representing a glyph
+//
+//      Baseline
+//         Glyph shapes are defined relative to a baseline, which is the
+//         bottom of uppercase characters. Characters extend both above
+//         and below the baseline.
+//
+//      Current Point
+//         As you draw text to the screen, you keep track of a "current point"
+//         which is the origin of each character. The current point's vertical
+//         position is the baseline. Even "baked fonts" use this model.
+//
+//      Vertical Font Metrics
+//         The vertical qualities of the font, used to vertically position
+//         and space the characters. See docs for stbtt_GetFontVMetrics.
+//
+//      Font Size in Pixels or Points
+//         The preferred interface for specifying font sizes in stb_truetype
+//         is to specify how tall the font's vertical extent should be in pixels.
+//         If that sounds good enough, skip the next paragraph.
+//
+//         Most font APIs instead use "points", which are a common typographic
+//         measurement for describing font size, defined as 72 points per inch.
+//         stb_truetype provides a point API for compatibility. However, true
+//         "per inch" conventions don't make much sense on computer displays
+//         since different monitors have different number of pixels per
+//         inch. For example, Windows traditionally uses a convention that
+//         there are 96 pixels per inch, thus making 'inch' measurements have
+//         nothing to do with inches, and thus effectively defining a point to
+//         be 1.333 pixels. Additionally, the TrueType font data provides
+//         an explicit scale factor to scale a given font's glyphs to points,
+//         but the author has observed that this scale factor is often wrong
+//         for non-commercial fonts, thus making fonts scaled in points
+//         according to the TrueType spec incoherently sized in practice.
+//
+// DETAILED USAGE:
+//
+//  Scale:
+//    Select how high you want the font to be, in points or pixels.
+//    Call ScaleForPixelHeight or ScaleForMappingEmToPixels to compute
+//    a scale factor SF that will be used by all other functions.
+//
+//  Baseline:
+//    You need to select a y-coordinate that is the baseline of where
+//    your text will appear. Call GetFontBoundingBox to get the baseline-relative
+//    bounding box for all characters. SF*-y0 will be the distance in pixels
+//    that the worst-case character could extend above the baseline, so if
+//    you want the top edge of characters to appear at the top of the
+//    screen where y=0, then you would set the baseline to SF*-y0.
+//
+//  Current point:
+//    Set the current point where the first character will appear. The
+//    first character could extend left of the current point; this is font
+//    dependent. You can either choose a current point that is the leftmost
+//    point and hope, or add some padding, or check the bounding box or
+//    left-side-bearing of the first character to be displayed and set
+//    the current point based on that.
+//
+//  Displaying a character:
+//    Compute the bounding box of the character. It will contain signed values
+//    relative to <current_point, baseline>. I.e. if it returns x0,y0,x1,y1,
+//    then the character should be displayed in the rectangle from
+//    <current_point+SF*x0, baseline+SF*y0> to <current_point+SF*x1,baseline+SF*y1).
+//
+//  Advancing for the next character:
+//    Call GlyphHMetrics, and compute 'current_point += SF * advance'.
+//
+//
+// ADVANCED USAGE
+//
+//   Quality:
+//
+//    - Use the functions with Subpixel at the end to allow your characters
+//      to have subpixel positioning. Since the font is anti-aliased, not
+//      hinted, this is very import for quality. (This is not possible with
+//      baked fonts.)
+//
+//    - Kerning is now supported, and if you're supporting subpixel rendering
+//      then kerning is worth using to give your text a polished look.
+//
+//   Performance:
+//
+//    - Convert Unicode codepoints to glyph indexes and operate on the glyphs;
+//      if you don't do this, stb_truetype is forced to do the conversion on
+//      every call.
+//
+//    - There are a lot of memory allocations. We should modify it to take
+//      a temp buffer and allocate from the temp buffer (without freeing),
+//      should help performance a lot.
+//
+// NOTES
+//
+//   The system uses the raw data found in the .ttf file without changing it
+//   and without building auxiliary data structures. This is a bit inefficient
+//   on little-endian systems (the data is big-endian), but assuming you're
+//   caching the bitmaps or glyph shapes this shouldn't be a big deal.
+//
+//   It appears to be very hard to programmatically determine what font a
+//   given file is in a general way. I provide an API for this, but I don't
+//   recommend it.
+//
+//
+// PERFORMANCE MEASUREMENTS FOR 1.06:
+//
+//                      32-bit     64-bit
+//   Previous release:  8.83 s     7.68 s
+//   Pool allocations:  7.72 s     6.34 s
+//   Inline sort     :  6.54 s     5.65 s
+//   New rasterizer  :  5.63 s     5.00 s
+
+//////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////////
+////
+////  SAMPLE PROGRAMS
+////
+//
+//  Incomplete text-in-3d-api example, which draws quads properly aligned to be lossless
+//
+#if 0
+#define STB_TRUETYPE_IMPLEMENTATION  // force following include to generate implementation
+#include "stb_truetype.h"
+
+unsigned char ttf_buffer[1<<20];
+unsigned char temp_bitmap[512*512];
+
+stbtt_bakedchar cdata[96]; // ASCII 32..126 is 95 glyphs
+GLuint ftex;
+
+void my_stbtt_initfont(void)
+{
+   fread(ttf_buffer, 1, 1<<20, fopen("c:/windows/fonts/times.ttf", "rb"));
+   stbtt_BakeFontBitmap(ttf_buffer,0, 32.0, temp_bitmap,512,512, 32,96, cdata); // no guarantee this fits!
+   // can free ttf_buffer at this point
+   glGenTextures(1, &ftex);
+   glBindTexture(GL_TEXTURE_2D, ftex);
+   glTexImage2D(GL_TEXTURE_2D, 0, GL_ALPHA, 512,512, 0, GL_ALPHA, GL_UNSIGNED_BYTE, temp_bitmap);
+   // can free temp_bitmap at this point
+   glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+}
+
+void my_stbtt_print(double x, double y, char *text)
+{
+   // assume orthographic projection with units = screen pixels, origin at top left
+   glEnable(GL_TEXTURE_2D);
+   glBindTexture(GL_TEXTURE_2D, ftex);
+   glBegin(GL_QUADS);
+   while (*text) {
+      if (*text >= 32 && *text < 128) {
+         stbtt_aligned_quad q;
+         stbtt_GetBakedQuad(cdata, 512,512, *text-32, &x,&y,&q,1);//1=opengl & d3d10+,0=d3d9
+         glTexCoord2f(q.s0,q.t1); glVertex2f(q.x0,q.y0);
+         glTexCoord2f(q.s1,q.t1); glVertex2f(q.x1,q.y0);
+         glTexCoord2f(q.s1,q.t0); glVertex2f(q.x1,q.y1);
+         glTexCoord2f(q.s0,q.t0); glVertex2f(q.x0,q.y1);
+      }
+      ++text;
+   }
+   glEnd();
+}
+#endif
+//
+//
+//////////////////////////////////////////////////////////////////////////////
+//
+// Complete program (this compiles): get a single bitmap, print as ASCII art
+//
+#if 0
+#include <stdio.h>
+#define STB_TRUETYPE_IMPLEMENTATION  // force following include to generate implementation
+#include "stb_truetype.h"
+
+char ttf_buffer[1<<25];
+
+int main(int argc, char **argv)
+{
+   stbtt_fontinfo font;
+   unsigned char *bitmap;
+   int w,h,i,j,c = (argc > 1 ? atoi(argv[1]) : 'a'), s = (argc > 2 ? atoi(argv[2]) : 20);
+
+   fread(ttf_buffer, 1, 1<<25, fopen(argc > 3 ? argv[3] : "c:/windows/fonts/arialbd.ttf", "rb"));
+
+   stbtt_InitFont(&font, ttf_buffer, stbtt_GetFontOffsetForIndex(ttf_buffer,0));
+   bitmap = stbtt_GetCodepointBitmap(&font, 0,stbtt_ScaleForPixelHeight(&font, s), c, &w, &h, 0,0);
+
+   for (j=0; j < h; ++j) {
+      for (i=0; i < w; ++i)
+         putchar(" .:ioVM@"[bitmap[j*w+i]>>5]);
+      putchar('\n');
+   }
+   return 0;
+}
+#endif
+//
+// Output:
+//
+//     .ii.
+//    @@@@@@.
+//   V@Mio@@o
+//   :i.  V@V
+//     :oM@@M
+//   :@@@MM@M
+//   @@o  o@M
+//  :@@.  M@M
+//   @@@o@@@@
+//   :M@@V:@@.
+//
+//////////////////////////////////////////////////////////////////////////////
+//
+// Complete program: print "Hello World!" banner, with bugs
+//
+#if 0
+char buffer[24<<20];
+unsigned char screen[20][79];
+
+int main(int arg, char **argv)
+{
+   stbtt_fontinfo font;
+   int i,j,ascent,baseline,ch=0;
+   double scale, xpos=2; // leave a little padding in case the character extends left
+   char *text = "Heljo World!"; // intentionally misspelled to show 'lj' brokenness
+
+   fread(buffer, 1, 1000000, fopen("c:/windows/fonts/arialbd.ttf", "rb"));
+   stbtt_InitFont(&font, buffer, 0);
+
+   scale = stbtt_ScaleForPixelHeight(&font, 15);
+   stbtt_GetFontVMetrics(&font, &ascent,0,0);
+   baseline = (int) (ascent*scale);
+
+   while (text[ch]) {
+      int advance,lsb,x0,y0,x1,y1;
+      double x_shift = xpos - (double) floor(xpos);
+      stbtt_GetCodepointHMetrics(&font, text[ch], &advance, &lsb);
+      stbtt_GetCodepointBitmapBoxSubpixel(&font, text[ch], scale,scale,x_shift,0, &x0,&y0,&x1,&y1);
+      stbtt_MakeCodepointBitmapSubpixel(&font, &screen[baseline + y0][(int) xpos + x0], x1-x0,y1-y0, 79, scale,scale,x_shift,0, text[ch]);
+      // note that this stomps the old data, so where character boxes overlap (e.g. 'lj') it's wrong
+      // because this API is really for baking character bitmaps into textures. if you want to render
+      // a sequence of characters, you really need to render each bitmap to a temp buffer, then
+      // "alpha blend" that into the working buffer
+      xpos += (advance * scale);
+      if (text[ch+1])
+         xpos += scale*stbtt_GetCodepointKernAdvance(&font, text[ch],text[ch+1]);
+      ++ch;
+   }
+
+   for (j=0; j < 20; ++j) {
+      for (i=0; i < 78; ++i)
+         putchar(" .:ioVM@"[screen[j][i]>>5]);
+      putchar('\n');
+   }
+
+   return 0;
+}
+#endif
+
+
+//////////////////////////////////////////////////////////////////////////////
+//////////////////////////////////////////////////////////////////////////////
+////
+////   INTEGRATION WITH YOUR CODEBASE
+////
+////   The following sections allow you to supply alternate definitions
+////   of C library functions used by stb_truetype, e.g. if you don't
+////   link with the C runtime library.
+
+#ifdef STB_TRUETYPE_IMPLEMENTATION
+   // #define your own (u)stbtt_int8/16/32 before including to override this
+   #ifndef stbtt_uint8
+   typedef unsigned char   stbtt_uint8;
+   typedef signed   char   stbtt_int8;
+   typedef unsigned short  stbtt_uint16;
+   typedef signed   short  stbtt_int16;
+   typedef unsigned int    stbtt_uint32;
+   typedef signed   int    stbtt_int32;
+   #endif
+
+   typedef char stbtt__check_size32[sizeof(stbtt_int32)==4 ? 1 : -1];
+   typedef char stbtt__check_size16[sizeof(stbtt_int16)==2 ? 1 : -1];
+
+   // e.g. #define your own STBTT_ifloor/STBTT_iceil() to avoid math.h
+   #ifndef STBTT_ifloor
+   #include <math.h>
+   #define STBTT_ifloor(x)   ((int) floor(x))
+   #define STBTT_iceil(x)    ((int) ceil(x))
+   #endif
+
+   #ifndef STBTT_sqrt
+   #include <math.h>
+   #define STBTT_sqrt(x)      sqrt(x)
+   #define STBTT_pow(x,y)     pow(x,y)
+   #endif
+
+   #ifndef STBTT_fmod
+   #include <math.h>
+   #define STBTT_fmod(x,y)    fmod(x,y)
+   #endif
+
+   #ifndef STBTT_cos
+   #include <math.h>
+   #define STBTT_cos(x)       cos(x)
+   #define STBTT_acos(x)      acos(x)
+   #endif
+
+   #ifndef STBTT_fabs
+   #include <math.h>
+   #define STBTT_fabs(x)      fabs(x)
+   #endif
+
+   // #define your own functions "STBTT_malloc" / "STBTT_free" to avoid malloc.h
+   #ifndef STBTT_malloc
+   #include <stdlib.h>
+   #define STBTT_malloc(x,u)  ((void)(u),malloc(x))
+   #define STBTT_free(x,u)    ((void)(u),free(x))
+   #endif
+
+   #ifndef STBTT_assert
+   #include <assert.h>
+   #define STBTT_assert(x)    assert(x)
+   #endif
+
+   #ifndef STBTT_strlen
+   #include <string.h>
+   #define STBTT_strlen(x)    strlen(x)
+   #endif
+
+   #ifndef STBTT_memcpy
+   #include <string.h>
+   #define STBTT_memcpy       memcpy
+   #define STBTT_memset       memset
+   #endif
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////
+////
+////   INTERFACE
+////
+////
+
+#ifndef __STB_INCLUDE_STB_TRUETYPE_H__
+#define __STB_INCLUDE_STB_TRUETYPE_H__
+
+#ifdef STBTT_STATIC
+#define STBTT_DEF static
+#else
+#define STBTT_DEF extern
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// private structure
+typedef struct
+{
+   unsigned char *data;
+   int cursor;
+   int size;
+} stbtt__buf;
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// TEXTURE BAKING API
+//
+// If you use this API, you only have to call two functions ever.
+//
+
+typedef struct
+{
+   unsigned short x0,y0,x1,y1; // coordinates of bbox in bitmap
+   double xoff,yoff,xadvance;
+} stbtt_bakedchar;
+
+STBTT_DEF int stbtt_BakeFontBitmap(const unsigned char *data, int offset,  // font location (use offset=0 for plain .ttf)
+                                double pixel_height,                     // height of font in pixels
+                                unsigned char *pixels, int pw, int ph,  // bitmap to be filled in
+                                int first_char, int num_chars,          // characters to bake
+                                stbtt_bakedchar *chardata);             // you allocate this, it's num_chars long
+// if return is positive, the first unused row of the bitmap
+// if return is negative, returns the negative of the number of characters that fit
+// if return is 0, no characters fit and no rows were used
+// This uses a very crappy packing.
+
+typedef struct
+{
+   double x0,y0,s0,t0; // top-left
+   double x1,y1,s1,t1; // bottom-right
+} stbtt_aligned_quad;
+
+STBTT_DEF void stbtt_GetBakedQuad(const stbtt_bakedchar *chardata, int pw, int ph,  // same data as above
+                               int char_index,             // character to display
+                               double *xpos, double *ypos,   // pointers to current position in screen pixel space
+                               stbtt_aligned_quad *q,      // output: quad to draw
+                               int opengl_fillrule);       // true if opengl fill rule; false if DX9 or earlier
+// Call GetBakedQuad with char_index = 'character - first_char', and it
+// creates the quad you need to draw and advances the current position.
+//
+// The coordinate system used assumes y increases downwards.
+//
+// Characters will extend both above and below the current position;
+// see discussion of "BASELINE" above.
+//
+// It's inefficient; you might want to c&p it and optimize it.
+
+STBTT_DEF void stbtt_GetScaledFontVMetrics(const unsigned char *fontdata, int index, double size, double *ascent, double *descent, double *lineGap);
+// Query the font vertical metrics without having to create a font first.
+
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// NEW TEXTURE BAKING API
+//
+// This provides options for packing multiple fonts into one atlas, not
+// perfectly but better than nothing.
+
+typedef struct
+{
+   unsigned short x0,y0,x1,y1; // coordinates of bbox in bitmap
+   double xoff,yoff,xadvance;
+   double xoff2,yoff2;
+} stbtt_packedchar;
+
+typedef struct stbtt_pack_context stbtt_pack_context;
+typedef struct stbtt_fontinfo stbtt_fontinfo;
+#ifndef STB_RECT_PACK_VERSION
+typedef struct stbrp_rect stbrp_rect;
+#endif
+
+STBTT_DEF int  stbtt_PackBegin(stbtt_pack_context *spc, unsigned char *pixels, int width, int height, int stride_in_bytes, int padding, void *alloc_context);
+// Initializes a packing context stored in the passed-in stbtt_pack_context.
+// Future calls using this context will pack characters into the bitmap passed
+// in here: a 1-channel bitmap that is width * height. stride_in_bytes is
+// the distance from one row to the next (or 0 to mean they are packed tightly
+// together). "padding" is the amount of padding to leave between each
+// character (normally you want '1' for bitmaps you'll use as textures with
+// bilinear filtering).
+//
+// Returns 0 on failure, 1 on success.
+
+STBTT_DEF void stbtt_PackEnd  (stbtt_pack_context *spc);
+// Cleans up the packing context and frees all memory.
+
+#define STBTT_POINT_SIZE(x)   (-(x))
+
+STBTT_DEF int  stbtt_PackFontRange(stbtt_pack_context *spc, const unsigned char *fontdata, int font_index, double font_size,
+                                int first_unicode_char_in_range, int num_chars_in_range, stbtt_packedchar *chardata_for_range);
+// Creates character bitmaps from the font_index'th font found in fontdata (use
+// font_index=0 if you don't know what that is). It creates num_chars_in_range
+// bitmaps for characters with unicode values starting at first_unicode_char_in_range
+// and increasing. Data for how to render them is stored in chardata_for_range;
+// pass these to stbtt_GetPackedQuad to get back renderable quads.
+//
+// font_size is the full height of the character from ascender to descender,
+// as computed by stbtt_ScaleForPixelHeight. To use a point size as computed
+// by stbtt_ScaleForMappingEmToPixels, wrap the point size in STBTT_POINT_SIZE()
+// and pass that result as 'font_size':
+//       ...,                  20 , ... // font max minus min y is 20 pixels tall
+//       ..., STBTT_POINT_SIZE(20), ... // 'M' is 20 pixels tall
+
+typedef struct
+{
+   double font_size;
+   int first_unicode_codepoint_in_range;  // if non-zero, then the chars are continuous, and this is the first codepoint
+   int *array_of_unicode_codepoints;       // if non-zero, then this is an array of unicode codepoints
+   int num_chars;
+   stbtt_packedchar *chardata_for_range; // output
+   unsigned char h_oversample, v_oversample; // don't set these, they're used internally
+} stbtt_pack_range;
+
+STBTT_DEF int  stbtt_PackFontRanges(stbtt_pack_context *spc, const unsigned char *fontdata, int font_index, stbtt_pack_range *ranges, int num_ranges);
+// Creates character bitmaps from multiple ranges of characters stored in
+// ranges. This will usually create a better-packed bitmap than multiple
+// calls to stbtt_PackFontRange. Note that you can call this multiple
+// times within a single PackBegin/PackEnd.
+
+STBTT_DEF void stbtt_PackSetOversampling(stbtt_pack_context *spc, unsigned int h_oversample, unsigned int v_oversample);
+// Oversampling a font increases the quality by allowing higher-quality subpixel
+// positioning, and is especially valuable at smaller text sizes.
+//
+// This function sets the amount of oversampling for all following calls to
+// stbtt_PackFontRange(s) or stbtt_PackFontRangesGatherRects for a given
+// pack context. The default (no oversampling) is achieved by h_oversample=1
+// and v_oversample=1. The total number of pixels required is
+// h_oversample*v_oversample larger than the default; for example, 2x2
+// oversampling requires 4x the storage of 1x1. For best results, render
+// oversampled textures with bilinear filtering. Look at the readme in
+// stb/tests/oversample for information about oversampled fonts
+//
+// To use with PackFontRangesGather etc., you must set it before calls
+// call to PackFontRangesGatherRects.
+
+STBTT_DEF void stbtt_PackSetSkipMissingCodepoints(stbtt_pack_context *spc, int skip);
+// If skip != 0, this tells stb_truetype to skip any codepoints for which
+// there is no corresponding glyph. If skip=0, which is the default, then
+// codepoints without a glyph recived the font's "missing character" glyph,
+// typically an empty box by convention.
+
+STBTT_DEF void stbtt_GetPackedQuad(const stbtt_packedchar *chardata, int pw, int ph,  // same data as above
+                               int char_index,             // character to display
+                               double *xpos, double *ypos,   // pointers to current position in screen pixel space
+                               stbtt_aligned_quad *q,      // output: quad to draw
+                               int align_to_integer);
+
+STBTT_DEF int  stbtt_PackFontRangesGatherRects(stbtt_pack_context *spc, const stbtt_fontinfo *info, stbtt_pack_range *ranges, int num_ranges, stbrp_rect *rects);
+STBTT_DEF void stbtt_PackFontRangesPackRects(stbtt_pack_context *spc, stbrp_rect *rects, int num_rects);
+STBTT_DEF int  stbtt_PackFontRangesRenderIntoRects(stbtt_pack_context *spc, const stbtt_fontinfo *info, stbtt_pack_range *ranges, int num_ranges, stbrp_rect *rects);
+// Calling these functions in sequence is roughly equivalent to calling
+// stbtt_PackFontRanges(). If you more control over the packing of multiple
+// fonts, or if you want to pack custom data into a font texture, take a look
+// at the source to of stbtt_PackFontRanges() and create a custom version
+// using these functions, e.g. call GatherRects multiple times,
+// building up a single array of rects, then call PackRects once,
+// then call RenderIntoRects repeatedly. This may result in a
+// better packing than calling PackFontRanges multiple times
+// (or it may not).
+
+// this is an opaque structure that you shouldn't mess with which holds
+// all the context needed from PackBegin to PackEnd.
+struct stbtt_pack_context {
+   void *user_allocator_context;
+   void *pack_info;
+   int   width;
+   int   height;
+   int   stride_in_bytes;
+   int   padding;
+   int   skip_missing;
+   unsigned int   h_oversample, v_oversample;
+   unsigned char *pixels;
+   void  *nodes;
+};
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// FONT LOADING
+//
+//
+
+STBTT_DEF int stbtt_GetNumberOfFonts(const unsigned char *data);
+// This function will determine the number of fonts in a font file.  TrueType
+// collection (.ttc) files may contain multiple fonts, while TrueType font
+// (.ttf) files only contain one font. The number of fonts can be used for
+// indexing with the previous function where the index is between zero and one
+// less than the total fonts. If an error occurs, -1 is returned.
+
+STBTT_DEF int stbtt_GetFontOffsetForIndex(const unsigned char *data, int index);
+// Each .ttf/.ttc file may have more than one font. Each font has a sequential
+// index number starting from 0. Call this function to get the font offset for
+// a given index; it returns -1 if the index is out of range. A regular .ttf
+// file will only define one font and it always be at offset 0, so it will
+// return '0' for index 0, and -1 for all other indices.
+
+// The following structure is defined publicly so you can declare one on
+// the stack or as a global or etc, but you should treat it as opaque.
+struct stbtt_fontinfo
+{
+   void           * userdata;
+   unsigned char  * data;              // pointer to .ttf file
+   int              fontstart;         // offset of start of font
+
+   int numGlyphs;                     // number of glyphs, needed for range checking
+
+   int loca,head,glyf,hhea,hmtx,kern,gpos,svg; // table locations as offset from start of .ttf
+   int index_map;                     // a cmap mapping for our chosen character encoding
+   int indexToLocFormat;              // format needed to map from glyph index to glyph
+
+   stbtt__buf cff;                    // cff font data
+   stbtt__buf charstrings;            // the charstring index
+   stbtt__buf gsubrs;                 // global charstring subroutines index
+   stbtt__buf subrs;                  // private charstring subroutines index
+   stbtt__buf fontdicts;              // array of font dicts
+   stbtt__buf fdselect;               // map from glyph to fontdict
+};
+
+STBTT_DEF int stbtt_InitFont(stbtt_fontinfo *info, const unsigned char *data, int offset);
+// Given an offset into the file that defines a font, this function builds
+// the necessary cached info for the rest of the system. You must allocate
+// the stbtt_fontinfo yourself, and stbtt_InitFont will fill it out. You don't
+// need to do anything special to free it, because the contents are pure
+// value data with no additional data structures. Returns 0 on failure.
+
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// CHARACTER TO GLYPH-INDEX CONVERSIOn
+
+STBTT_DEF int stbtt_FindGlyphIndex(const stbtt_fontinfo *info, int unicode_codepoint);
+// If you're going to perform multiple operations on the same character
+// and you want a speed-up, call this function with the character you're
+// going to process, then use glyph-based functions instead of the
+// codepoint-based functions.
+// Returns 0 if the character codepoint is not defined in the font.
+
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// CHARACTER PROPERTIES
+//
+
+STBTT_DEF double stbtt_ScaleForPixelHeight(const stbtt_fontinfo *info, double pixels);
+// computes a scale factor to produce a font whose "height" is 'pixels' tall.
+// Height is measured as the distance from the highest ascender to the lowest
+// descender; in other words, it's equivalent to calling stbtt_GetFontVMetrics
+// and computing:
+//       scale = pixels / (ascent - descent)
+// so if you prefer to measure height by the ascent only, use a similar calculation.
+
+STBTT_DEF double stbtt_ScaleForMappingEmToPixels(const stbtt_fontinfo *info, double pixels);
+// computes a scale factor to produce a font whose EM size is mapped to
+// 'pixels' tall. This is probably what traditional APIs compute, but
+// I'm not positive.
+
+STBTT_DEF void stbtt_GetFontVMetrics(const stbtt_fontinfo *info, int *ascent, int *descent, int *lineGap);
+// ascent is the coordinate above the baseline the font extends; descent
+// is the coordinate below the baseline the font extends (i.e. it is typically negative)
+// lineGap is the spacing between one row's descent and the next row's ascent...
+// so you should advance the vertical position by "*ascent - *descent + *lineGap"
+//   these are expressed in unscaled coordinates, so you must multiply by
+//   the scale factor for a given size
+
+STBTT_DEF int  stbtt_GetFontVMetricsOS2(const stbtt_fontinfo *info, int *typoAscent, int *typoDescent, int *typoLineGap);
+// analogous to GetFontVMetrics, but returns the "typographic" values from the OS/2
+// table (specific to MS/Windows TTF files).
+//
+// Returns 1 on success (table present), 0 on failure.
+
+STBTT_DEF void stbtt_GetFontBoundingBox(const stbtt_fontinfo *info, int *x0, int *y0, int *x1, int *y1);
+// the bounding box around all possible characters
+
+STBTT_DEF void stbtt_GetCodepointHMetrics(const stbtt_fontinfo *info, int codepoint, int *advanceWidth, int *leftSideBearing);
+// leftSideBearing is the offset from the current horizontal position to the left edge of the character
+// advanceWidth is the offset from the current horizontal position to the next horizontal position
+//   these are expressed in unscaled coordinates
+
+STBTT_DEF int  stbtt_GetCodepointKernAdvance(const stbtt_fontinfo *info, int ch1, int ch2);
+// an additional amount to add to the 'advance' value between ch1 and ch2
+
+STBTT_DEF int stbtt_GetCodepointBox(const stbtt_fontinfo *info, int codepoint, int *x0, int *y0, int *x1, int *y1);
+// Gets the bounding box of the visible part of the glyph, in unscaled coordinates
+
+STBTT_DEF void stbtt_GetGlyphHMetrics(const stbtt_fontinfo *info, int glyph_index, int *advanceWidth, int *leftSideBearing);
+STBTT_DEF int  stbtt_GetGlyphKernAdvance(const stbtt_fontinfo *info, int glyph1, int glyph2);
+STBTT_DEF int  stbtt_GetGlyphBox(const stbtt_fontinfo *info, int glyph_index, int *x0, int *y0, int *x1, int *y1);
+// as above, but takes one or more glyph indices for greater efficiency
+
+typedef struct stbtt_kerningentry
+{
+   int glyph1; // use stbtt_FindGlyphIndex
+   int glyph2;
+   int advance;
+} stbtt_kerningentry;
+
+STBTT_DEF int  stbtt_GetKerningTableLength(const stbtt_fontinfo *info);
+STBTT_DEF int  stbtt_GetKerningTable(const stbtt_fontinfo *info, stbtt_kerningentry* table, int table_length);
+// Retrieves a complete list of all of the kerning pairs provided by the font
+// stbtt_GetKerningTable never writes more than table_length entries and returns how many entries it did write.
+// The table will be sorted by (a.glyph1 == b.glyph1)?(a.glyph2 < b.glyph2):(a.glyph1 < b.glyph1)
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// GLYPH SHAPES (you probably don't need these, but they have to go before
+// the bitmaps for C declaration-order reasons)
+//
+
+#ifndef STBTT_vmove // you can predefine these to use different values (but why?)
+   enum {
+      STBTT_vmove=1,
+      STBTT_vline,
+      STBTT_vcurve,
+      STBTT_vcubic
+   };
+#endif
+
+#ifndef stbtt_vertex // you can predefine this to use different values
+                   // (we share this with other code at RAD)
+   #define stbtt_vertex_type short // can't use stbtt_int16 because that's not visible in the header file
+   typedef struct
+   {
+      stbtt_vertex_type x,y,cx,cy,cx1,cy1;
+      unsigned char type,padding;
+   } stbtt_vertex;
+#endif
+
+STBTT_DEF int stbtt_IsGlyphEmpty(const stbtt_fontinfo *info, int glyph_index);
+// returns non-zero if nothing is drawn for this glyph
+
+STBTT_DEF int stbtt_GetCodepointShape(const stbtt_fontinfo *info, int unicode_codepoint, stbtt_vertex **vertices);
+STBTT_DEF int stbtt_GetGlyphShape(const stbtt_fontinfo *info, int glyph_index, stbtt_vertex **vertices);
+// returns # of vertices and fills *vertices with the pointer to them
+//   these are expressed in "unscaled" coordinates
+//
+// The shape is a series of contours. Each one starts with
+// a STBTT_moveto, then consists of a series of mixed
+// STBTT_lineto and STBTT_curveto segments. A lineto
+// draws a line from previous endpoint to its x,y; a curveto
+// draws a quadratic bezier from previous endpoint to
+// its x,y, using cx,cy as the bezier control point.
+
+STBTT_DEF void stbtt_FreeShape(const stbtt_fontinfo *info, stbtt_vertex *vertices);
+// frees the data allocated above
+
+STBTT_DEF int stbtt_GetCodepointSVG(const stbtt_fontinfo *info, int unicode_codepoint, const char **svg);
+STBTT_DEF int stbtt_GetGlyphSVG(const stbtt_fontinfo *info, int gl, const char **svg);
+// fills svg with the character's SVG data.
+// returns data size or 0 if SVG not found.
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// BITMAP RENDERING
+//
+
+STBTT_DEF void stbtt_FreeBitmap(unsigned char *bitmap, void *userdata);
+// frees the bitmap allocated below
+
+STBTT_DEF unsigned char *stbtt_GetCodepointBitmap(const stbtt_fontinfo *info, double scale_x, double scale_y, int codepoint, int *width, int *height, int *xoff, int *yoff);
+// allocates a large-enough single-channel 8bpp bitmap and renders the
+// specified character/glyph at the specified scale into it, with
+// antialiasing. 0 is no coverage (transparent), 255 is fully covered (opaque).
+// *width & *height are filled out with the width & height of the bitmap,
+// which is stored left-to-right, top-to-bottom.
+//
+// xoff/yoff are the offset it pixel space from the glyph origin to the top-left of the bitmap
+
+STBTT_DEF unsigned char *stbtt_GetCodepointBitmapSubpixel(const stbtt_fontinfo *info, double scale_x, double scale_y, double shift_x, double shift_y, int codepoint, int *width, int *height, int *xoff, int *yoff);
+// the same as stbtt_GetCodepoitnBitmap, but you can specify a subpixel
+// shift for the character
+
+STBTT_DEF void stbtt_MakeCodepointBitmap(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, double scale_x, double scale_y, int codepoint);
+// the same as stbtt_GetCodepointBitmap, but you pass in storage for the bitmap
+// in the form of 'output', with row spacing of 'out_stride' bytes. the bitmap
+// is clipped to out_w/out_h bytes. Call stbtt_GetCodepointBitmapBox to get the
+// width and height and positioning info for it first.
+
+STBTT_DEF void stbtt_MakeCodepointBitmapSubpixel(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, double scale_x, double scale_y, double shift_x, double shift_y, int codepoint);
+// same as stbtt_MakeCodepointBitmap, but you can specify a subpixel
+// shift for the character
+
+STBTT_DEF void stbtt_MakeCodepointBitmapSubpixelPrefilter(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, double scale_x, double scale_y, double shift_x, double shift_y, int oversample_x, int oversample_y, double *sub_x, double *sub_y, int codepoint);
+// same as stbtt_MakeCodepointBitmapSubpixel, but prefiltering
+// is performed (see stbtt_PackSetOversampling)
+
+STBTT_DEF void stbtt_GetCodepointBitmapBox(const stbtt_fontinfo *font, int codepoint, double scale_x, double scale_y, int *ix0, int *iy0, int *ix1, int *iy1);
+// get the bbox of the bitmap centered around the glyph origin; so the
+// bitmap width is ix1-ix0, height is iy1-iy0, and location to place
+// the bitmap top left is (leftSideBearing*scale,iy0).
+// (Note that the bitmap uses y-increases-down, but the shape uses
+// y-increases-up, so CodepointBitmapBox and CodepointBox are inverted.)
+
+STBTT_DEF void stbtt_GetCodepointBitmapBoxSubpixel(const stbtt_fontinfo *font, int codepoint, double scale_x, double scale_y, double shift_x, double shift_y, int *ix0, int *iy0, int *ix1, int *iy1);
+// same as stbtt_GetCodepointBitmapBox, but you can specify a subpixel
+// shift for the character
+
+// the following functions are equivalent to the above functions, but operate
+// on glyph indices instead of Unicode codepoints (for efficiency)
+STBTT_DEF unsigned char *stbtt_GetGlyphBitmap(const stbtt_fontinfo *info, double scale_x, double scale_y, int glyph, int *width, int *height, int *xoff, int *yoff);
+STBTT_DEF unsigned char *stbtt_GetGlyphBitmapSubpixel(const stbtt_fontinfo *info, double scale_x, double scale_y, double shift_x, double shift_y, int glyph, int *width, int *height, int *xoff, int *yoff);
+STBTT_DEF void stbtt_MakeGlyphBitmap(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, double scale_x, double scale_y, int glyph);
+STBTT_DEF void stbtt_MakeGlyphBitmapSubpixel(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, double scale_x, double scale_y, double shift_x, double shift_y, int glyph);
+STBTT_DEF void stbtt_MakeGlyphBitmapSubpixelPrefilter(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, double scale_x, double scale_y, double shift_x, double shift_y, int oversample_x, int oversample_y, double *sub_x, double *sub_y, int glyph);
+STBTT_DEF void stbtt_GetGlyphBitmapBox(const stbtt_fontinfo *font, int glyph, double scale_x, double scale_y, int *ix0, int *iy0, int *ix1, int *iy1);
+STBTT_DEF void stbtt_GetGlyphBitmapBoxSubpixel(const stbtt_fontinfo *font, int glyph, double scale_x, double scale_y,double shift_x, double shift_y, int *ix0, int *iy0, int *ix1, int *iy1);
+
+
+// @TODO: don't expose this structure
+typedef struct
+{
+   int w,h,stride;
+   unsigned char *pixels;
+} stbtt__bitmap;
+
+// rasterize a shape with quadratic beziers into a bitmap
+STBTT_DEF void stbtt_Rasterize(stbtt__bitmap *result,        // 1-channel bitmap to draw into
+                               double flatness_in_pixels,     // allowable error of curve in pixels
+                               stbtt_vertex *vertices,       // array of vertices defining shape
+                               int num_verts,                // number of vertices in above array
+                               double scale_x, double scale_y, // scale applied to input vertices
+                               double shift_x, double shift_y, // translation applied to input vertices
+                               int x_off, int y_off,         // another translation applied to input
+                               int invert,                   // if non-zero, vertically flip shape
+                               void *userdata);              // context for to STBTT_MALLOC
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// Signed Distance Function (or Field) rendering
+
+STBTT_DEF void stbtt_FreeSDF(unsigned char *bitmap, void *userdata);
+// frees the SDF bitmap allocated below
+
+STBTT_DEF unsigned char * stbtt_GetGlyphSDF(const stbtt_fontinfo *info, double scale, int glyph, int padding, unsigned char onedge_value, double pixel_dist_scale, int *width, int *height, int *xoff, int *yoff);
+STBTT_DEF unsigned char * stbtt_GetCodepointSDF(const stbtt_fontinfo *info, double scale, int codepoint, int padding, unsigned char onedge_value, double pixel_dist_scale, int *width, int *height, int *xoff, int *yoff);
+// These functions compute a discretized SDF field for a single character, suitable for storing
+// in a single-channel texture, sampling with bilinear filtering, and testing against
+// larger than some threshold to produce scalable fonts.
+//        info              --  the font
+//        scale             --  controls the size of the resulting SDF bitmap, same as it would be creating a regular bitmap
+//        glyph/codepoint   --  the character to generate the SDF for
+//        padding           --  extra "pixels" around the character which are filled with the distance to the character (not 0),
+//                                 which allows effects like bit outlines
+//        onedge_value      --  value 0-255 to test the SDF against to reconstruct the character (i.e. the isocontour of the character)
+//        pixel_dist_scale  --  what value the SDF should increase by when moving one SDF "pixel" away from the edge (on the 0..255 scale)
+//                                 if positive, > onedge_value is inside; if negative, < onedge_value is inside
+//        width,height      --  output height & width of the SDF bitmap (including padding)
+//        xoff,yoff         --  output origin of the character
+//        return value      --  a 2D array of bytes 0..255, width*height in size
+//
+// pixel_dist_scale & onedge_value are a scale & bias that allows you to make
+// optimal use of the limited 0..255 for your application, trading off precision
+// and special effects. SDF values outside the range 0..255 are clamped to 0..255.
+//
+// Example:
+//      scale = stbtt_ScaleForPixelHeight(22)
+//      padding = 5
+//      onedge_value = 180
+//      pixel_dist_scale = 180/5.0 = 36.0
+//
+//      This will create an SDF bitmap in which the character is about 22 pixels
+//      high but the whole bitmap is about 22+5+5=32 pixels high. To produce a filled
+//      shape, sample the SDF at each pixel and fill the pixel if the SDF value
+//      is greater than or equal to 180/255. (You'll actually want to antialias,
+//      which is beyond the scope of this example.) Additionally, you can compute
+//      offset outlines (e.g. to stroke the character border inside & outside,
+//      or only outside). For example, to fill outside the character up to 3 SDF
+//      pixels, you would compare against (180-36.0*3)/255 = 72/255. The above
+//      choice of variables maps a range from 5 pixels outside the shape to
+//      2 pixels inside the shape to 0..255; this is intended primarily for apply
+//      outside effects only (the interior range is needed to allow proper
+//      antialiasing of the font at *smaller* sizes)
+//
+// The function computes the SDF analytically at each SDF pixel, not by e.g.
+// building a higher-res bitmap and approximating it. In theory the quality
+// should be as high as possible for an SDF of this size & representation, but
+// unclear if this is true in practice (perhaps building a higher-res bitmap
+// and computing from that can allow drop-out prevention).
+//
+// The algorithm has not been optimized at all, so expect it to be slow
+// if computing lots of characters or very large sizes.
+
+
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// Finding the right font...
+//
+// You should really just solve this offline, keep your own tables
+// of what font is what, and don't try to get it out of the .ttf file.
+// That's because getting it out of the .ttf file is really hard, because
+// the names in the file can appear in many possible encodings, in many
+// possible languages, and e.g. if you need a case-insensitive comparison,
+// the details of that depend on the encoding & language in a complex way
+// (actually underspecified in truetype, but also gigantic).
+//
+// But you can use the provided functions in two possible ways:
+//     stbtt_FindMatchingFont() will use *case-sensitive* comparisons on
+//             unicode-encoded names to try to find the font you want;
+//             you can run this before calling stbtt_InitFont()
+//
+//     stbtt_GetFontNameString() lets you get any of the various strings
+//             from the file yourself and do your own comparisons on them.
+//             You have to have called stbtt_InitFont() first.
+
+
+STBTT_DEF int stbtt_FindMatchingFont(const unsigned char *fontdata, const char *name, int flags);
+// returns the offset (not index) of the font that matches, or -1 if none
+//   if you use STBTT_MACSTYLE_DONTCARE, use a font name like "Arial Bold".
+//   if you use any other flag, use a font name like "Arial"; this checks
+//     the 'macStyle' header field; i don't know if fonts set this consistently
+#define STBTT_MACSTYLE_DONTCARE     0
+#define STBTT_MACSTYLE_BOLD         1
+#define STBTT_MACSTYLE_ITALIC       2
+#define STBTT_MACSTYLE_UNDERSCORE   4
+#define STBTT_MACSTYLE_NONE         8   // <= not same as 0, this makes us check the bitfield is 0
+
+STBTT_DEF int stbtt_CompareUTF8toUTF16_bigendian(const char *s1, int len1, const char *s2, int len2);
+// returns 1/0 whether the first string interpreted as utf8 is identical to
+// the second string interpreted as big-endian utf16... useful for strings from next func
+
+STBTT_DEF const char *stbtt_GetFontNameString(const stbtt_fontinfo *font, int *length, int platformID, int encodingID, int languageID, int nameID);
+// returns the string (which may be big-endian double byte, e.g. for unicode)
+// and puts the length in bytes in *length.
+//
+// some of the values for the IDs are below; for more see the truetype spec:
+//     http://developer.apple.com/textfonts/TTRefMan/RM06/Chap6name.html
+//     http://www.microsoft.com/typography/otspec/name.htm
+
+enum { // platformID
+   STBTT_PLATFORM_ID_UNICODE   =0,
+   STBTT_PLATFORM_ID_MAC       =1,
+   STBTT_PLATFORM_ID_ISO       =2,
+   STBTT_PLATFORM_ID_MICROSOFT =3
+};
+
+enum { // encodingID for STBTT_PLATFORM_ID_UNICODE
+   STBTT_UNICODE_EID_UNICODE_1_0    =0,
+   STBTT_UNICODE_EID_UNICODE_1_1    =1,
+   STBTT_UNICODE_EID_ISO_10646      =2,
+   STBTT_UNICODE_EID_UNICODE_2_0_BMP=3,
+   STBTT_UNICODE_EID_UNICODE_2_0_FULL=4
+};
+
+enum { // encodingID for STBTT_PLATFORM_ID_MICROSOFT
+   STBTT_MS_EID_SYMBOL        =0,
+   STBTT_MS_EID_UNICODE_BMP   =1,
+   STBTT_MS_EID_SHIFTJIS      =2,
+   STBTT_MS_EID_UNICODE_FULL  =10
+};
+
+enum { // encodingID for STBTT_PLATFORM_ID_MAC; same as Script Manager codes
+   STBTT_MAC_EID_ROMAN        =0,   STBTT_MAC_EID_ARABIC       =4,
+   STBTT_MAC_EID_JAPANESE     =1,   STBTT_MAC_EID_HEBREW       =5,
+   STBTT_MAC_EID_CHINESE_TRAD =2,   STBTT_MAC_EID_GREEK        =6,
+   STBTT_MAC_EID_KOREAN       =3,   STBTT_MAC_EID_RUSSIAN      =7
+};
+
+enum { // languageID for STBTT_PLATFORM_ID_MICROSOFT; same as LCID...
+       // problematic because there are e.g. 16 english LCIDs and 16 arabic LCIDs
+   STBTT_MS_LANG_ENGLISH     =0x0409,   STBTT_MS_LANG_ITALIAN     =0x0410,
+   STBTT_MS_LANG_CHINESE     =0x0804,   STBTT_MS_LANG_JAPANESE    =0x0411,
+   STBTT_MS_LANG_DUTCH       =0x0413,   STBTT_MS_LANG_KOREAN      =0x0412,
+   STBTT_MS_LANG_FRENCH      =0x040c,   STBTT_MS_LANG_RUSSIAN     =0x0419,
+   STBTT_MS_LANG_GERMAN      =0x0407,   STBTT_MS_LANG_SPANISH     =0x0409,
+   STBTT_MS_LANG_HEBREW      =0x040d,   STBTT_MS_LANG_SWEDISH     =0x041D
+};
+
+enum { // languageID for STBTT_PLATFORM_ID_MAC
+   STBTT_MAC_LANG_ENGLISH      =0 ,   STBTT_MAC_LANG_JAPANESE     =11,
+   STBTT_MAC_LANG_ARABIC       =12,   STBTT_MAC_LANG_KOREAN       =23,
+   STBTT_MAC_LANG_DUTCH        =4 ,   STBTT_MAC_LANG_RUSSIAN      =32,
+   STBTT_MAC_LANG_FRENCH       =1 ,   STBTT_MAC_LANG_SPANISH      =6 ,
+   STBTT_MAC_LANG_GERMAN       =2 ,   STBTT_MAC_LANG_SWEDISH      =5 ,
+   STBTT_MAC_LANG_HEBREW       =10,   STBTT_MAC_LANG_CHINESE_SIMPLIFIED =33,
+   STBTT_MAC_LANG_ITALIAN      =3 ,   STBTT_MAC_LANG_CHINESE_TRAD =19
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // __STB_INCLUDE_STB_TRUETYPE_H__
+
+///////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////
+////
+////   IMPLEMENTATION
+////
+////
+
+#ifdef STB_TRUETYPE_IMPLEMENTATION
+
+#ifndef STBTT_MAX_OVERSAMPLE
+#define STBTT_MAX_OVERSAMPLE   8
+#endif
+
+#if STBTT_MAX_OVERSAMPLE > 255
+#error "STBTT_MAX_OVERSAMPLE cannot be > 255"
+#endif
+
+typedef int stbtt__test_oversample_pow2[(STBTT_MAX_OVERSAMPLE & (STBTT_MAX_OVERSAMPLE-1)) == 0 ? 1 : -1];
+
+#ifndef STBTT_RASTERIZER_VERSION
+#define STBTT_RASTERIZER_VERSION 2
+#endif
+
+#ifdef _MSC_VER
+#define STBTT__NOTUSED(v)  (void)(v)
+#else
+#define STBTT__NOTUSED(v)  (void)sizeof(v)
+#endif
+
+//////////////////////////////////////////////////////////////////////////
+//
+// stbtt__buf helpers to parse data from file
+//
+
+static stbtt_uint8 stbtt__buf_get8(stbtt__buf *b)
+{
+   if (b->cursor >= b->size)
+      return 0;
+   return b->data[b->cursor++];
+}
+
+static stbtt_uint8 stbtt__buf_peek8(stbtt__buf *b)
+{
+   if (b->cursor >= b->size)
+      return 0;
+   return b->data[b->cursor];
+}
+
+static void stbtt__buf_seek(stbtt__buf *b, int o)
+{
+   STBTT_assert(!(o > b->size || o < 0));
+   b->cursor = (o > b->size || o < 0) ? b->size : o;
+}
+
+static void stbtt__buf_skip(stbtt__buf *b, int o)
+{
+   stbtt__buf_seek(b, b->cursor + o);
+}
+
+static stbtt_uint32 stbtt__buf_get(stbtt__buf *b, int n)
+{
+   stbtt_uint32 v = 0;
+   int i;
+   STBTT_assert(n >= 1 && n <= 4);
+   for (i = 0; i < n; i++)
+      v = (v << 8) | stbtt__buf_get8(b);
+   return v;
+}
+
+static stbtt__buf stbtt__new_buf(const void *p, size_t size)
+{
+   stbtt__buf r;
+   STBTT_assert(size < 0x40000000);
+   r.data = (stbtt_uint8*) p;
+   r.size = (int) size;
+   r.cursor = 0;
+   return r;
+}
+
+#define stbtt__buf_get16(b)  stbtt__buf_get((b), 2)
+#define stbtt__buf_get32(b)  stbtt__buf_get((b), 4)
+
+static stbtt__buf stbtt__buf_range(const stbtt__buf *b, int o, int s)
+{
+   stbtt__buf r = stbtt__new_buf(NULL, 0);
+   if (o < 0 || s < 0 || o > b->size || s > b->size - o) return r;
+   r.data = b->data + o;
+   r.size = s;
+   return r;
+}
+
+static stbtt__buf stbtt__cff_get_index(stbtt__buf *b)
+{
+   int count, start, offsize;
+   start = b->cursor;
+   count = stbtt__buf_get16(b);
+   if (count) {
+      offsize = stbtt__buf_get8(b);
+      STBTT_assert(offsize >= 1 && offsize <= 4);
+      stbtt__buf_skip(b, offsize * count);
+      stbtt__buf_skip(b, stbtt__buf_get(b, offsize) - 1);
+   }
+   return stbtt__buf_range(b, start, b->cursor - start);
+}
+
+static stbtt_uint32 stbtt__cff_int(stbtt__buf *b)
+{
+   int b0 = stbtt__buf_get8(b);
+   if (b0 >= 32 && b0 <= 246)       return b0 - 139;
+   else if (b0 >= 247 && b0 <= 250) return (b0 - 247)*256 + stbtt__buf_get8(b) + 108;
+   else if (b0 >= 251 && b0 <= 254) return -(b0 - 251)*256 - stbtt__buf_get8(b) - 108;
+   else if (b0 == 28)               return stbtt__buf_get16(b);
+   else if (b0 == 29)               return stbtt__buf_get32(b);
+   STBTT_assert(0);
+   return 0;
+}
+
+static void stbtt__cff_skip_operand(stbtt__buf *b) {
+   int v, b0 = stbtt__buf_peek8(b);
+   STBTT_assert(b0 >= 28);
+   if (b0 == 30) {
+      stbtt__buf_skip(b, 1);
+      while (b->cursor < b->size) {
+         v = stbtt__buf_get8(b);
+         if ((v & 0xF) == 0xF || (v >> 4) == 0xF)
+            break;
+      }
+   } else {
+      stbtt__cff_int(b);
+   }
+}
+
+static stbtt__buf stbtt__dict_get(stbtt__buf *b, int key)
+{
+   stbtt__buf_seek(b, 0);
+   while (b->cursor < b->size) {
+      int start = b->cursor, end, op;
+      while (stbtt__buf_peek8(b) >= 28)
+         stbtt__cff_skip_operand(b);
+      end = b->cursor;
+      op = stbtt__buf_get8(b);
+      if (op == 12)  op = stbtt__buf_get8(b) | 0x100;
+      if (op == key) return stbtt__buf_range(b, start, end-start);
+   }
+   return stbtt__buf_range(b, 0, 0);
+}
+
+static void stbtt__dict_get_ints(stbtt__buf *b, int key, int outcount, stbtt_uint32 *out)
+{
+   int i;
+   stbtt__buf operands = stbtt__dict_get(b, key);
+   for (i = 0; i < outcount && operands.cursor < operands.size; i++)
+      out[i] = stbtt__cff_int(&operands);
+}
+
+static int stbtt__cff_index_count(stbtt__buf *b)
+{
+   stbtt__buf_seek(b, 0);
+   return stbtt__buf_get16(b);
+}
+
+static stbtt__buf stbtt__cff_index_get(stbtt__buf b, int i)
+{
+   int count, offsize, start, end;
+   stbtt__buf_seek(&b, 0);
+   count = stbtt__buf_get16(&b);
+   offsize = stbtt__buf_get8(&b);
+   STBTT_assert(i >= 0 && i < count);
+   STBTT_assert(offsize >= 1 && offsize <= 4);
+   stbtt__buf_skip(&b, i*offsize);
+   start = stbtt__buf_get(&b, offsize);
+   end = stbtt__buf_get(&b, offsize);
+   return stbtt__buf_range(&b, 2+(count+1)*offsize+start, end - start);
+}
+
+//////////////////////////////////////////////////////////////////////////
+//
+// accessors to parse data from file
+//
+
+// on platforms that don't allow misaligned reads, if we want to allow
+// truetype fonts that aren't padded to alignment, define ALLOW_UNALIGNED_TRUETYPE
+
+#define ttBYTE(p)     (* (stbtt_uint8 *) (p))
+#define ttCHAR(p)     (* (stbtt_int8 *) (p))
+#define ttFixed(p)    ttLONG(p)
+
+static stbtt_uint16 ttUSHORT(stbtt_uint8 *p) { return p[0]*256 + p[1]; }
+static stbtt_int16 ttSHORT(stbtt_uint8 *p)   { return p[0]*256 + p[1]; }
+static stbtt_uint32 ttULONG(stbtt_uint8 *p)  { return (p[0]<<24) + (p[1]<<16) + (p[2]<<8) + p[3]; }
+static stbtt_int32 ttLONG(stbtt_uint8 *p)    { return (p[0]<<24) + (p[1]<<16) + (p[2]<<8) + p[3]; }
+
+#define stbtt_tag4(p,c0,c1,c2,c3) ((p)[0] == (c0) && (p)[1] == (c1) && (p)[2] == (c2) && (p)[3] == (c3))
+#define stbtt_tag(p,str)           stbtt_tag4(p,str[0],str[1],str[2],str[3])
+
+static int stbtt__isfont(stbtt_uint8 *font)
+{
+   // check the version number
+   if (stbtt_tag4(font, '1',0,0,0))  return 1; // TrueType 1
+   if (stbtt_tag(font, "typ1"))   return 1; // TrueType with type 1 font -- we don't support this!
+   if (stbtt_tag(font, "OTTO"))   return 1; // OpenType with CFF
+   if (stbtt_tag4(font, 0,1,0,0)) return 1; // OpenType 1.0
+   if (stbtt_tag(font, "true"))   return 1; // Apple specification for TrueType fonts
+   return 0;
+}
+
+// @OPTIMIZE: binary search
+static stbtt_uint32 stbtt__find_table(stbtt_uint8 *data, stbtt_uint32 fontstart, const char *tag)
+{
+   stbtt_int32 num_tables = ttUSHORT(data+fontstart+4);
+   stbtt_uint32 tabledir = fontstart + 12;
+   stbtt_int32 i;
+   for (i=0; i < num_tables; ++i) {
+      stbtt_uint32 loc = tabledir + 16*i;
+      if (stbtt_tag(data+loc+0, tag))
+         return ttULONG(data+loc+8);
+   }
+   return 0;
+}
+
+static int stbtt_GetFontOffsetForIndex_internal(unsigned char *font_collection, int index)
+{
+   // if it's just a font, there's only one valid index
+   if (stbtt__isfont(font_collection))
+      return index == 0 ? 0 : -1;
+
+   // check if it's a TTC
+   if (stbtt_tag(font_collection, "ttcf")) {
+      // version 1?
+      if (ttULONG(font_collection+4) == 0x00010000 || ttULONG(font_collection+4) == 0x00020000) {
+         stbtt_int32 n = ttLONG(font_collection+8);
+         if (index >= n)
+            return -1;
+         return ttULONG(font_collection+12+index*4);
+      }
+   }
+   return -1;
+}
+
+static int stbtt_GetNumberOfFonts_internal(unsigned char *font_collection)
+{
+   // if it's just a font, there's only one valid font
+   if (stbtt__isfont(font_collection))
+      return 1;
+
+   // check if it's a TTC
+   if (stbtt_tag(font_collection, "ttcf")) {
+      // version 1?
+      if (ttULONG(font_collection+4) == 0x00010000 || ttULONG(font_collection+4) == 0x00020000) {
+         return ttLONG(font_collection+8);
+      }
+   }
+   return 0;
+}
+
+static stbtt__buf stbtt__get_subrs(stbtt__buf cff, stbtt__buf fontdict)
+{
+   stbtt_uint32 subrsoff = 0, private_loc[2] = { 0, 0 };
+   stbtt__buf pdict;
+   stbtt__dict_get_ints(&fontdict, 18, 2, private_loc);
+   if (!private_loc[1] || !private_loc[0]) return stbtt__new_buf(NULL, 0);
+   pdict = stbtt__buf_range(&cff, private_loc[1], private_loc[0]);
+   stbtt__dict_get_ints(&pdict, 19, 1, &subrsoff);
+   if (!subrsoff) return stbtt__new_buf(NULL, 0);
+   stbtt__buf_seek(&cff, private_loc[1]+subrsoff);
+   return stbtt__cff_get_index(&cff);
+}
+
+// since most people won't use this, find this table the first time it's needed
+static int stbtt__get_svg(stbtt_fontinfo *info)
+{
+   stbtt_uint32 t;
+   if (info->svg < 0) {
+      t = stbtt__find_table(info->data, info->fontstart, "SVG ");
+      if (t) {
+         stbtt_uint32 offset = ttULONG(info->data + t + 2);
+         info->svg = t + offset;
+      } else {
+         info->svg = 0;
+      }
+   }
+   return info->svg;
+}
+
+static int stbtt_InitFont_internal(stbtt_fontinfo *info, unsigned char *data, int fontstart)
+{
+   stbtt_uint32 cmap, t;
+   stbtt_int32 i,numTables;
+
+   info->data = data;
+   info->fontstart = fontstart;
+   info->cff = stbtt__new_buf(NULL, 0);
+
+   cmap = stbtt__find_table(data, fontstart, "cmap");       // required
+   info->loca = stbtt__find_table(data, fontstart, "loca"); // required
+   info->head = stbtt__find_table(data, fontstart, "head"); // required
+   info->glyf = stbtt__find_table(data, fontstart, "glyf"); // required
+   info->hhea = stbtt__find_table(data, fontstart, "hhea"); // required
+   info->hmtx = stbtt__find_table(data, fontstart, "hmtx"); // required
+   info->kern = stbtt__find_table(data, fontstart, "kern"); // not required
+   info->gpos = stbtt__find_table(data, fontstart, "GPOS"); // not required
+
+   if (!cmap || !info->head || !info->hhea || !info->hmtx)
+      return 0;
+   if (info->glyf) {
+      // required for truetype
+      if (!info->loca) return 0;
+   } else {
+      // initialization for CFF / Type2 fonts (OTF)
+      stbtt__buf b, topdict, topdictidx;
+      stbtt_uint32 cstype = 2, charstrings = 0, fdarrayoff = 0, fdselectoff = 0;
+      stbtt_uint32 cff;
+
+      cff = stbtt__find_table(data, fontstart, "CFF ");
+      if (!cff) return 0;
+
+      info->fontdicts = stbtt__new_buf(NULL, 0);
+      info->fdselect = stbtt__new_buf(NULL, 0);
+
+      // @TODO this should use size from table (not 512MB)
+      info->cff = stbtt__new_buf(data+cff, 512*1024*1024);
+      b = info->cff;
+
+      // read the header
+      stbtt__buf_skip(&b, 2);
+      stbtt__buf_seek(&b, stbtt__buf_get8(&b)); // hdrsize
+
+      // @TODO the name INDEX could list multiple fonts,
+      // but we just use the first one.
+      stbtt__cff_get_index(&b);  // name INDEX
+      topdictidx = stbtt__cff_get_index(&b);
+      topdict = stbtt__cff_index_get(topdictidx, 0);
+      stbtt__cff_get_index(&b);  // string INDEX
+      info->gsubrs = stbtt__cff_get_index(&b);
+
+      stbtt__dict_get_ints(&topdict, 17, 1, &charstrings);
+      stbtt__dict_get_ints(&topdict, 0x100 | 6, 1, &cstype);
+      stbtt__dict_get_ints(&topdict, 0x100 | 36, 1, &fdarrayoff);
+      stbtt__dict_get_ints(&topdict, 0x100 | 37, 1, &fdselectoff);
+      info->subrs = stbtt__get_subrs(b, topdict);
+
+      // we only support Type 2 charstrings
+      if (cstype != 2) return 0;
+      if (charstrings == 0) return 0;
+
+      if (fdarrayoff) {
+         // looks like a CID font
+         if (!fdselectoff) return 0;
+         stbtt__buf_seek(&b, fdarrayoff);
+         info->fontdicts = stbtt__cff_get_index(&b);
+         info->fdselect = stbtt__buf_range(&b, fdselectoff, b.size-fdselectoff);
+      }
+
+      stbtt__buf_seek(&b, charstrings);
+      info->charstrings = stbtt__cff_get_index(&b);
+   }
+
+   t = stbtt__find_table(data, fontstart, "maxp");
+   if (t)
+      info->numGlyphs = ttUSHORT(data+t+4);
+   else
+      info->numGlyphs = 0xffff;
+
+   info->svg = -1;
+
+   // find a cmap encoding table we understand *now* to avoid searching
+   // later. (todo: could make this installable)
+   // the same regardless of glyph.
+   numTables = ttUSHORT(data + cmap + 2);
+   info->index_map = 0;
+   for (i=0; i < numTables; ++i) {
+      stbtt_uint32 encoding_record = cmap + 4 + 8 * i;
+      // find an encoding we understand:
+      switch(ttUSHORT(data+encoding_record)) {
+         case STBTT_PLATFORM_ID_MICROSOFT:
+            switch (ttUSHORT(data+encoding_record+2)) {
+               case STBTT_MS_EID_UNICODE_BMP:
+               case STBTT_MS_EID_UNICODE_FULL:
+                  // MS/Unicode
+                  info->index_map = cmap + ttULONG(data+encoding_record+4);
+                  break;
+            }
+            break;
+        case STBTT_PLATFORM_ID_UNICODE:
+            // Mac/iOS has these
+            // all the encodingIDs are unicode, so we don't bother to check it
+            info->index_map = cmap + ttULONG(data+encoding_record+4);
+            break;
+      }
+   }
+   if (info->index_map == 0)
+      return 0;
+
+   info->indexToLocFormat = ttUSHORT(data+info->head + 50);
+   return 1;
+}
+
+STBTT_DEF int stbtt_FindGlyphIndex(const stbtt_fontinfo *info, int unicode_codepoint)
+{
+   stbtt_uint8 *data = info->data;
+   stbtt_uint32 index_map = info->index_map;
+
+   stbtt_uint16 format = ttUSHORT(data + index_map + 0);
+   if (format == 0) { // apple byte encoding
+      stbtt_int32 bytes = ttUSHORT(data + index_map + 2);
+      if (unicode_codepoint < bytes-6)
+         return ttBYTE(data + index_map + 6 + unicode_codepoint);
+      return 0;
+   } else if (format == 6) {
+      stbtt_uint32 first = ttUSHORT(data + index_map + 6);
+      stbtt_uint32 count = ttUSHORT(data + index_map + 8);
+      if ((stbtt_uint32) unicode_codepoint >= first && (stbtt_uint32) unicode_codepoint < first+count)
+         return ttUSHORT(data + index_map + 10 + (unicode_codepoint - first)*2);
+      return 0;
+   } else if (format == 2) {
+      STBTT_assert(0); // @TODO: high-byte mapping for japanese/chinese/korean
+      return 0;
+   } else if (format == 4) { // standard mapping for windows fonts: binary search collection of ranges
+      stbtt_uint16 segcount = ttUSHORT(data+index_map+6) >> 1;
+      stbtt_uint16 searchRange = ttUSHORT(data+index_map+8) >> 1;
+      stbtt_uint16 entrySelector = ttUSHORT(data+index_map+10);
+      stbtt_uint16 rangeShift = ttUSHORT(data+index_map+12) >> 1;
+
+      // do a binary search of the segments
+      stbtt_uint32 endCount = index_map + 14;
+      stbtt_uint32 search = endCount;
+
+      if (unicode_codepoint > 0xffff)
+         return 0;
+
+      // they lie from endCount .. endCount + segCount
+      // but searchRange is the nearest power of two, so...
+      if (unicode_codepoint >= ttUSHORT(data + search + rangeShift*2))
+         search += rangeShift*2;
+
+      // now decrement to bias correctly to find smallest
+      search -= 2;
+      while (entrySelector) {
+         stbtt_uint16 end;
+         searchRange >>= 1;
+         end = ttUSHORT(data + search + searchRange*2);
+         if (unicode_codepoint > end)
+            search += searchRange*2;
+         --entrySelector;
+      }
+      search += 2;
+
+      {
+         stbtt_uint16 offset, start;
+         stbtt_uint16 item = (stbtt_uint16) ((search - endCount) >> 1);
+
+         STBTT_assert(unicode_codepoint <= ttUSHORT(data + endCount + 2*item));
+         start = ttUSHORT(data + index_map + 14 + segcount*2 + 2 + 2*item);
+         if (unicode_codepoint < start)
+            return 0;
+
+         offset = ttUSHORT(data + index_map + 14 + segcount*6 + 2 + 2*item);
+         if (offset == 0)
+            return (stbtt_uint16) (unicode_codepoint + ttSHORT(data + index_map + 14 + segcount*4 + 2 + 2*item));
+
+         return ttUSHORT(data + offset + (unicode_codepoint-start)*2 + index_map + 14 + segcount*6 + 2 + 2*item);
+      }
+   } else if (format == 12 || format == 13) {
+      stbtt_uint32 ngroups = ttULONG(data+index_map+12);
+      stbtt_int32 low,high;
+      low = 0; high = (stbtt_int32)ngroups;
+      // Binary search the right group.
+      while (low < high) {
+         stbtt_int32 mid = low + ((high-low) >> 1); // rounds down, so low <= mid < high
+         stbtt_uint32 start_char = ttULONG(data+index_map+16+mid*12);
+         stbtt_uint32 end_char = ttULONG(data+index_map+16+mid*12+4);
+         if ((stbtt_uint32) unicode_codepoint < start_char)
+            high = mid;
+         else if ((stbtt_uint32) unicode_codepoint > end_char)
+            low = mid+1;
+         else {
+            stbtt_uint32 start_glyph = ttULONG(data+index_map+16+mid*12+8);
+            if (format == 12)
+               return start_glyph + unicode_codepoint-start_char;
+            else // format == 13
+               return start_glyph;
+         }
+      }
+      return 0; // not found
+   }
+   // @TODO
+   STBTT_assert(0);
+   return 0;
+}
+
+STBTT_DEF int stbtt_GetCodepointShape(const stbtt_fontinfo *info, int unicode_codepoint, stbtt_vertex **vertices)
+{
+   return stbtt_GetGlyphShape(info, stbtt_FindGlyphIndex(info, unicode_codepoint), vertices);
+}
+
+static void stbtt_setvertex(stbtt_vertex *v, stbtt_uint8 type, stbtt_int32 x, stbtt_int32 y, stbtt_int32 cx, stbtt_int32 cy)
+{
+   v->type = type;
+   v->x = (stbtt_int16) x;
+   v->y = (stbtt_int16) y;
+   v->cx = (stbtt_int16) cx;
+   v->cy = (stbtt_int16) cy;
+}
+
+static int stbtt__GetGlyfOffset(const stbtt_fontinfo *info, int glyph_index)
+{
+   int g1,g2;
+
+   STBTT_assert(!info->cff.size);
+
+   if (glyph_index >= info->numGlyphs) return -1; // glyph index out of range
+   if (info->indexToLocFormat >= 2)    return -1; // unknown index->glyph map format
+
+   if (info->indexToLocFormat == 0) {
+      g1 = info->glyf + ttUSHORT(info->data + info->loca + glyph_index * 2) * 2;
+      g2 = info->glyf + ttUSHORT(info->data + info->loca + glyph_index * 2 + 2) * 2;
+   } else {
+      g1 = info->glyf + ttULONG (info->data + info->loca + glyph_index * 4);
+      g2 = info->glyf + ttULONG (info->data + info->loca + glyph_index * 4 + 4);
+   }
+
+   return g1==g2 ? -1 : g1; // if length is 0, return -1
+}
+
+static int stbtt__GetGlyphInfoT2(const stbtt_fontinfo *info, int glyph_index, int *x0, int *y0, int *x1, int *y1);
+
+STBTT_DEF int stbtt_GetGlyphBox(const stbtt_fontinfo *info, int glyph_index, int *x0, int *y0, int *x1, int *y1)
+{
+   if (info->cff.size) {
+      stbtt__GetGlyphInfoT2(info, glyph_index, x0, y0, x1, y1);
+   } else {
+      int g = stbtt__GetGlyfOffset(info, glyph_index);
+      if (g < 0) return 0;
+
+      if (x0) *x0 = ttSHORT(info->data + g + 2);
+      if (y0) *y0 = ttSHORT(info->data + g + 4);
+      if (x1) *x1 = ttSHORT(info->data + g + 6);
+      if (y1) *y1 = ttSHORT(info->data + g + 8);
+   }
+   return 1;
+}
+
+STBTT_DEF int stbtt_GetCodepointBox(const stbtt_fontinfo *info, int codepoint, int *x0, int *y0, int *x1, int *y1)
+{
+   return stbtt_GetGlyphBox(info, stbtt_FindGlyphIndex(info,codepoint), x0,y0,x1,y1);
+}
+
+STBTT_DEF int stbtt_IsGlyphEmpty(const stbtt_fontinfo *info, int glyph_index)
+{
+   stbtt_int16 numberOfContours;
+   int g;
+   if (info->cff.size)
+      return stbtt__GetGlyphInfoT2(info, glyph_index, NULL, NULL, NULL, NULL) == 0;
+   g = stbtt__GetGlyfOffset(info, glyph_index);
+   if (g < 0) return 1;
+   numberOfContours = ttSHORT(info->data + g);
+   return numberOfContours == 0;
+}
+
+static int stbtt__close_shape(stbtt_vertex *vertices, int num_vertices, int was_off, int start_off,
+    stbtt_int32 sx, stbtt_int32 sy, stbtt_int32 scx, stbtt_int32 scy, stbtt_int32 cx, stbtt_int32 cy)
+{
+   if (start_off) {
+      if (was_off)
+         stbtt_setvertex(&vertices[num_vertices++], STBTT_vcurve, (cx+scx)>>1, (cy+scy)>>1, cx,cy);
+      stbtt_setvertex(&vertices[num_vertices++], STBTT_vcurve, sx,sy,scx,scy);
+   } else {
+      if (was_off)
+         stbtt_setvertex(&vertices[num_vertices++], STBTT_vcurve,sx,sy,cx,cy);
+      else
+         stbtt_setvertex(&vertices[num_vertices++], STBTT_vline,sx,sy,0,0);
+   }
+   return num_vertices;
+}
+
+static int stbtt__GetGlyphShapeTT(const stbtt_fontinfo *info, int glyph_index, stbtt_vertex **pvertices)
+{
+   stbtt_int16 numberOfContours;
+   stbtt_uint8 *endPtsOfContours;
+   stbtt_uint8 *data = info->data;
+   stbtt_vertex *vertices=0;
+   int num_vertices=0;
+   int g = stbtt__GetGlyfOffset(info, glyph_index);
+
+   *pvertices = NULL;
+
+   if (g < 0) return 0;
+
+   numberOfContours = ttSHORT(data + g);
+
+   if (numberOfContours > 0) {
+      stbtt_uint8 flags=0,flagcount;
+      stbtt_int32 ins, i,j=0,m,n, next_move, was_off=0, off, start_off=0;
+      stbtt_int32 x,y,cx,cy,sx,sy, scx,scy;
+      stbtt_uint8 *points;
+      endPtsOfContours = (data + g + 10);
+      ins = ttUSHORT(data + g + 10 + numberOfContours * 2);
+      points = data + g + 10 + numberOfContours * 2 + 2 + ins;
+
+      n = 1+ttUSHORT(endPtsOfContours + numberOfContours*2-2);
+
+      m = n + 2*numberOfContours;  // a loose bound on how many vertices we might need
+      vertices = (stbtt_vertex *) STBTT_malloc(m * sizeof(vertices[0]), info->userdata);
+      if (vertices == 0)
+         return 0;
+
+      next_move = 0;
+      flagcount=0;
+
+      // in first pass, we load uninterpreted data into the allocated array
+      // above, shifted to the end of the array so we won't overwrite it when
+      // we create our final data starting from the front
+
+      off = m - n; // starting offset for uninterpreted data, regardless of how m ends up being calculated
+
+      // first load flags
+
+      for (i=0; i < n; ++i) {
+         if (flagcount == 0) {
+            flags = *points++;
+            if (flags & 8)
+               flagcount = *points++;
+         } else
+            --flagcount;
+         vertices[off+i].type = flags;
+      }
+
+      // now load x coordinates
+      x=0;
+      for (i=0; i < n; ++i) {
+         flags = vertices[off+i].type;
+         if (flags & 2) {
+            stbtt_int16 dx = *points++;
+            x += (flags & 16) ? dx : -dx; // ???
+         } else {
+            if (!(flags & 16)) {
+               x = x + (stbtt_int16) (points[0]*256 + points[1]);
+               points += 2;
+            }
+         }
+         vertices[off+i].x = (stbtt_int16) x;
+      }
+
+      // now load y coordinates
+      y=0;
+      for (i=0; i < n; ++i) {
+         flags = vertices[off+i].type;
+         if (flags & 4) {
+            stbtt_int16 dy = *points++;
+            y += (flags & 32) ? dy : -dy; // ???
+         } else {
+            if (!(flags & 32)) {
+               y = y + (stbtt_int16) (points[0]*256 + points[1]);
+               points += 2;
+            }
+         }
+         vertices[off+i].y = (stbtt_int16) y;
+      }
+
+      // now convert them to our format
+      num_vertices=0;
+      sx = sy = cx = cy = scx = scy = 0;
+      for (i=0; i < n; ++i) {
+         flags = vertices[off+i].type;
+         x     = (stbtt_int16) vertices[off+i].x;
+         y     = (stbtt_int16) vertices[off+i].y;
+
+         if (next_move == i) {
+            if (i != 0)
+               num_vertices = stbtt__close_shape(vertices, num_vertices, was_off, start_off, sx,sy,scx,scy,cx,cy);
+
+            // now start the new one
+            start_off = !(flags & 1);
+            if (start_off) {
+               // if we start off with an off-curve point, then when we need to find a point on the curve
+               // where we can start, and we need to save some state for when we wraparound.
+               scx = x;
+               scy = y;
+               if (!(vertices[off+i+1].type & 1)) {
+                  // next point is also a curve point, so interpolate an on-point curve
+                  sx = (x + (stbtt_int32) vertices[off+i+1].x) >> 1;
+                  sy = (y + (stbtt_int32) vertices[off+i+1].y) >> 1;
+               } else {
+                  // otherwise just use the next point as our start point
+                  sx = (stbtt_int32) vertices[off+i+1].x;
+                  sy = (stbtt_int32) vertices[off+i+1].y;
+                  ++i; // we're using point i+1 as the starting point, so skip it
+               }
+            } else {
+               sx = x;
+               sy = y;
+            }
+            stbtt_setvertex(&vertices[num_vertices++], STBTT_vmove,sx,sy,0,0);
+            was_off = 0;
+            next_move = 1 + ttUSHORT(endPtsOfContours+j*2);
+            ++j;
+         } else {
+            if (!(flags & 1)) { // if it's a curve
+               if (was_off) // two off-curve control points in a row means interpolate an on-curve midpoint
+                  stbtt_setvertex(&vertices[num_vertices++], STBTT_vcurve, (cx+x)>>1, (cy+y)>>1, cx, cy);
+               cx = x;
+               cy = y;
+               was_off = 1;
+            } else {
+               if (was_off)
+                  stbtt_setvertex(&vertices[num_vertices++], STBTT_vcurve, x,y, cx, cy);
+               else
+                  stbtt_setvertex(&vertices[num_vertices++], STBTT_vline, x,y,0,0);
+               was_off = 0;
+            }
+         }
+      }
+      num_vertices = stbtt__close_shape(vertices, num_vertices, was_off, start_off, sx,sy,scx,scy,cx,cy);
+   } else if (numberOfContours < 0) {
+      // Compound shapes.
+      int more = 1;
+      stbtt_uint8 *comp = data + g + 10;
+      num_vertices = 0;
+      vertices = 0;
+      while (more) {
+         stbtt_uint16 flags, gidx;
+         int comp_num_verts = 0, i;
+         stbtt_vertex *comp_verts = 0, *tmp = 0;
+         double mtx[6] = {1,0,0,1,0,0}, m, n;
+
+         flags = ttSHORT(comp); comp+=2;
+         gidx = ttSHORT(comp); comp+=2;
+
+         if (flags & 2) { // XY values
+            if (flags & 1) { // shorts
+               mtx[4] = ttSHORT(comp); comp+=2;
+               mtx[5] = ttSHORT(comp); comp+=2;
+            } else {
+               mtx[4] = ttCHAR(comp); comp+=1;
+               mtx[5] = ttCHAR(comp); comp+=1;
+            }
+         }
+         else {
+            // @TODO handle matching point
+            STBTT_assert(0);
+         }
+         if (flags & (1<<3)) { // WE_HAVE_A_SCALE
+            mtx[0] = mtx[3] = ttSHORT(comp)/16384.0f; comp+=2;
+            mtx[1] = mtx[2] = 0;
+         } else if (flags & (1<<6)) { // WE_HAVE_AN_X_AND_YSCALE
+            mtx[0] = ttSHORT(comp)/16384.0f; comp+=2;
+            mtx[1] = mtx[2] = 0;
+            mtx[3] = ttSHORT(comp)/16384.0f; comp+=2;
+         } else if (flags & (1<<7)) { // WE_HAVE_A_TWO_BY_TWO
+            mtx[0] = ttSHORT(comp)/16384.0f; comp+=2;
+            mtx[1] = ttSHORT(comp)/16384.0f; comp+=2;
+            mtx[2] = ttSHORT(comp)/16384.0f; comp+=2;
+            mtx[3] = ttSHORT(comp)/16384.0f; comp+=2;
+         }
+
+         // Find transformation scales.
+         m = (double) STBTT_sqrt(mtx[0]*mtx[0] + mtx[1]*mtx[1]);
+         n = (double) STBTT_sqrt(mtx[2]*mtx[2] + mtx[3]*mtx[3]);
+
+         // Get indexed glyph.
+         comp_num_verts = stbtt_GetGlyphShape(info, gidx, &comp_verts);
+         if (comp_num_verts > 0) {
+            // Transform vertices.
+            for (i = 0; i < comp_num_verts; ++i) {
+               stbtt_vertex* v = &comp_verts[i];
+               stbtt_vertex_type x,y;
+               x=v->x; y=v->y;
+               v->x = (stbtt_vertex_type)(m * (mtx[0]*x + mtx[2]*y + mtx[4]));
+               v->y = (stbtt_vertex_type)(n * (mtx[1]*x + mtx[3]*y + mtx[5]));
+               x=v->cx; y=v->cy;
+               v->cx = (stbtt_vertex_type)(m * (mtx[0]*x + mtx[2]*y + mtx[4]));
+               v->cy = (stbtt_vertex_type)(n * (mtx[1]*x + mtx[3]*y + mtx[5]));
+            }
+            // Append vertices.
+            tmp = (stbtt_vertex*)STBTT_malloc((num_vertices+comp_num_verts)*sizeof(stbtt_vertex), info->userdata);
+            if (!tmp) {
+               if (vertices) STBTT_free(vertices, info->userdata);
+               if (comp_verts) STBTT_free(comp_verts, info->userdata);
+               return 0;
+            }
+            if (num_vertices > 0) STBTT_memcpy(tmp, vertices, num_vertices*sizeof(stbtt_vertex));
+            STBTT_memcpy(tmp+num_vertices, comp_verts, comp_num_verts*sizeof(stbtt_vertex));
+            if (vertices) STBTT_free(vertices, info->userdata);
+            vertices = tmp;
+            STBTT_free(comp_verts, info->userdata);
+            num_vertices += comp_num_verts;
+         }
+         // More components ?
+         more = flags & (1<<5);
+      }
+   } else {
+      // numberOfCounters == 0, do nothing
+   }
+
+   *pvertices = vertices;
+   return num_vertices;
+}
+
+typedef struct
+{
+   int bounds;
+   int started;
+   double first_x, first_y;
+   double x, y;
+   stbtt_int32 min_x, max_x, min_y, max_y;
+
+   stbtt_vertex *pvertices;
+   int num_vertices;
+} stbtt__csctx;
+
+#define STBTT__CSCTX_INIT(bounds) {bounds,0, 0,0, 0,0, 0,0,0,0, NULL, 0}
+
+static void stbtt__track_vertex(stbtt__csctx *c, stbtt_int32 x, stbtt_int32 y)
+{
+   if (x > c->max_x || !c->started) c->max_x = x;
+   if (y > c->max_y || !c->started) c->max_y = y;
+   if (x < c->min_x || !c->started) c->min_x = x;
+   if (y < c->min_y || !c->started) c->min_y = y;
+   c->started = 1;
+}
+
+static void stbtt__csctx_v(stbtt__csctx *c, stbtt_uint8 type, stbtt_int32 x, stbtt_int32 y, stbtt_int32 cx, stbtt_int32 cy, stbtt_int32 cx1, stbtt_int32 cy1)
+{
+   if (c->bounds) {
+      stbtt__track_vertex(c, x, y);
+      if (type == STBTT_vcubic) {
+         stbtt__track_vertex(c, cx, cy);
+         stbtt__track_vertex(c, cx1, cy1);
+      }
+   } else {
+      stbtt_setvertex(&c->pvertices[c->num_vertices], type, x, y, cx, cy);
+      c->pvertices[c->num_vertices].cx1 = (stbtt_int16) cx1;
+      c->pvertices[c->num_vertices].cy1 = (stbtt_int16) cy1;
+   }
+   c->num_vertices++;
+}
+
+static void stbtt__csctx_close_shape(stbtt__csctx *ctx)
+{
+   if (ctx->first_x != ctx->x || ctx->first_y != ctx->y)
+      stbtt__csctx_v(ctx, STBTT_vline, (int)ctx->first_x, (int)ctx->first_y, 0, 0, 0, 0);
+}
+
+static void stbtt__csctx_rmove_to(stbtt__csctx *ctx, double dx, double dy)
+{
+   stbtt__csctx_close_shape(ctx);
+   ctx->first_x = ctx->x = ctx->x + dx;
+   ctx->first_y = ctx->y = ctx->y + dy;
+   stbtt__csctx_v(ctx, STBTT_vmove, (int)ctx->x, (int)ctx->y, 0, 0, 0, 0);
+}
+
+static void stbtt__csctx_rline_to(stbtt__csctx *ctx, double dx, double dy)
+{
+   ctx->x += dx;
+   ctx->y += dy;
+   stbtt__csctx_v(ctx, STBTT_vline, (int)ctx->x, (int)ctx->y, 0, 0, 0, 0);
+}
+
+static void stbtt__csctx_rccurve_to(stbtt__csctx *ctx, double dx1, double dy1, double dx2, double dy2, double dx3, double dy3)
+{
+   double cx1 = ctx->x + dx1;
+   double cy1 = ctx->y + dy1;
+   double cx2 = cx1 + dx2;
+   double cy2 = cy1 + dy2;
+   ctx->x = cx2 + dx3;
+   ctx->y = cy2 + dy3;
+   stbtt__csctx_v(ctx, STBTT_vcubic, (int)ctx->x, (int)ctx->y, (int)cx1, (int)cy1, (int)cx2, (int)cy2);
+}
+
+static stbtt__buf stbtt__get_subr(stbtt__buf idx, int n)
+{
+   int count = stbtt__cff_index_count(&idx);
+   int bias = 107;
+   if (count >= 33900)
+      bias = 32768;
+   else if (count >= 1240)
+      bias = 1131;
+   n += bias;
+   if (n < 0 || n >= count)
+      return stbtt__new_buf(NULL, 0);
+   return stbtt__cff_index_get(idx, n);
+}
+
+static stbtt__buf stbtt__cid_get_glyph_subrs(const stbtt_fontinfo *info, int glyph_index)
+{
+   stbtt__buf fdselect = info->fdselect;
+   int nranges, start, end, v, fmt, fdselector = -1, i;
+
+   stbtt__buf_seek(&fdselect, 0);
+   fmt = stbtt__buf_get8(&fdselect);
+   if (fmt == 0) {
+      // untested
+      stbtt__buf_skip(&fdselect, glyph_index);
+      fdselector = stbtt__buf_get8(&fdselect);
+   } else if (fmt == 3) {
+      nranges = stbtt__buf_get16(&fdselect);
+      start = stbtt__buf_get16(&fdselect);
+      for (i = 0; i < nranges; i++) {
+         v = stbtt__buf_get8(&fdselect);
+         end = stbtt__buf_get16(&fdselect);
+         if (glyph_index >= start && glyph_index < end) {
+            fdselector = v;
+            break;
+         }
+         start = end;
+      }
+   }
+   if (fdselector == -1) stbtt__new_buf(NULL, 0);
+   return stbtt__get_subrs(info->cff, stbtt__cff_index_get(info->fontdicts, fdselector));
+}
+
+static int stbtt__run_charstring(const stbtt_fontinfo *info, int glyph_index, stbtt__csctx *c)
+{
+   int in_header = 1, maskbits = 0, subr_stack_height = 0, sp = 0, v, i, b0;
+   int has_subrs = 0, clear_stack;
+   double s[48];
+   stbtt__buf subr_stack[10], subrs = info->subrs, b;
+   double f;
+
+#define STBTT__CSERR(s) (0)
+
+   // this currently ignores the initial width value, which isn't needed if we have hmtx
+   b = stbtt__cff_index_get(info->charstrings, glyph_index);
+   while (b.cursor < b.size) {
+      i = 0;
+      clear_stack = 1;
+      b0 = stbtt__buf_get8(&b);
+      switch (b0) {
+      // @TODO implement hinting
+      case 0x13: // hintmask
+      case 0x14: // cntrmask
+         if (in_header)
+            maskbits += (sp / 2); // implicit "vstem"
+         in_header = 0;
+         stbtt__buf_skip(&b, (maskbits + 7) / 8);
+         break;
+
+      case 0x01: // hstem
+      case 0x03: // vstem
+      case 0x12: // hstemhm
+      case 0x17: // vstemhm
+         maskbits += (sp / 2);
+         break;
+
+      case 0x15: // rmoveto
+         in_header = 0;
+         if (sp < 2) return STBTT__CSERR("rmoveto stack");
+         stbtt__csctx_rmove_to(c, s[sp-2], s[sp-1]);
+         break;
+      case 0x04: // vmoveto
+         in_header = 0;
+         if (sp < 1) return STBTT__CSERR("vmoveto stack");
+         stbtt__csctx_rmove_to(c, 0, s[sp-1]);
+         break;
+      case 0x16: // hmoveto
+         in_header = 0;
+         if (sp < 1) return STBTT__CSERR("hmoveto stack");
+         stbtt__csctx_rmove_to(c, s[sp-1], 0);
+         break;
+
+      case 0x05: // rlineto
+         if (sp < 2) return STBTT__CSERR("rlineto stack");
+         for (; i + 1 < sp; i += 2)
+            stbtt__csctx_rline_to(c, s[i], s[i+1]);
+         break;
+
+      // hlineto/vlineto and vhcurveto/hvcurveto alternate horizontal and vertical
+      // starting from a different place.
+
+      case 0x07: // vlineto
+         if (sp < 1) return STBTT__CSERR("vlineto stack");
+         goto vlineto;
+      case 0x06: // hlineto
+         if (sp < 1) return STBTT__CSERR("hlineto stack");
+         for (;;) {
+            if (i >= sp) break;
+            stbtt__csctx_rline_to(c, s[i], 0);
+            i++;
+      vlineto:
+            if (i >= sp) break;
+            stbtt__csctx_rline_to(c, 0, s[i]);
+            i++;
+         }
+         break;
+
+      case 0x1F: // hvcurveto
+         if (sp < 4) return STBTT__CSERR("hvcurveto stack");
+         goto hvcurveto;
+      case 0x1E: // vhcurveto
+         if (sp < 4) return STBTT__CSERR("vhcurveto stack");
+         for (;;) {
+            if (i + 3 >= sp) break;
+            stbtt__csctx_rccurve_to(c, 0, s[i], s[i+1], s[i+2], s[i+3], (sp - i == 5) ? s[i + 4] : 0.0f);
+            i += 4;
+      hvcurveto:
+            if (i + 3 >= sp) break;
+            stbtt__csctx_rccurve_to(c, s[i], 0, s[i+1], s[i+2], (sp - i == 5) ? s[i+4] : 0.0f, s[i+3]);
+            i += 4;
+         }
+         break;
+
+      case 0x08: // rrcurveto
+         if (sp < 6) return STBTT__CSERR("rcurveline stack");
+         for (; i + 5 < sp; i += 6)
+            stbtt__csctx_rccurve_to(c, s[i], s[i+1], s[i+2], s[i+3], s[i+4], s[i+5]);
+         break;
+
+      case 0x18: // rcurveline
+         if (sp < 8) return STBTT__CSERR("rcurveline stack");
+         for (; i + 5 < sp - 2; i += 6)
+            stbtt__csctx_rccurve_to(c, s[i], s[i+1], s[i+2], s[i+3], s[i+4], s[i+5]);
+         if (i + 1 >= sp) return STBTT__CSERR("rcurveline stack");
+         stbtt__csctx_rline_to(c, s[i], s[i+1]);
+         break;
+
+      case 0x19: // rlinecurve
+         if (sp < 8) return STBTT__CSERR("rlinecurve stack");
+         for (; i + 1 < sp - 6; i += 2)
+            stbtt__csctx_rline_to(c, s[i], s[i+1]);
+         if (i + 5 >= sp) return STBTT__CSERR("rlinecurve stack");
+         stbtt__csctx_rccurve_to(c, s[i], s[i+1], s[i+2], s[i+3], s[i+4], s[i+5]);
+         break;
+
+      case 0x1A: // vvcurveto
+      case 0x1B: // hhcurveto
+         if (sp < 4) return STBTT__CSERR("(vv|hh)curveto stack");
+         f = 0.0;
+         if (sp & 1) { f = s[i]; i++; }
+         for (; i + 3 < sp; i += 4) {
+            if (b0 == 0x1B)
+               stbtt__csctx_rccurve_to(c, s[i], f, s[i+1], s[i+2], s[i+3], 0.0);
+            else
+               stbtt__csctx_rccurve_to(c, f, s[i], s[i+1], s[i+2], 0.0, s[i+3]);
+            f = 0.0;
+         }
+         break;
+
+      case 0x0A: // callsubr
+         if (!has_subrs) {
+            if (info->fdselect.size)
+               subrs = stbtt__cid_get_glyph_subrs(info, glyph_index);
+            has_subrs = 1;
+         }
+         // fallthrough
+      case 0x1D: // callgsubr
+         if (sp < 1) return STBTT__CSERR("call(g|)subr stack");
+         v = (int) s[--sp];
+         if (subr_stack_height >= 10) return STBTT__CSERR("recursion limit");
+         subr_stack[subr_stack_height++] = b;
+         b = stbtt__get_subr(b0 == 0x0A ? subrs : info->gsubrs, v);
+         if (b.size == 0) return STBTT__CSERR("subr not found");
+         b.cursor = 0;
+         clear_stack = 0;
+         break;
+
+      case 0x0B: // return
+         if (subr_stack_height <= 0) return STBTT__CSERR("return outside subr");
+         b = subr_stack[--subr_stack_height];
+         clear_stack = 0;
+         break;
+
+      case 0x0E: // endchar
+         stbtt__csctx_close_shape(c);
+         return 1;
+
+      case 0x0C: { // two-byte escape
+         double dx1, dx2, dx3, dx4, dx5, dx6, dy1, dy2, dy3, dy4, dy5, dy6;
+         double dx, dy;
+         int b1 = stbtt__buf_get8(&b);
+         switch (b1) {
+         // @TODO These "flex" implementations ignore the flex-depth and resolution,
+         // and always draw beziers.
+         case 0x22: // hflex
+            if (sp < 7) return STBTT__CSERR("hflex stack");
+            dx1 = s[0];
+            dx2 = s[1];
+            dy2 = s[2];
+            dx3 = s[3];
+            dx4 = s[4];
+            dx5 = s[5];
+            dx6 = s[6];
+            stbtt__csctx_rccurve_to(c, dx1, 0, dx2, dy2, dx3, 0);
+            stbtt__csctx_rccurve_to(c, dx4, 0, dx5, -dy2, dx6, 0);
+            break;
+
+         case 0x23: // flex
+            if (sp < 13) return STBTT__CSERR("flex stack");
+            dx1 = s[0];
+            dy1 = s[1];
+            dx2 = s[2];
+            dy2 = s[3];
+            dx3 = s[4];
+            dy3 = s[5];
+            dx4 = s[6];
+            dy4 = s[7];
+            dx5 = s[8];
+            dy5 = s[9];
+            dx6 = s[10];
+            dy6 = s[11];
+            //fd is s[12]
+            stbtt__csctx_rccurve_to(c, dx1, dy1, dx2, dy2, dx3, dy3);
+            stbtt__csctx_rccurve_to(c, dx4, dy4, dx5, dy5, dx6, dy6);
+            break;
+
+         case 0x24: // hflex1
+            if (sp < 9) return STBTT__CSERR("hflex1 stack");
+            dx1 = s[0];
+            dy1 = s[1];
+            dx2 = s[2];
+            dy2 = s[3];
+            dx3 = s[4];
+            dx4 = s[5];
+            dx5 = s[6];
+            dy5 = s[7];
+            dx6 = s[8];
+            stbtt__csctx_rccurve_to(c, dx1, dy1, dx2, dy2, dx3, 0);
+            stbtt__csctx_rccurve_to(c, dx4, 0, dx5, dy5, dx6, -(dy1+dy2+dy5));
+            break;
+
+         case 0x25: // flex1
+            if (sp < 11) return STBTT__CSERR("flex1 stack");
+            dx1 = s[0];
+            dy1 = s[1];
+            dx2 = s[2];
+            dy2 = s[3];
+            dx3 = s[4];
+            dy3 = s[5];
+            dx4 = s[6];
+            dy4 = s[7];
+            dx5 = s[8];
+            dy5 = s[9];
+            dx6 = dy6 = s[10];
+            dx = dx1+dx2+dx3+dx4+dx5;
+            dy = dy1+dy2+dy3+dy4+dy5;
+            if (STBTT_fabs(dx) > STBTT_fabs(dy))
+               dy6 = -dy;
+            else
+               dx6 = -dx;
+            stbtt__csctx_rccurve_to(c, dx1, dy1, dx2, dy2, dx3, dy3);
+            stbtt__csctx_rccurve_to(c, dx4, dy4, dx5, dy5, dx6, dy6);
+            break;
+
+         default:
+            return STBTT__CSERR("unimplemented");
+         }
+      } break;
+
+      default:
+         if (b0 != 255 && b0 != 28 && (b0 < 32 || b0 > 254))
+            return STBTT__CSERR("reserved operator");
+
+         // push immediate
+         if (b0 == 255) {
+            f = (double)(stbtt_int32)stbtt__buf_get32(&b) / 0x10000;
+         } else {
+            stbtt__buf_skip(&b, -1);
+            f = (double)(stbtt_int16)stbtt__cff_int(&b);
+         }
+         if (sp >= 48) return STBTT__CSERR("push stack overflow");
+         s[sp++] = f;
+         clear_stack = 0;
+         break;
+      }
+      if (clear_stack) sp = 0;
+   }
+   return STBTT__CSERR("no endchar");
+
+#undef STBTT__CSERR
+}
+
+static int stbtt__GetGlyphShapeT2(const stbtt_fontinfo *info, int glyph_index, stbtt_vertex **pvertices)
+{
+   // runs the charstring twice, once to count and once to output (to avoid realloc)
+   stbtt__csctx count_ctx = STBTT__CSCTX_INIT(1);
+   stbtt__csctx output_ctx = STBTT__CSCTX_INIT(0);
+   if (stbtt__run_charstring(info, glyph_index, &count_ctx)) {
+      *pvertices = (stbtt_vertex*)STBTT_malloc(count_ctx.num_vertices*sizeof(stbtt_vertex), info->userdata);
+      output_ctx.pvertices = *pvertices;
+      if (stbtt__run_charstring(info, glyph_index, &output_ctx)) {
+         STBTT_assert(output_ctx.num_vertices == count_ctx.num_vertices);
+         return output_ctx.num_vertices;
+      }
+   }
+   *pvertices = NULL;
+   return 0;
+}
+
+static int stbtt__GetGlyphInfoT2(const stbtt_fontinfo *info, int glyph_index, int *x0, int *y0, int *x1, int *y1)
+{
+   stbtt__csctx c = STBTT__CSCTX_INIT(1);
+   int r = stbtt__run_charstring(info, glyph_index, &c);
+   if (x0)  *x0 = r ? c.min_x : 0;
+   if (y0)  *y0 = r ? c.min_y : 0;
+   if (x1)  *x1 = r ? c.max_x : 0;
+   if (y1)  *y1 = r ? c.max_y : 0;
+   return r ? c.num_vertices : 0;
+}
+
+STBTT_DEF int stbtt_GetGlyphShape(const stbtt_fontinfo *info, int glyph_index, stbtt_vertex **pvertices)
+{
+   if (!info->cff.size)
+      return stbtt__GetGlyphShapeTT(info, glyph_index, pvertices);
+   else
+      return stbtt__GetGlyphShapeT2(info, glyph_index, pvertices);
+}
+
+STBTT_DEF void stbtt_GetGlyphHMetrics(const stbtt_fontinfo *info, int glyph_index, int *advanceWidth, int *leftSideBearing)
+{
+   stbtt_uint16 numOfLongHorMetrics = ttUSHORT(info->data+info->hhea + 34);
+   if (glyph_index < numOfLongHorMetrics) {
+      if (advanceWidth)     *advanceWidth    = ttSHORT(info->data + info->hmtx + 4*glyph_index);
+      if (leftSideBearing)  *leftSideBearing = ttSHORT(info->data + info->hmtx + 4*glyph_index + 2);
+   } else {
+      if (advanceWidth)     *advanceWidth    = ttSHORT(info->data + info->hmtx + 4*(numOfLongHorMetrics-1));
+      if (leftSideBearing)  *leftSideBearing = ttSHORT(info->data + info->hmtx + 4*numOfLongHorMetrics + 2*(glyph_index - numOfLongHorMetrics));
+   }
+}
+
+STBTT_DEF int  stbtt_GetKerningTableLength(const stbtt_fontinfo *info)
+{
+   stbtt_uint8 *data = info->data + info->kern;
+
+   // we only look at the first table. it must be 'horizontal' and format 0.
+   if (!info->kern)
+      return 0;
+   if (ttUSHORT(data+2) < 1) // number of tables, need at least 1
+      return 0;
+   if (ttUSHORT(data+8) != 1) // horizontal flag must be set in format
+      return 0;
+
+   return ttUSHORT(data+10);
+}
+
+STBTT_DEF int stbtt_GetKerningTable(const stbtt_fontinfo *info, stbtt_kerningentry* table, int table_length)
+{
+   stbtt_uint8 *data = info->data + info->kern;
+   int k, length;
+
+   // we only look at the first table. it must be 'horizontal' and format 0.
+   if (!info->kern)
+      return 0;
+   if (ttUSHORT(data+2) < 1) // number of tables, need at least 1
+      return 0;
+   if (ttUSHORT(data+8) != 1) // horizontal flag must be set in format
+      return 0;
+
+   length = ttUSHORT(data+10);
+   if (table_length < length)
+      length = table_length;
+
+   for (k = 0; k < length; k++)
+   {
+      table[k].glyph1 = ttUSHORT(data+18+(k*6));
+      table[k].glyph2 = ttUSHORT(data+20+(k*6));
+      table[k].advance = ttSHORT(data+22+(k*6));
+   }
+
+   return length;
+}
+
+static int  stbtt__GetGlyphKernInfoAdvance(const stbtt_fontinfo *info, int glyph1, int glyph2)
+{
+   stbtt_uint8 *data = info->data + info->kern;
+   stbtt_uint32 needle, straw;
+   int l, r, m;
+
+   // we only look at the first table. it must be 'horizontal' and format 0.
+   if (!info->kern)
+      return 0;
+   if (ttUSHORT(data+2) < 1) // number of tables, need at least 1
+      return 0;
+   if (ttUSHORT(data+8) != 1) // horizontal flag must be set in format
+      return 0;
+
+   l = 0;
+   r = ttUSHORT(data+10) - 1;
+   needle = glyph1 << 16 | glyph2;
+   while (l <= r) {
+      m = (l + r) >> 1;
+      straw = ttULONG(data+18+(m*6)); // note: unaligned read
+      if (needle < straw)
+         r = m - 1;
+      else if (needle > straw)
+         l = m + 1;
+      else
+         return ttSHORT(data+22+(m*6));
+   }
+   return 0;
+}
+
+static stbtt_int32  stbtt__GetCoverageIndex(stbtt_uint8 *coverageTable, int glyph)
+{
+    stbtt_uint16 coverageFormat = ttUSHORT(coverageTable);
+    switch(coverageFormat) {
+        case 1: {
+            stbtt_uint16 glyphCount = ttUSHORT(coverageTable + 2);
+
+            // Binary search.
+            stbtt_int32 l=0, r=glyphCount-1, m;
+            int straw, needle=glyph;
+            while (l <= r) {
+                stbtt_uint8 *glyphArray = coverageTable + 4;
+                stbtt_uint16 glyphID;
+                m = (l + r) >> 1;
+                glyphID = ttUSHORT(glyphArray + 2 * m);
+                straw = glyphID;
+                if (needle < straw)
+                    r = m - 1;
+                else if (needle > straw)
+                    l = m + 1;
+                else {
+                     return m;
+                }
+            }
+        } break;
+
+        case 2: {
+            stbtt_uint16 rangeCount = ttUSHORT(coverageTable + 2);
+            stbtt_uint8 *rangeArray = coverageTable + 4;
+
+            // Binary search.
+            stbtt_int32 l=0, r=rangeCount-1, m;
+            int strawStart, strawEnd, needle=glyph;
+            while (l <= r) {
+                stbtt_uint8 *rangeRecord;
+                m = (l + r) >> 1;
+                rangeRecord = rangeArray + 6 * m;
+                strawStart = ttUSHORT(rangeRecord);
+                strawEnd = ttUSHORT(rangeRecord + 2);
+                if (needle < strawStart)
+                    r = m - 1;
+                else if (needle > strawEnd)
+                    l = m + 1;
+                else {
+                    stbtt_uint16 startCoverageIndex = ttUSHORT(rangeRecord + 4);
+                    return startCoverageIndex + glyph - strawStart;
+                }
+            }
+        } break;
+
+        default: {
+            // There are no other cases.
+            STBTT_assert(0);
+        } break;
+    }
+
+    return -1;
+}
+
+static stbtt_int32  stbtt__GetGlyphClass(stbtt_uint8 *classDefTable, int glyph)
+{
+    stbtt_uint16 classDefFormat = ttUSHORT(classDefTable);
+    switch(classDefFormat)
+    {
+        case 1: {
+            stbtt_uint16 startGlyphID = ttUSHORT(classDefTable + 2);
+            stbtt_uint16 glyphCount = ttUSHORT(classDefTable + 4);
+            stbtt_uint8 *classDef1ValueArray = classDefTable + 6;
+
+            if (glyph >= startGlyphID && glyph < startGlyphID + glyphCount)
+                return (stbtt_int32)ttUSHORT(classDef1ValueArray + 2 * (glyph - startGlyphID));
+
+            classDefTable = classDef1ValueArray + 2 * glyphCount;
+        } break;
+
+        case 2: {
+            stbtt_uint16 classRangeCount = ttUSHORT(classDefTable + 2);
+            stbtt_uint8 *classRangeRecords = classDefTable + 4;
+
+            // Binary search.
+            stbtt_int32 l=0, r=classRangeCount-1, m;
+            int strawStart, strawEnd, needle=glyph;
+            while (l <= r) {
+                stbtt_uint8 *classRangeRecord;
+                m = (l + r) >> 1;
+                classRangeRecord = classRangeRecords + 6 * m;
+                strawStart = ttUSHORT(classRangeRecord);
+                strawEnd = ttUSHORT(classRangeRecord + 2);
+                if (needle < strawStart)
+                    r = m - 1;
+                else if (needle > strawEnd)
+                    l = m + 1;
+                else
+                    return (stbtt_int32)ttUSHORT(classRangeRecord + 4);
+            }
+
+            classDefTable = classRangeRecords + 6 * classRangeCount;
+        } break;
+
+        default: {
+            // There are no other cases.
+            STBTT_assert(0);
+        } break;
+    }
+
+    return -1;
+}
+
+// Define to STBTT_assert(x) if you want to break on unimplemented formats.
+#define STBTT_GPOS_TODO_assert(x)
+
+static stbtt_int32  stbtt__GetGlyphGPOSInfoAdvance(const stbtt_fontinfo *info, int glyph1, int glyph2)
+{
+    stbtt_uint16 lookupListOffset;
+    stbtt_uint8 *lookupList;
+    stbtt_uint16 lookupCount;
+    stbtt_uint8 *data;
+    stbtt_int32 i;
+
+    if (!info->gpos) return 0;
+
+    data = info->data + info->gpos;
+
+    if (ttUSHORT(data+0) != 1) return 0; // Major version 1
+    if (ttUSHORT(data+2) != 0) return 0; // Minor version 0
+
+    lookupListOffset = ttUSHORT(data+8);
+    lookupList = data + lookupListOffset;
+    lookupCount = ttUSHORT(lookupList);
+
+    for (i=0; i<lookupCount; ++i) {
+        stbtt_uint16 lookupOffset = ttUSHORT(lookupList + 2 + 2 * i);
+        stbtt_uint8 *lookupTable = lookupList + lookupOffset;
+
+        stbtt_uint16 lookupType = ttUSHORT(lookupTable);
+        stbtt_uint16 subTableCount = ttUSHORT(lookupTable + 4);
+        stbtt_uint8 *subTableOffsets = lookupTable + 6;
+        switch(lookupType) {
+            case 2: { // Pair Adjustment Positioning Subtable
+                stbtt_int32 sti;
+                for (sti=0; sti<subTableCount; sti++) {
+                    stbtt_uint16 subtableOffset = ttUSHORT(subTableOffsets + 2 * sti);
+                    stbtt_uint8 *table = lookupTable + subtableOffset;
+                    stbtt_uint16 posFormat = ttUSHORT(table);
+                    stbtt_uint16 coverageOffset = ttUSHORT(table + 2);
+                    stbtt_int32 coverageIndex = stbtt__GetCoverageIndex(table + coverageOffset, glyph1);
+                    if (coverageIndex == -1) continue;
+
+                    switch (posFormat) {
+                        case 1: {
+                            stbtt_int32 l, r, m;
+                            int straw, needle;
+                            stbtt_uint16 valueFormat1 = ttUSHORT(table + 4);
+                            stbtt_uint16 valueFormat2 = ttUSHORT(table + 6);
+                            stbtt_int32 valueRecordPairSizeInBytes = 2;
+                            stbtt_uint16 pairSetCount = ttUSHORT(table + 8);
+                            stbtt_uint16 pairPosOffset = ttUSHORT(table + 10 + 2 * coverageIndex);
+                            stbtt_uint8 *pairValueTable = table + pairPosOffset;
+                            stbtt_uint16 pairValueCount = ttUSHORT(pairValueTable);
+                            stbtt_uint8 *pairValueArray = pairValueTable + 2;
+                            // TODO: Support more formats.
+                            STBTT_GPOS_TODO_assert(valueFormat1 == 4);
+                            if (valueFormat1 != 4) return 0;
+                            STBTT_GPOS_TODO_assert(valueFormat2 == 0);
+                            if (valueFormat2 != 0) return 0;
+
+                            STBTT_assert(coverageIndex < pairSetCount);
+                            STBTT__NOTUSED(pairSetCount);
+
+                            needle=glyph2;
+                            r=pairValueCount-1;
+                            l=0;
+
+                            // Binary search.
+                            while (l <= r) {
+                                stbtt_uint16 secondGlyph;
+                                stbtt_uint8 *pairValue;
+                                m = (l + r) >> 1;
+                                pairValue = pairValueArray + (2 + valueRecordPairSizeInBytes) * m;
+                                secondGlyph = ttUSHORT(pairValue);
+                                straw = secondGlyph;
+                                if (needle < straw)
+                                    r = m - 1;
+                                else if (needle > straw)
+                                    l = m + 1;
+                                else {
+                                    stbtt_int16 xAdvance = ttSHORT(pairValue + 2);
+                                    return xAdvance;
+                                }
+                            }
+                        } break;
+
+                        case 2: {
+                            stbtt_uint16 valueFormat1 = ttUSHORT(table + 4);
+                            stbtt_uint16 valueFormat2 = ttUSHORT(table + 6);
+
+                            stbtt_uint16 classDef1Offset = ttUSHORT(table + 8);
+                            stbtt_uint16 classDef2Offset = ttUSHORT(table + 10);
+                            int glyph1class = stbtt__GetGlyphClass(table + classDef1Offset, glyph1);
+                            int glyph2class = stbtt__GetGlyphClass(table + classDef2Offset, glyph2);
+
+                            stbtt_uint16 class1Count = ttUSHORT(table + 12);
+                            stbtt_uint16 class2Count = ttUSHORT(table + 14);
+                            STBTT_assert(glyph1class < class1Count);
+                            STBTT_assert(glyph2class < class2Count);
+
+                            // TODO: Support more formats.
+                            STBTT_GPOS_TODO_assert(valueFormat1 == 4);
+                            if (valueFormat1 != 4) return 0;
+                            STBTT_GPOS_TODO_assert(valueFormat2 == 0);
+                            if (valueFormat2 != 0) return 0;
+
+                            if (glyph1class >= 0 && glyph1class < class1Count && glyph2class >= 0 && glyph2class < class2Count) {
+                                stbtt_uint8 *class1Records = table + 16;
+                                stbtt_uint8 *class2Records = class1Records + 2 * (glyph1class * class2Count);
+                                stbtt_int16 xAdvance = ttSHORT(class2Records + 2 * glyph2class);
+                                return xAdvance;
+                            }
+                        } break;
+
+                        default: {
+                            // There are no other cases.
+                            STBTT_assert(0);
+                            break;
+                        };
+                    }
+                }
+                break;
+            };
+
+            default:
+                // TODO: Implement other stuff.
+                break;
+        }
+    }
+
+    return 0;
+}
+
+STBTT_DEF int  stbtt_GetGlyphKernAdvance(const stbtt_fontinfo *info, int g1, int g2)
+{
+   int xAdvance = 0;
+
+   if (info->gpos)
+      xAdvance += stbtt__GetGlyphGPOSInfoAdvance(info, g1, g2);
+   else if (info->kern)
+      xAdvance += stbtt__GetGlyphKernInfoAdvance(info, g1, g2);
+
+   return xAdvance;
+}
+
+STBTT_DEF int  stbtt_GetCodepointKernAdvance(const stbtt_fontinfo *info, int ch1, int ch2)
+{
+   if (!info->kern && !info->gpos) // if no kerning table, don't waste time looking up both codepoint->glyphs
+      return 0;
+   return stbtt_GetGlyphKernAdvance(info, stbtt_FindGlyphIndex(info,ch1), stbtt_FindGlyphIndex(info,ch2));
+}
+
+STBTT_DEF void stbtt_GetCodepointHMetrics(const stbtt_fontinfo *info, int codepoint, int *advanceWidth, int *leftSideBearing)
+{
+   stbtt_GetGlyphHMetrics(info, stbtt_FindGlyphIndex(info,codepoint), advanceWidth, leftSideBearing);
+}
+
+STBTT_DEF void stbtt_GetFontVMetrics(const stbtt_fontinfo *info, int *ascent, int *descent, int *lineGap)
+{
+   if (ascent ) *ascent  = ttSHORT(info->data+info->hhea + 4);
+   if (descent) *descent = ttSHORT(info->data+info->hhea + 6);
+   if (lineGap) *lineGap = ttSHORT(info->data+info->hhea + 8);
+}
+
+STBTT_DEF int  stbtt_GetFontVMetricsOS2(const stbtt_fontinfo *info, int *typoAscent, int *typoDescent, int *typoLineGap)
+{
+   int tab = stbtt__find_table(info->data, info->fontstart, "OS/2");
+   if (!tab)
+      return 0;
+   if (typoAscent ) *typoAscent  = ttSHORT(info->data+tab + 68);
+   if (typoDescent) *typoDescent = ttSHORT(info->data+tab + 70);
+   if (typoLineGap) *typoLineGap = ttSHORT(info->data+tab + 72);
+   return 1;
+}
+
+STBTT_DEF void stbtt_GetFontBoundingBox(const stbtt_fontinfo *info, int *x0, int *y0, int *x1, int *y1)
+{
+   *x0 = ttSHORT(info->data + info->head + 36);
+   *y0 = ttSHORT(info->data + info->head + 38);
+   *x1 = ttSHORT(info->data + info->head + 40);
+   *y1 = ttSHORT(info->data + info->head + 42);
+}
+
+STBTT_DEF double stbtt_ScaleForPixelHeight(const stbtt_fontinfo *info, double height)
+{
+   int fheight = ttSHORT(info->data + info->hhea + 4) - ttSHORT(info->data + info->hhea + 6);
+   return (double) height / fheight;
+}
+
+STBTT_DEF double stbtt_ScaleForMappingEmToPixels(const stbtt_fontinfo *info, double pixels)
+{
+   int unitsPerEm = ttUSHORT(info->data + info->head + 18);
+   return pixels / unitsPerEm;
+}
+
+STBTT_DEF void stbtt_FreeShape(const stbtt_fontinfo *info, stbtt_vertex *v)
+{
+   STBTT_free(v, info->userdata);
+}
+
+STBTT_DEF stbtt_uint8 *stbtt_FindSVGDoc(const stbtt_fontinfo *info, int gl)
+{
+   int i;
+   stbtt_uint8 *data = info->data;
+   stbtt_uint8 *svg_doc_list = data + stbtt__get_svg((stbtt_fontinfo *) info);
+
+   int numEntries = ttUSHORT(svg_doc_list);
+   stbtt_uint8 *svg_docs = svg_doc_list + 2;
+
+   for(i=0; i<numEntries; i++) {
+      stbtt_uint8 *svg_doc = svg_docs + (12 * i);
+      if ((gl >= ttUSHORT(svg_doc)) && (gl <= ttUSHORT(svg_doc + 2)))
+         return svg_doc;
+   }
+   return 0;
+}
+
+STBTT_DEF int stbtt_GetGlyphSVG(const stbtt_fontinfo *info, int gl, const char **svg)
+{
+   stbtt_uint8 *data = info->data;
+   stbtt_uint8 *svg_doc;
+
+   if (info->svg == 0)
+      return 0;
+
+   svg_doc = stbtt_FindSVGDoc(info, gl);
+   if (svg_doc != NULL) {
+      *svg = (char *) data + info->svg + ttULONG(svg_doc + 4);
+      return ttULONG(svg_doc + 8);
+   } else {
+      return 0;
+   }
+}
+
+STBTT_DEF int stbtt_GetCodepointSVG(const stbtt_fontinfo *info, int unicode_codepoint, const char **svg)
+{
+   return stbtt_GetGlyphSVG(info, stbtt_FindGlyphIndex(info, unicode_codepoint), svg);
+}
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// antialiasing software rasterizer
+//
+
+STBTT_DEF void stbtt_GetGlyphBitmapBoxSubpixel(const stbtt_fontinfo *font, int glyph, double scale_x, double scale_y,double shift_x, double shift_y, int *ix0, int *iy0, int *ix1, int *iy1)
+{
+   int x0=0,y0=0,x1,y1; // =0 suppresses compiler warning
+   if (!stbtt_GetGlyphBox(font, glyph, &x0,&y0,&x1,&y1)) {
+      // e.g. space character
+      if (ix0) *ix0 = 0;
+      if (iy0) *iy0 = 0;
+      if (ix1) *ix1 = 0;
+      if (iy1) *iy1 = 0;
+   } else {
+      // move to integral bboxes (treating pixels as little squares, what pixels get touched)?
+      if (ix0) *ix0 = STBTT_ifloor( x0 * scale_x + shift_x);
+      if (iy0) *iy0 = STBTT_ifloor(-y1 * scale_y + shift_y);
+      if (ix1) *ix1 = STBTT_iceil ( x1 * scale_x + shift_x);
+      if (iy1) *iy1 = STBTT_iceil (-y0 * scale_y + shift_y);
+   }
+}
+
+STBTT_DEF void stbtt_GetGlyphBitmapBox(const stbtt_fontinfo *font, int glyph, double scale_x, double scale_y, int *ix0, int *iy0, int *ix1, int *iy1)
+{
+   stbtt_GetGlyphBitmapBoxSubpixel(font, glyph, scale_x, scale_y,0.0f,0.0f, ix0, iy0, ix1, iy1);
+}
+
+STBTT_DEF void stbtt_GetCodepointBitmapBoxSubpixel(const stbtt_fontinfo *font, int codepoint, double scale_x, double scale_y, double shift_x, double shift_y, int *ix0, int *iy0, int *ix1, int *iy1)
+{
+   stbtt_GetGlyphBitmapBoxSubpixel(font, stbtt_FindGlyphIndex(font,codepoint), scale_x, scale_y,shift_x,shift_y, ix0,iy0,ix1,iy1);
+}
+
+STBTT_DEF void stbtt_GetCodepointBitmapBox(const stbtt_fontinfo *font, int codepoint, double scale_x, double scale_y, int *ix0, int *iy0, int *ix1, int *iy1)
+{
+   stbtt_GetCodepointBitmapBoxSubpixel(font, codepoint, scale_x, scale_y,0.0f,0.0f, ix0,iy0,ix1,iy1);
+}
+
+//////////////////////////////////////////////////////////////////////////////
+//
+//  Rasterizer
+
+typedef struct stbtt__hheap_chunk
+{
+   struct stbtt__hheap_chunk *next;
+} stbtt__hheap_chunk;
+
+typedef struct stbtt__hheap
+{
+   struct stbtt__hheap_chunk *head;
+   void   *first_free;
+   int    num_remaining_in_head_chunk;
+} stbtt__hheap;
+
+static void *stbtt__hheap_alloc(stbtt__hheap *hh, size_t size, void *userdata)
+{
+   if (hh->first_free) {
+      void *p = hh->first_free;
+      hh->first_free = * (void **) p;
+      return p;
+   } else {
+      if (hh->num_remaining_in_head_chunk == 0) {
+         int count = (size < 32 ? 2000 : size < 128 ? 800 : 100);
+         stbtt__hheap_chunk *c = (stbtt__hheap_chunk *) STBTT_malloc(sizeof(stbtt__hheap_chunk) + size * count, userdata);
+         if (c == NULL)
+            return NULL;
+         c->next = hh->head;
+         hh->head = c;
+         hh->num_remaining_in_head_chunk = count;
+      }
+      --hh->num_remaining_in_head_chunk;
+      return (char *) (hh->head) + sizeof(stbtt__hheap_chunk) + size * hh->num_remaining_in_head_chunk;
+   }
+}
+
+static void stbtt__hheap_free(stbtt__hheap *hh, void *p)
+{
+   *(void **) p = hh->first_free;
+   hh->first_free = p;
+}
+
+static void stbtt__hheap_cleanup(stbtt__hheap *hh, void *userdata)
+{
+   stbtt__hheap_chunk *c = hh->head;
+   while (c) {
+      stbtt__hheap_chunk *n = c->next;
+      STBTT_free(c, userdata);
+      c = n;
+   }
+}
+
+typedef struct stbtt__edge {
+   double x0,y0, x1,y1;
+   int invert;
+} stbtt__edge;
+
+
+typedef struct stbtt__active_edge
+{
+   struct stbtt__active_edge *next;
+   #if STBTT_RASTERIZER_VERSION==1
+   int x,dx;
+   double ey;
+   int direction;
+   #elif STBTT_RASTERIZER_VERSION==2
+   double fx,fdx,fdy;
+   double direction;
+   double sy;
+   double ey;
+   #else
+   #error "Unrecognized value of STBTT_RASTERIZER_VERSION"
+   #endif
+} stbtt__active_edge;
+
+#if STBTT_RASTERIZER_VERSION == 1
+#define STBTT_FIXSHIFT   10
+#define STBTT_FIX        (1 << STBTT_FIXSHIFT)
+#define STBTT_FIXMASK    (STBTT_FIX-1)
+
+static stbtt__active_edge *stbtt__new_active(stbtt__hheap *hh, stbtt__edge *e, int off_x, double start_point, void *userdata)
+{
+   stbtt__active_edge *z = (stbtt__active_edge *) stbtt__hheap_alloc(hh, sizeof(*z), userdata);
+   double dxdy = (e->x1 - e->x0) / (e->y1 - e->y0);
+   STBTT_assert(z != NULL);
+   if (!z) return z;
+
+   // round dx down to avoid overshooting
+   if (dxdy < 0)
+      z->dx = -STBTT_ifloor(STBTT_FIX * -dxdy);
+   else
+      z->dx = STBTT_ifloor(STBTT_FIX * dxdy);
+
+   z->x = STBTT_ifloor(STBTT_FIX * e->x0 + z->dx * (start_point - e->y0)); // use z->dx so when we offset later it's by the same amount
+   z->x -= off_x * STBTT_FIX;
+
+   z->ey = e->y1;
+   z->next = 0;
+   z->direction = e->invert ? 1 : -1;
+   return z;
+}
+#elif STBTT_RASTERIZER_VERSION == 2
+static stbtt__active_edge *stbtt__new_active(stbtt__hheap *hh, stbtt__edge *e, int off_x, double start_point, void *userdata)
+{
+   stbtt__active_edge *z = (stbtt__active_edge *) stbtt__hheap_alloc(hh, sizeof(*z), userdata);
+   double dxdy = (e->x1 - e->x0) / (e->y1 - e->y0);
+   STBTT_assert(z != NULL);
+   //STBTT_assert(e->y0 <= start_point);
+   if (!z) return z;
+   z->fdx = dxdy;
+   z->fdy = dxdy != 0.0f ? (1.0f/dxdy) : 0.0f;
+   z->fx = e->x0 + dxdy * (start_point - e->y0);
+   z->fx -= off_x;
+   z->direction = e->invert ? 1.0f : -1.0f;
+   z->sy = e->y0;
+   z->ey = e->y1;
+   z->next = 0;
+   return z;
+}
+#else
+#error "Unrecognized value of STBTT_RASTERIZER_VERSION"
+#endif
+
+#if STBTT_RASTERIZER_VERSION == 1
+// note: this routine clips fills that extend off the edges... ideally this
+// wouldn't happen, but it could happen if the truetype glyph bounding boxes
+// are wrong, or if the user supplies a too-small bitmap
+static void stbtt__fill_active_edges(unsigned char *scanline, int len, stbtt__active_edge *e, int max_weight)
+{
+   // non-zero winding fill
+   int x0=0, w=0;
+
+   while (e) {
+      if (w == 0) {
+         // if we're currently at zero, we need to record the edge start point
+         x0 = e->x; w += e->direction;
+      } else {
+         int x1 = e->x; w += e->direction;
+         // if we went to zero, we need to draw
+         if (w == 0) {
+            int i = x0 >> STBTT_FIXSHIFT;
+            int j = x1 >> STBTT_FIXSHIFT;
+
+            if (i < len && j >= 0) {
+               if (i == j) {
+                  // x0,x1 are the same pixel, so compute combined coverage
+                  scanline[i] = scanline[i] + (stbtt_uint8) ((x1 - x0) * max_weight >> STBTT_FIXSHIFT);
+               } else {
+                  if (i >= 0) // add antialiasing for x0
+                     scanline[i] = scanline[i] + (stbtt_uint8) (((STBTT_FIX - (x0 & STBTT_FIXMASK)) * max_weight) >> STBTT_FIXSHIFT);
+                  else
+                     i = -1; // clip
+
+                  if (j < len) // add antialiasing for x1
+                     scanline[j] = scanline[j] + (stbtt_uint8) (((x1 & STBTT_FIXMASK) * max_weight) >> STBTT_FIXSHIFT);
+                  else
+                     j = len; // clip
+
+                  for (++i; i < j; ++i) // fill pixels between x0 and x1
+                     scanline[i] = scanline[i] + (stbtt_uint8) max_weight;
+               }
+            }
+         }
+      }
+
+      e = e->next;
+   }
+}
+
+static void stbtt__rasterize_sorted_edges(stbtt__bitmap *result, stbtt__edge *e, int n, int vsubsample, int off_x, int off_y, void *userdata)
+{
+   stbtt__hheap hh = { 0, 0, 0 };
+   stbtt__active_edge *active = NULL;
+   int y,j=0;
+   int max_weight = (255 / vsubsample);  // weight per vertical scanline
+   int s; // vertical subsample index
+   unsigned char scanline_data[512], *scanline;
+
+   if (result->w > 512)
+      scanline = (unsigned char *) STBTT_malloc(result->w, userdata);
+   else
+      scanline = scanline_data;
+
+   y = off_y * vsubsample;
+   e[n].y0 = (off_y + result->h) * (double) vsubsample + 1;
+
+   while (j < result->h) {
+      STBTT_memset(scanline, 0, result->w);
+      for (s=0; s < vsubsample; ++s) {
+         // find center of pixel for this scanline
+         double scan_y = y + 0.5f;
+         stbtt__active_edge **step = &active;
+
+         // update all active edges;
+         // remove all active edges that terminate before the center of this scanline
+         while (*step) {
+            stbtt__active_edge * z = *step;
+            if (z->ey <= scan_y) {
+               *step = z->next; // delete from list
+               STBTT_assert(z->direction);
+               z->direction = 0;
+               stbtt__hheap_free(&hh, z);
+            } else {
+               z->x += z->dx; // advance to position for current scanline
+               step = &((*step)->next); // advance through list
+            }
+         }
+
+         // resort the list if needed
+         for(;;) {
+            int changed=0;
+            step = &active;
+            while (*step && (*step)->next) {
+               if ((*step)->x > (*step)->next->x) {
+                  stbtt__active_edge *t = *step;
+                  stbtt__active_edge *q = t->next;
+
+                  t->next = q->next;
+                  q->next = t;
+                  *step = q;
+                  changed = 1;
+               }
+               step = &(*step)->next;
+            }
+            if (!changed) break;
+         }
+
+         // insert all edges that start before the center of this scanline -- omit ones that also end on this scanline
+         while (e->y0 <= scan_y) {
+            if (e->y1 > scan_y) {
+               stbtt__active_edge *z = stbtt__new_active(&hh, e, off_x, scan_y, userdata);
+               if (z != NULL) {
+                  // find insertion point
+                  if (active == NULL)
+                     active = z;
+                  else if (z->x < active->x) {
+                     // insert at front
+                     z->next = active;
+                     active = z;
+                  } else {
+                     // find thing to insert AFTER
+                     stbtt__active_edge *p = active;
+                     while (p->next && p->next->x < z->x)
+                        p = p->next;
+                     // at this point, p->next->x is NOT < z->x
+                     z->next = p->next;
+                     p->next = z;
+                  }
+               }
+            }
+            ++e;
+         }
+
+         // now process all active edges in XOR fashion
+         if (active)
+            stbtt__fill_active_edges(scanline, result->w, active, max_weight);
+
+         ++y;
+      }
+      STBTT_memcpy(result->pixels + j * result->stride, scanline, result->w);
+      ++j;
+   }
+
+   stbtt__hheap_cleanup(&hh, userdata);
+
+   if (scanline != scanline_data)
+      STBTT_free(scanline, userdata);
+}
+
+#elif STBTT_RASTERIZER_VERSION == 2
+
+// the edge passed in here does not cross the vertical line at x or the vertical line at x+1
+// (i.e. it has already been clipped to those)
+static void stbtt__handle_clipped_edge(double *scanline, int x, stbtt__active_edge *e, double x0, double y0, double x1, double y1)
+{
+   if (y0 == y1) return;
+   STBTT_assert(y0 < y1);
+   STBTT_assert(e->sy <= e->ey);
+   if (y0 > e->ey) return;
+   if (y1 < e->sy) return;
+   if (y0 < e->sy) {
+      x0 += (x1-x0) * (e->sy - y0) / (y1-y0);
+      y0 = e->sy;
+   }
+   if (y1 > e->ey) {
+      x1 += (x1-x0) * (e->ey - y1) / (y1-y0);
+      y1 = e->ey;
+   }
+
+   if (x0 == x)
+      STBTT_assert(x1 <= x+1);
+   else if (x0 == x+1)
+      STBTT_assert(x1 >= x);
+   else if (x0 <= x)
+      STBTT_assert(x1 <= x);
+   else if (x0 >= x+1)
+      STBTT_assert(x1 >= x+1);
+   else
+      STBTT_assert(x1 >= x && x1 <= x+1);
+
+   if (x0 <= x && x1 <= x)
+      scanline[x] += e->direction * (y1-y0);
+   else if (x0 >= x+1 && x1 >= x+1)
+      ;
+   else {
+      STBTT_assert(x0 >= x && x0 <= x+1 && x1 >= x && x1 <= x+1);
+      scanline[x] += e->direction * (y1-y0) * (1-((x0-x)+(x1-x))/2); // coverage = 1 - average x position
+   }
+}
+
+static void stbtt__fill_active_edges_new(double *scanline, double *scanline_fill, int len, stbtt__active_edge *e, double y_top)
+{
+   double y_bottom = y_top+1;
+
+   while (e) {
+      // brute force every pixel
+
+      // compute intersection points with top & bottom
+      STBTT_assert(e->ey >= y_top);
+
+      if (e->fdx == 0) {
+         double x0 = e->fx;
+         if (x0 < len) {
+            if (x0 >= 0) {
+               stbtt__handle_clipped_edge(scanline,(int) x0,e, x0,y_top, x0,y_bottom);
+               stbtt__handle_clipped_edge(scanline_fill-1,(int) x0+1,e, x0,y_top, x0,y_bottom);
+            } else {
+               stbtt__handle_clipped_edge(scanline_fill-1,0,e, x0,y_top, x0,y_bottom);
+            }
+         }
+      } else {
+         double x0 = e->fx;
+         double dx = e->fdx;
+         double xb = x0 + dx;
+         double x_top, x_bottom;
+         double sy0,sy1;
+         double dy = e->fdy;
+         STBTT_assert(e->sy <= y_bottom && e->ey >= y_top);
+
+         // compute endpoints of line segment clipped to this scanline (if the
+         // line segment starts on this scanline. x0 is the intersection of the
+         // line with y_top, but that may be off the line segment.
+         if (e->sy > y_top) {
+            x_top = x0 + dx * (e->sy - y_top);
+            sy0 = e->sy;
+         } else {
+            x_top = x0;
+            sy0 = y_top;
+         }
+         if (e->ey < y_bottom) {
+            x_bottom = x0 + dx * (e->ey - y_top);
+            sy1 = e->ey;
+         } else {
+            x_bottom = xb;
+            sy1 = y_bottom;
+         }
+
+         if (x_top >= 0 && x_bottom >= 0 && x_top < len && x_bottom < len) {
+            // from here on, we don't have to range check x values
+
+            if ((int) x_top == (int) x_bottom) {
+               double height;
+               // simple case, only spans one pixel
+               int x = (int) x_top;
+               height = sy1 - sy0;
+               STBTT_assert(x >= 0 && x < len);
+               scanline[x] += e->direction * (1-((x_top - x) + (x_bottom-x))/2)  * height;
+               scanline_fill[x] += e->direction * height; // everything right of this pixel is filled
+            } else {
+               int x,x1,x2;
+               double y_crossing, step, sign, area;
+               // covers 2+ pixels
+               if (x_top > x_bottom) {
+                  // flip scanline vertically; signed area is the same
+                  double t;
+                  sy0 = y_bottom - (sy0 - y_top);
+                  sy1 = y_bottom - (sy1 - y_top);
+                  t = sy0, sy0 = sy1, sy1 = t;
+                  t = x_bottom, x_bottom = x_top, x_top = t;
+                  dx = -dx;
+                  dy = -dy;
+                  t = x0, x0 = xb, xb = t;
+               }
+
+               x1 = (int) x_top;
+               x2 = (int) x_bottom;
+               // compute intersection with y axis at x1+1
+               y_crossing = (x1+1 - x0) * dy + y_top;
+
+               sign = e->direction;
+               // area of the rectangle covered from y0..y_crossing
+               area = sign * (y_crossing-sy0);
+               // area of the triangle (x_top,y0), (x+1,y0), (x+1,y_crossing)
+               scanline[x1] += area * (1-((x_top - x1)+(x1+1-x1))/2);
+
+               step = sign * dy;
+               for (x = x1+1; x < x2; ++x) {
+                  scanline[x] += area + step/2;
+                  area += step;
+               }
+               y_crossing += dy * (x2 - (x1+1));
+
+               STBTT_assert(STBTT_fabs(area) <= 1.01f);
+
+               scanline[x2] += area + sign * (1-((x2-x2)+(x_bottom-x2))/2) * (sy1-y_crossing);
+
+               scanline_fill[x2] += sign * (sy1-sy0);
+            }
+         } else {
+            // if edge goes outside of box we're drawing, we require
+            // clipping logic. since this does not match the intended use
+            // of this library, we use a different, very slow brute
+            // force implementation
+            int x;
+            for (x=0; x < len; ++x) {
+               // cases:
+               //
+               // there can be up to two intersections with the pixel. any intersection
+               // with left or right edges can be handled by splitting into two (or three)
+               // regions. intersections with top & bottom do not necessitate case-wise logic.
+               //
+               // the old way of doing this found the intersections with the left & right edges,
+               // then used some simple logic to produce up to three segments in sorted order
+               // from top-to-bottom. however, this had a problem: if an x edge was epsilon
+               // across the x border, then the corresponding y position might not be distinct
+               // from the other y segment, and it might ignored as an empty segment. to avoid
+               // that, we need to explicitly produce segments based on x positions.
+
+               // rename variables to clearly-defined pairs
+               double y0 = y_top;
+               double x1 = (double) (x);
+               double x2 = (double) (x+1);
+               double x3 = xb;
+               double y3 = y_bottom;
+
+               // x = e->x + e->dx * (y-y_top)
+               // (y-y_top) = (x - e->x) / e->dx
+               // y = (x - e->x) / e->dx + y_top
+               double y1 = (x - x0) / dx + y_top;
+               double y2 = (x+1 - x0) / dx + y_top;
+
+               if (x0 < x1 && x3 > x2) {         // three segments descending down-right
+                  stbtt__handle_clipped_edge(scanline,x,e, x0,y0, x1,y1);
+                  stbtt__handle_clipped_edge(scanline,x,e, x1,y1, x2,y2);
+                  stbtt__handle_clipped_edge(scanline,x,e, x2,y2, x3,y3);
+               } else if (x3 < x1 && x0 > x2) {  // three segments descending down-left
+                  stbtt__handle_clipped_edge(scanline,x,e, x0,y0, x2,y2);
+                  stbtt__handle_clipped_edge(scanline,x,e, x2,y2, x1,y1);
+                  stbtt__handle_clipped_edge(scanline,x,e, x1,y1, x3,y3);
+               } else if (x0 < x1 && x3 > x1) {  // two segments across x, down-right
+                  stbtt__handle_clipped_edge(scanline,x,e, x0,y0, x1,y1);
+                  stbtt__handle_clipped_edge(scanline,x,e, x1,y1, x3,y3);
+               } else if (x3 < x1 && x0 > x1) {  // two segments across x, down-left
+                  stbtt__handle_clipped_edge(scanline,x,e, x0,y0, x1,y1);
+                  stbtt__handle_clipped_edge(scanline,x,e, x1,y1, x3,y3);
+               } else if (x0 < x2 && x3 > x2) {  // two segments across x+1, down-right
+                  stbtt__handle_clipped_edge(scanline,x,e, x0,y0, x2,y2);
+                  stbtt__handle_clipped_edge(scanline,x,e, x2,y2, x3,y3);
+               } else if (x3 < x2 && x0 > x2) {  // two segments across x+1, down-left
+                  stbtt__handle_clipped_edge(scanline,x,e, x0,y0, x2,y2);
+                  stbtt__handle_clipped_edge(scanline,x,e, x2,y2, x3,y3);
+               } else {  // one segment
+                  stbtt__handle_clipped_edge(scanline,x,e, x0,y0, x3,y3);
+               }
+            }
+         }
+      }
+      e = e->next;
+   }
+}
+
+// directly AA rasterize edges w/o supersampling
+static void stbtt__rasterize_sorted_edges(stbtt__bitmap *result, stbtt__edge *e, int n, int vsubsample, int off_x, int off_y, void *userdata)
+{
+   stbtt__hheap hh = { 0, 0, 0 };
+   stbtt__active_edge *active = NULL;
+   int y,j=0, i;
+   double scanline_data[129], *scanline, *scanline2;
+
+   STBTT__NOTUSED(vsubsample);
+
+   if (result->w > 64)
+      scanline = (double *) STBTT_malloc((result->w*2+1) * sizeof(double), userdata);
+   else
+      scanline = scanline_data;
+
+   scanline2 = scanline + result->w;
+
+   y = off_y;
+   e[n].y0 = (double) (off_y + result->h) + 1;
+
+   while (j < result->h) {
+      // find center of pixel for this scanline
+      double scan_y_top    = y + 0.0f;
+      double scan_y_bottom = y + 1.0f;
+      stbtt__active_edge **step = &active;
+
+      STBTT_memset(scanline , 0, result->w*sizeof(scanline[0]));
+      STBTT_memset(scanline2, 0, (result->w+1)*sizeof(scanline[0]));
+
+      // update all active edges;
+      // remove all active edges that terminate before the top of this scanline
+      while (*step) {
+         stbtt__active_edge * z = *step;
+         if (z->ey <= scan_y_top) {
+            *step = z->next; // delete from list
+            STBTT_assert(z->direction);
+            z->direction = 0;
+            stbtt__hheap_free(&hh, z);
+         } else {
+            step = &((*step)->next); // advance through list
+         }
+      }
+
+      // insert all edges that start before the bottom of this scanline
+      while (e->y0 <= scan_y_bottom) {
+         if (e->y0 != e->y1) {
+            stbtt__active_edge *z = stbtt__new_active(&hh, e, off_x, scan_y_top, userdata);
+            if (z != NULL) {
+               if (j == 0 && off_y != 0) {
+                  if (z->ey < scan_y_top) {
+                     // this can happen due to subpixel positioning and some kind of fp rounding error i think
+                     z->ey = scan_y_top;
+                  }
+               }
+               STBTT_assert(z->ey >= scan_y_top); // if we get really unlucky a tiny bit of an edge can be out of bounds
+               // insert at front
+               z->next = active;
+               active = z;
+            }
+         }
+         ++e;
+      }
+
+      // now process all active edges
+      if (active)
+         stbtt__fill_active_edges_new(scanline, scanline2+1, result->w, active, scan_y_top);
+
+      {
+         double sum = 0;
+         for (i=0; i < result->w; ++i) {
+            double k;
+            int m;
+            sum += scanline2[i];
+            k = scanline[i] + sum;
+            k = (double) STBTT_fabs(k)*255 + 0.5f;
+            m = (int) k;
+            if (m > 255) m = 255;
+            result->pixels[j*result->stride + i] = (unsigned char) m;
+         }
+      }
+      // advance all the edges
+      step = &active;
+      while (*step) {
+         stbtt__active_edge *z = *step;
+         z->fx += z->fdx; // advance to position for current scanline
+         step = &((*step)->next); // advance through list
+      }
+
+      ++y;
+      ++j;
+   }
+
+   stbtt__hheap_cleanup(&hh, userdata);
+
+   if (scanline != scanline_data)
+      STBTT_free(scanline, userdata);
+}
+#else
+#error "Unrecognized value of STBTT_RASTERIZER_VERSION"
+#endif
+
+#define STBTT__COMPARE(a,b)  ((a)->y0 < (b)->y0)
+
+static void stbtt__sort_edges_ins_sort(stbtt__edge *p, int n)
+{
+   int i,j;
+   for (i=1; i < n; ++i) {
+      stbtt__edge t = p[i], *a = &t;
+      j = i;
+      while (j > 0) {
+         stbtt__edge *b = &p[j-1];
+         int c = STBTT__COMPARE(a,b);
+         if (!c) break;
+         p[j] = p[j-1];
+         --j;
+      }
+      if (i != j)
+         p[j] = t;
+   }
+}
+
+static void stbtt__sort_edges_quicksort(stbtt__edge *p, int n)
+{
+   /* threshold for transitioning to insertion sort */
+   while (n > 12) {
+      stbtt__edge t;
+      int c01,c12,c,m,i,j;
+
+      /* compute median of three */
+      m = n >> 1;
+      c01 = STBTT__COMPARE(&p[0],&p[m]);
+      c12 = STBTT__COMPARE(&p[m],&p[n-1]);
+      /* if 0 >= mid >= end, or 0 < mid < end, then use mid */
+      if (c01 != c12) {
+         /* otherwise, we'll need to swap something else to middle */
+         int z;
+         c = STBTT__COMPARE(&p[0],&p[n-1]);
+         /* 0>mid && mid<n:  0>n => n; 0<n => 0 */
+         /* 0<mid && mid>n:  0>n => 0; 0<n => n */
+         z = (c == c12) ? 0 : n-1;
+         t = p[z];
+         p[z] = p[m];
+         p[m] = t;
+      }
+      /* now p[m] is the median-of-three */
+      /* swap it to the beginning so it won't move around */
+      t = p[0];
+      p[0] = p[m];
+      p[m] = t;
+
+      /* partition loop */
+      i=1;
+      j=n-1;
+      for(;;) {
+         /* handling of equality is crucial here */
+         /* for sentinels & efficiency with duplicates */
+         for (;;++i) {
+            if (!STBTT__COMPARE(&p[i], &p[0])) break;
+         }
+         for (;;--j) {
+            if (!STBTT__COMPARE(&p[0], &p[j])) break;
+         }
+         /* make sure we haven't crossed */
+         if (i >= j) break;
+         t = p[i];
+         p[i] = p[j];
+         p[j] = t;
+
+         ++i;
+         --j;
+      }
+      /* recurse on smaller side, iterate on larger */
+      if (j < (n-i)) {
+         stbtt__sort_edges_quicksort(p,j);
+         p = p+i;
+         n = n-i;
+      } else {
+         stbtt__sort_edges_quicksort(p+i, n-i);
+         n = j;
+      }
+   }
+}
+
+static void stbtt__sort_edges(stbtt__edge *p, int n)
+{
+   stbtt__sort_edges_quicksort(p, n);
+   stbtt__sort_edges_ins_sort(p, n);
+}
+
+typedef struct
+{
+   double x,y;
+} stbtt__point;
+
+static void stbtt__rasterize(stbtt__bitmap *result, stbtt__point *pts, int *wcount, int windings, double scale_x, double scale_y, double shift_x, double shift_y, int off_x, int off_y, int invert, void *userdata)
+{
+   double y_scale_inv = invert ? -scale_y : scale_y;
+   stbtt__edge *e;
+   int n,i,j,k,m;
+#if STBTT_RASTERIZER_VERSION == 1
+   int vsubsample = result->h < 8 ? 15 : 5;
+#elif STBTT_RASTERIZER_VERSION == 2
+   int vsubsample = 1;
+#else
+   #error "Unrecognized value of STBTT_RASTERIZER_VERSION"
+#endif
+   // vsubsample should divide 255 evenly; otherwise we won't reach full opacity
+
+   // now we have to blow out the windings into explicit edge lists
+   n = 0;
+   for (i=0; i < windings; ++i)
+      n += wcount[i];
+
+   e = (stbtt__edge *) STBTT_malloc(sizeof(*e) * (n+1), userdata); // add an extra one as a sentinel
+   if (e == 0) return;
+   n = 0;
+
+   m=0;
+   for (i=0; i < windings; ++i) {
+      stbtt__point *p = pts + m;
+      m += wcount[i];
+      j = wcount[i]-1;
+      for (k=0; k < wcount[i]; j=k++) {
+         int a=k,b=j;
+         // skip the edge if horizontal
+         if (p[j].y == p[k].y)
+            continue;
+         // add edge from j to k to the list
+         e[n].invert = 0;
+         if (invert ? p[j].y > p[k].y : p[j].y < p[k].y) {
+            e[n].invert = 1;
+            a=j,b=k;
+         }
+         e[n].x0 = p[a].x * scale_x + shift_x;
+         e[n].y0 = (p[a].y * y_scale_inv + shift_y) * vsubsample;
+         e[n].x1 = p[b].x * scale_x + shift_x;
+         e[n].y1 = (p[b].y * y_scale_inv + shift_y) * vsubsample;
+         ++n;
+      }
+   }
+
+   // now sort the edges by their highest point (should snap to integer, and then by x)
+   //STBTT_sort(e, n, sizeof(e[0]), stbtt__edge_compare);
+   stbtt__sort_edges(e, n);
+
+   // now, traverse the scanlines and find the intersections on each scanline, use xor winding rule
+   stbtt__rasterize_sorted_edges(result, e, n, vsubsample, off_x, off_y, userdata);
+
+   STBTT_free(e, userdata);
+}
+
+static void stbtt__add_point(stbtt__point *points, int n, double x, double y)
+{
+   if (!points) return; // during first pass, it's unallocated
+   points[n].x = x;
+   points[n].y = y;
+}
+
+// tessellate until threshold p is happy... @TODO warped to compensate for non-linear stretching
+static int stbtt__tesselate_curve(stbtt__point *points, int *num_points, double x0, double y0, double x1, double y1, double x2, double y2, double objspace_flatness_squared, int n)
+{
+   // midpoint
+   double mx = (x0 + 2*x1 + x2)/4;
+   double my = (y0 + 2*y1 + y2)/4;
+   // versus directly drawn line
+   double dx = (x0+x2)/2 - mx;
+   double dy = (y0+y2)/2 - my;
+   if (n > 16) // 65536 segments on one curve better be enough!
+      return 1;
+   if (dx*dx+dy*dy > objspace_flatness_squared) { // half-pixel error allowed... need to be smaller if AA
+      stbtt__tesselate_curve(points, num_points, x0,y0, (x0+x1)/2.0f,(y0+y1)/2.0f, mx,my, objspace_flatness_squared,n+1);
+      stbtt__tesselate_curve(points, num_points, mx,my, (x1+x2)/2.0f,(y1+y2)/2.0f, x2,y2, objspace_flatness_squared,n+1);
+   } else {
+      stbtt__add_point(points, *num_points,x2,y2);
+      *num_points = *num_points+1;
+   }
+   return 1;
+}
+
+static void stbtt__tesselate_cubic(stbtt__point *points, int *num_points, double x0, double y0, double x1, double y1, double x2, double y2, double x3, double y3, double objspace_flatness_squared, int n)
+{
+   // @TODO this "flatness" calculation is just made-up nonsense that seems to work well enough
+   double dx0 = x1-x0;
+   double dy0 = y1-y0;
+   double dx1 = x2-x1;
+   double dy1 = y2-y1;
+   double dx2 = x3-x2;
+   double dy2 = y3-y2;
+   double dx = x3-x0;
+   double dy = y3-y0;
+   double longlen = (double) (STBTT_sqrt(dx0*dx0+dy0*dy0)+STBTT_sqrt(dx1*dx1+dy1*dy1)+STBTT_sqrt(dx2*dx2+dy2*dy2));
+   double shortlen = (double) STBTT_sqrt(dx*dx+dy*dy);
+   double flatness_squared = longlen*longlen-shortlen*shortlen;
+
+   if (n > 16) // 65536 segments on one curve better be enough!
+      return;
+
+   if (flatness_squared > objspace_flatness_squared) {
+      double x01 = (x0+x1)/2;
+      double y01 = (y0+y1)/2;
+      double x12 = (x1+x2)/2;
+      double y12 = (y1+y2)/2;
+      double x23 = (x2+x3)/2;
+      double y23 = (y2+y3)/2;
+
+      double xa = (x01+x12)/2;
+      double ya = (y01+y12)/2;
+      double xb = (x12+x23)/2;
+      double yb = (y12+y23)/2;
+
+      double mx = (xa+xb)/2;
+      double my = (ya+yb)/2;
+
+      stbtt__tesselate_cubic(points, num_points, x0,y0, x01,y01, xa,ya, mx,my, objspace_flatness_squared,n+1);
+      stbtt__tesselate_cubic(points, num_points, mx,my, xb,yb, x23,y23, x3,y3, objspace_flatness_squared,n+1);
+   } else {
+      stbtt__add_point(points, *num_points,x3,y3);
+      *num_points = *num_points+1;
+   }
+}
+
+// returns number of contours
+static stbtt__point *stbtt_FlattenCurves(stbtt_vertex *vertices, int num_verts, double objspace_flatness, int **contour_lengths, int *num_contours, void *userdata)
+{
+   stbtt__point *points=0;
+   int num_points=0;
+
+   double objspace_flatness_squared = objspace_flatness * objspace_flatness;
+   int i,n=0,start=0, pass;
+
+   // count how many "moves" there are to get the contour count
+   for (i=0; i < num_verts; ++i)
+      if (vertices[i].type == STBTT_vmove)
+         ++n;
+
+   *num_contours = n;
+   if (n == 0) return 0;
+
+   *contour_lengths = (int *) STBTT_malloc(sizeof(**contour_lengths) * n, userdata);
+
+   if (*contour_lengths == 0) {
+      *num_contours = 0;
+      return 0;
+   }
+
+   // make two passes through the points so we don't need to realloc
+   for (pass=0; pass < 2; ++pass) {
+      double x=0,y=0;
+      if (pass == 1) {
+         points = (stbtt__point *) STBTT_malloc(num_points * sizeof(points[0]), userdata);
+         if (points == NULL) goto error;
+      }
+      num_points = 0;
+      n= -1;
+      for (i=0; i < num_verts; ++i) {
+         switch (vertices[i].type) {
+            case STBTT_vmove:
+               // start the next contour
+               if (n >= 0)
+                  (*contour_lengths)[n] = num_points - start;
+               ++n;
+               start = num_points;
+
+               x = vertices[i].x, y = vertices[i].y;
+               stbtt__add_point(points, num_points++, x,y);
+               break;
+            case STBTT_vline:
+               x = vertices[i].x, y = vertices[i].y;
+               stbtt__add_point(points, num_points++, x, y);
+               break;
+            case STBTT_vcurve:
+               stbtt__tesselate_curve(points, &num_points, x,y,
+                                        vertices[i].cx, vertices[i].cy,
+                                        vertices[i].x,  vertices[i].y,
+                                        objspace_flatness_squared, 0);
+               x = vertices[i].x, y = vertices[i].y;
+               break;
+            case STBTT_vcubic:
+               stbtt__tesselate_cubic(points, &num_points, x,y,
+                                        vertices[i].cx, vertices[i].cy,
+                                        vertices[i].cx1, vertices[i].cy1,
+                                        vertices[i].x,  vertices[i].y,
+                                        objspace_flatness_squared, 0);
+               x = vertices[i].x, y = vertices[i].y;
+               break;
+         }
+      }
+      (*contour_lengths)[n] = num_points - start;
+   }
+
+   return points;
+error:
+   STBTT_free(points, userdata);
+   STBTT_free(*contour_lengths, userdata);
+   *contour_lengths = 0;
+   *num_contours = 0;
+   return NULL;
+}
+
+STBTT_DEF void stbtt_Rasterize(stbtt__bitmap *result, double flatness_in_pixels, stbtt_vertex *vertices, int num_verts, double scale_x, double scale_y, double shift_x, double shift_y, int x_off, int y_off, int invert, void *userdata)
+{
+   double scale            = scale_x > scale_y ? scale_y : scale_x;
+   int winding_count      = 0;
+   int *winding_lengths   = NULL;
+   stbtt__point *windings = stbtt_FlattenCurves(vertices, num_verts, flatness_in_pixels / scale, &winding_lengths, &winding_count, userdata);
+   if (windings) {
+      stbtt__rasterize(result, windings, winding_lengths, winding_count, scale_x, scale_y, shift_x, shift_y, x_off, y_off, invert, userdata);
+      STBTT_free(winding_lengths, userdata);
+      STBTT_free(windings, userdata);
+   }
+}
+
+STBTT_DEF void stbtt_FreeBitmap(unsigned char *bitmap, void *userdata)
+{
+   STBTT_free(bitmap, userdata);
+}
+
+STBTT_DEF unsigned char *stbtt_GetGlyphBitmapSubpixel(const stbtt_fontinfo *info, double scale_x, double scale_y, double shift_x, double shift_y, int glyph, int *width, int *height, int *xoff, int *yoff)
+{
+   int ix0,iy0,ix1,iy1;
+   stbtt__bitmap gbm;
+   stbtt_vertex *vertices;
+   int num_verts = stbtt_GetGlyphShape(info, glyph, &vertices);
+
+   if (scale_x == 0) scale_x = scale_y;
+   if (scale_y == 0) {
+      if (scale_x == 0) {
+         STBTT_free(vertices, info->userdata);
+         return NULL;
+      }
+      scale_y = scale_x;
+   }
+
+   stbtt_GetGlyphBitmapBoxSubpixel(info, glyph, scale_x, scale_y, shift_x, shift_y, &ix0,&iy0,&ix1,&iy1);
+
+   // now we get the size
+   gbm.w = (ix1 - ix0);
+   gbm.h = (iy1 - iy0);
+   gbm.pixels = NULL; // in case we error
+
+   if (width ) *width  = gbm.w;
+   if (height) *height = gbm.h;
+   if (xoff  ) *xoff   = ix0;
+   if (yoff  ) *yoff   = iy0;
+
+   if (gbm.w && gbm.h) {
+      gbm.pixels = (unsigned char *) STBTT_malloc(gbm.w * gbm.h, info->userdata);
+      if (gbm.pixels) {
+         gbm.stride = gbm.w;
+
+         stbtt_Rasterize(&gbm, 0.35f, vertices, num_verts, scale_x, scale_y, shift_x, shift_y, ix0, iy0, 1, info->userdata);
+      }
+   }
+   STBTT_free(vertices, info->userdata);
+   return gbm.pixels;
+}
+
+STBTT_DEF unsigned char *stbtt_GetGlyphBitmap(const stbtt_fontinfo *info, double scale_x, double scale_y, int glyph, int *width, int *height, int *xoff, int *yoff)
+{
+   return stbtt_GetGlyphBitmapSubpixel(info, scale_x, scale_y, 0.0f, 0.0f, glyph, width, height, xoff, yoff);
+}
+
+STBTT_DEF void stbtt_MakeGlyphBitmapSubpixel(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, double scale_x, double scale_y, double shift_x, double shift_y, int glyph)
+{
+   int ix0,iy0;
+   stbtt_vertex *vertices;
+   int num_verts = stbtt_GetGlyphShape(info, glyph, &vertices);
+   stbtt__bitmap gbm;
+
+   stbtt_GetGlyphBitmapBoxSubpixel(info, glyph, scale_x, scale_y, shift_x, shift_y, &ix0,&iy0,0,0);
+   gbm.pixels = output;
+   gbm.w = out_w;
+   gbm.h = out_h;
+   gbm.stride = out_stride;
+
+   if (gbm.w && gbm.h)
+      stbtt_Rasterize(&gbm, 0.35f, vertices, num_verts, scale_x, scale_y, shift_x, shift_y, ix0,iy0, 1, info->userdata);
+
+   STBTT_free(vertices, info->userdata);
+}
+
+STBTT_DEF void stbtt_MakeGlyphBitmap(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, double scale_x, double scale_y, int glyph)
+{
+   stbtt_MakeGlyphBitmapSubpixel(info, output, out_w, out_h, out_stride, scale_x, scale_y, 0.0f,0.0f, glyph);
+}
+
+STBTT_DEF unsigned char *stbtt_GetCodepointBitmapSubpixel(const stbtt_fontinfo *info, double scale_x, double scale_y, double shift_x, double shift_y, int codepoint, int *width, int *height, int *xoff, int *yoff)
+{
+   return stbtt_GetGlyphBitmapSubpixel(info, scale_x, scale_y,shift_x,shift_y, stbtt_FindGlyphIndex(info,codepoint), width,height,xoff,yoff);
+}
+
+STBTT_DEF void stbtt_MakeCodepointBitmapSubpixelPrefilter(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, double scale_x, double scale_y, double shift_x, double shift_y, int oversample_x, int oversample_y, double *sub_x, double *sub_y, int codepoint)
+{
+   stbtt_MakeGlyphBitmapSubpixelPrefilter(info, output, out_w, out_h, out_stride, scale_x, scale_y, shift_x, shift_y, oversample_x, oversample_y, sub_x, sub_y, stbtt_FindGlyphIndex(info,codepoint));
+}
+
+STBTT_DEF void stbtt_MakeCodepointBitmapSubpixel(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, double scale_x, double scale_y, double shift_x, double shift_y, int codepoint)
+{
+   stbtt_MakeGlyphBitmapSubpixel(info, output, out_w, out_h, out_stride, scale_x, scale_y, shift_x, shift_y, stbtt_FindGlyphIndex(info,codepoint));
+}
+
+STBTT_DEF unsigned char *stbtt_GetCodepointBitmap(const stbtt_fontinfo *info, double scale_x, double scale_y, int codepoint, int *width, int *height, int *xoff, int *yoff)
+{
+   return stbtt_GetCodepointBitmapSubpixel(info, scale_x, scale_y, 0.0f,0.0f, codepoint, width,height,xoff,yoff);
+}
+
+STBTT_DEF void stbtt_MakeCodepointBitmap(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, double scale_x, double scale_y, int codepoint)
+{
+   stbtt_MakeCodepointBitmapSubpixel(info, output, out_w, out_h, out_stride, scale_x, scale_y, 0.0f,0.0f, codepoint);
+}
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// bitmap baking
+//
+// This is SUPER-CRAPPY packing to keep source code small
+
+static int stbtt_BakeFontBitmap_internal(unsigned char *data, int offset,  // font location (use offset=0 for plain .ttf)
+                                double pixel_height,                     // height of font in pixels
+                                unsigned char *pixels, int pw, int ph,  // bitmap to be filled in
+                                int first_char, int num_chars,          // characters to bake
+                                stbtt_bakedchar *chardata)
+{
+   double scale;
+   int x,y,bottom_y, i;
+   stbtt_fontinfo f;
+   f.userdata = NULL;
+   if (!stbtt_InitFont(&f, data, offset))
+      return -1;
+   STBTT_memset(pixels, 0, pw*ph); // background of 0 around pixels
+   x=y=1;
+   bottom_y = 1;
+
+   scale = stbtt_ScaleForPixelHeight(&f, pixel_height);
+
+   for (i=0; i < num_chars; ++i) {
+      int advance, lsb, x0,y0,x1,y1,gw,gh;
+      int g = stbtt_FindGlyphIndex(&f, first_char + i);
+      stbtt_GetGlyphHMetrics(&f, g, &advance, &lsb);
+      stbtt_GetGlyphBitmapBox(&f, g, scale,scale, &x0,&y0,&x1,&y1);
+      gw = x1-x0;
+      gh = y1-y0;
+      if (x + gw + 1 >= pw)
+         y = bottom_y, x = 1; // advance to next row
+      if (y + gh + 1 >= ph) // check if it fits vertically AFTER potentially moving to next row
+         return -i;
+      STBTT_assert(x+gw < pw);
+      STBTT_assert(y+gh < ph);
+      stbtt_MakeGlyphBitmap(&f, pixels+x+y*pw, gw,gh,pw, scale,scale, g);
+      chardata[i].x0 = (stbtt_int16) x;
+      chardata[i].y0 = (stbtt_int16) y;
+      chardata[i].x1 = (stbtt_int16) (x + gw);
+      chardata[i].y1 = (stbtt_int16) (y + gh);
+      chardata[i].xadvance = scale * advance;
+      chardata[i].xoff     = (double) x0;
+      chardata[i].yoff     = (double) y0;
+      x = x + gw + 1;
+      if (y+gh+1 > bottom_y)
+         bottom_y = y+gh+1;
+   }
+   return bottom_y;
+}
+
+STBTT_DEF void stbtt_GetBakedQuad(const stbtt_bakedchar *chardata, int pw, int ph, int char_index, double *xpos, double *ypos, stbtt_aligned_quad *q, int opengl_fillrule)
+{
+   double d3d_bias = opengl_fillrule ? 0 : -0.5f;
+   double ipw = 1.0f / pw, iph = 1.0f / ph;
+   const stbtt_bakedchar *b = chardata + char_index;
+   int round_x = STBTT_ifloor((*xpos + b->xoff) + 0.5f);
+   int round_y = STBTT_ifloor((*ypos + b->yoff) + 0.5f);
+
+   q->x0 = round_x + d3d_bias;
+   q->y0 = round_y + d3d_bias;
+   q->x1 = round_x + b->x1 - b->x0 + d3d_bias;
+   q->y1 = round_y + b->y1 - b->y0 + d3d_bias;
+
+   q->s0 = b->x0 * ipw;
+   q->t0 = b->y0 * iph;
+   q->s1 = b->x1 * ipw;
+   q->t1 = b->y1 * iph;
+
+   *xpos += b->xadvance;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// rectangle packing replacement routines if you don't have stb_rect_pack.h
+//
+
+#ifndef STB_RECT_PACK_VERSION
+
+typedef int stbrp_coord;
+
+////////////////////////////////////////////////////////////////////////////////////
+//                                                                                //
+//                                                                                //
+// COMPILER WARNING ?!?!?                                                         //
+//                                                                                //
+//                                                                                //
+// if you get a compile warning due to these symbols being defined more than      //
+// once, move #include "stb_rect_pack.h" before #include "stb_truetype.h"         //
+//                                                                                //
+////////////////////////////////////////////////////////////////////////////////////
+
+typedef struct
+{
+   int width,height;
+   int x,y,bottom_y;
+} stbrp_context;
+
+typedef struct
+{
+   unsigned char x;
+} stbrp_node;
+
+struct stbrp_rect
+{
+   stbrp_coord x,y;
+   int id,w,h,was_packed;
+};
+
+static void stbrp_init_target(stbrp_context *con, int pw, int ph, stbrp_node *nodes, int num_nodes)
+{
+   con->width  = pw;
+   con->height = ph;
+   con->x = 0;
+   con->y = 0;
+   con->bottom_y = 0;
+   STBTT__NOTUSED(nodes);
+   STBTT__NOTUSED(num_nodes);
+}
+
+static void stbrp_pack_rects(stbrp_context *con, stbrp_rect *rects, int num_rects)
+{
+   int i;
+   for (i=0; i < num_rects; ++i) {
+      if (con->x + rects[i].w > con->width) {
+         con->x = 0;
+         con->y = con->bottom_y;
+      }
+      if (con->y + rects[i].h > con->height)
+         break;
+      rects[i].x = con->x;
+      rects[i].y = con->y;
+      rects[i].was_packed = 1;
+      con->x += rects[i].w;
+      if (con->y + rects[i].h > con->bottom_y)
+         con->bottom_y = con->y + rects[i].h;
+   }
+   for (   ; i < num_rects; ++i)
+      rects[i].was_packed = 0;
+}
+#endif
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// bitmap baking
+//
+// This is SUPER-AWESOME (tm Ryan Gordon) packing using stb_rect_pack.h. If
+// stb_rect_pack.h isn't available, it uses the BakeFontBitmap strategy.
+
+STBTT_DEF int stbtt_PackBegin(stbtt_pack_context *spc, unsigned char *pixels, int pw, int ph, int stride_in_bytes, int padding, void *alloc_context)
+{
+   stbrp_context *context = (stbrp_context *) STBTT_malloc(sizeof(*context)            ,alloc_context);
+   int            num_nodes = pw - padding;
+   stbrp_node    *nodes   = (stbrp_node    *) STBTT_malloc(sizeof(*nodes  ) * num_nodes,alloc_context);
+
+   if (context == NULL || nodes == NULL) {
+      if (context != NULL) STBTT_free(context, alloc_context);
+      if (nodes   != NULL) STBTT_free(nodes  , alloc_context);
+      return 0;
+   }
+
+   spc->user_allocator_context = alloc_context;
+   spc->width = pw;
+   spc->height = ph;
+   spc->pixels = pixels;
+   spc->pack_info = context;
+   spc->nodes = nodes;
+   spc->padding = padding;
+   spc->stride_in_bytes = stride_in_bytes != 0 ? stride_in_bytes : pw;
+   spc->h_oversample = 1;
+   spc->v_oversample = 1;
+   spc->skip_missing = 0;
+
+   stbrp_init_target(context, pw-padding, ph-padding, nodes, num_nodes);
+
+   if (pixels)
+      STBTT_memset(pixels, 0, pw*ph); // background of 0 around pixels
+
+   return 1;
+}
+
+STBTT_DEF void stbtt_PackEnd  (stbtt_pack_context *spc)
+{
+   STBTT_free(spc->nodes    , spc->user_allocator_context);
+   STBTT_free(spc->pack_info, spc->user_allocator_context);
+}
+
+STBTT_DEF void stbtt_PackSetOversampling(stbtt_pack_context *spc, unsigned int h_oversample, unsigned int v_oversample)
+{
+   STBTT_assert(h_oversample <= STBTT_MAX_OVERSAMPLE);
+   STBTT_assert(v_oversample <= STBTT_MAX_OVERSAMPLE);
+   if (h_oversample <= STBTT_MAX_OVERSAMPLE)
+      spc->h_oversample = h_oversample;
+   if (v_oversample <= STBTT_MAX_OVERSAMPLE)
+      spc->v_oversample = v_oversample;
+}
+
+STBTT_DEF void stbtt_PackSetSkipMissingCodepoints(stbtt_pack_context *spc, int skip)
+{
+   spc->skip_missing = skip;
+}
+
+#define STBTT__OVER_MASK  (STBTT_MAX_OVERSAMPLE-1)
+
+static void stbtt__h_prefilter(unsigned char *pixels, int w, int h, int stride_in_bytes, unsigned int kernel_width)
+{
+   unsigned char buffer[STBTT_MAX_OVERSAMPLE];
+   int safe_w = w - kernel_width;
+   int j;
+   STBTT_memset(buffer, 0, STBTT_MAX_OVERSAMPLE); // suppress bogus warning from VS2013 -analyze
+   for (j=0; j < h; ++j) {
+      int i;
+      unsigned int total;
+      STBTT_memset(buffer, 0, kernel_width);
+
+      total = 0;
+
+      // make kernel_width a constant in common cases so compiler can optimize out the divide
+      switch (kernel_width) {
+         case 2:
+            for (i=0; i <= safe_w; ++i) {
+               total += pixels[i] - buffer[i & STBTT__OVER_MASK];
+               buffer[(i+kernel_width) & STBTT__OVER_MASK] = pixels[i];
+               pixels[i] = (unsigned char) (total / 2);
+            }
+            break;
+         case 3:
+            for (i=0; i <= safe_w; ++i) {
+               total += pixels[i] - buffer[i & STBTT__OVER_MASK];
+               buffer[(i+kernel_width) & STBTT__OVER_MASK] = pixels[i];
+               pixels[i] = (unsigned char) (total / 3);
+            }
+            break;
+         case 4:
+            for (i=0; i <= safe_w; ++i) {
+               total += pixels[i] - buffer[i & STBTT__OVER_MASK];
+               buffer[(i+kernel_width) & STBTT__OVER_MASK] = pixels[i];
+               pixels[i] = (unsigned char) (total / 4);
+            }
+            break;
+         case 5:
+            for (i=0; i <= safe_w; ++i) {
+               total += pixels[i] - buffer[i & STBTT__OVER_MASK];
+               buffer[(i+kernel_width) & STBTT__OVER_MASK] = pixels[i];
+               pixels[i] = (unsigned char) (total / 5);
+            }
+            break;
+         default:
+            for (i=0; i <= safe_w; ++i) {
+               total += pixels[i] - buffer[i & STBTT__OVER_MASK];
+               buffer[(i+kernel_width) & STBTT__OVER_MASK] = pixels[i];
+               pixels[i] = (unsigned char) (total / kernel_width);
+            }
+            break;
+      }
+
+      for (; i < w; ++i) {
+         STBTT_assert(pixels[i] == 0);
+         total -= buffer[i & STBTT__OVER_MASK];
+         pixels[i] = (unsigned char) (total / kernel_width);
+      }
+
+      pixels += stride_in_bytes;
+   }
+}
+
+static void stbtt__v_prefilter(unsigned char *pixels, int w, int h, int stride_in_bytes, unsigned int kernel_width)
+{
+   unsigned char buffer[STBTT_MAX_OVERSAMPLE];
+   int safe_h = h - kernel_width;
+   int j;
+   STBTT_memset(buffer, 0, STBTT_MAX_OVERSAMPLE); // suppress bogus warning from VS2013 -analyze
+   for (j=0; j < w; ++j) {
+      int i;
+      unsigned int total;
+      STBTT_memset(buffer, 0, kernel_width);
+
+      total = 0;
+
+      // make kernel_width a constant in common cases so compiler can optimize out the divide
+      switch (kernel_width) {
+         case 2:
+            for (i=0; i <= safe_h; ++i) {
+               total += pixels[i*stride_in_bytes] - buffer[i & STBTT__OVER_MASK];
+               buffer[(i+kernel_width) & STBTT__OVER_MASK] = pixels[i*stride_in_bytes];
+               pixels[i*stride_in_bytes] = (unsigned char) (total / 2);
+            }
+            break;
+         case 3:
+            for (i=0; i <= safe_h; ++i) {
+               total += pixels[i*stride_in_bytes] - buffer[i & STBTT__OVER_MASK];
+               buffer[(i+kernel_width) & STBTT__OVER_MASK] = pixels[i*stride_in_bytes];
+               pixels[i*stride_in_bytes] = (unsigned char) (total / 3);
+            }
+            break;
+         case 4:
+            for (i=0; i <= safe_h; ++i) {
+               total += pixels[i*stride_in_bytes] - buffer[i & STBTT__OVER_MASK];
+               buffer[(i+kernel_width) & STBTT__OVER_MASK] = pixels[i*stride_in_bytes];
+               pixels[i*stride_in_bytes] = (unsigned char) (total / 4);
+            }
+            break;
+         case 5:
+            for (i=0; i <= safe_h; ++i) {
+               total += pixels[i*stride_in_bytes] - buffer[i & STBTT__OVER_MASK];
+               buffer[(i+kernel_width) & STBTT__OVER_MASK] = pixels[i*stride_in_bytes];
+               pixels[i*stride_in_bytes] = (unsigned char) (total / 5);
+            }
+            break;
+         default:
+            for (i=0; i <= safe_h; ++i) {
+               total += pixels[i*stride_in_bytes] - buffer[i & STBTT__OVER_MASK];
+               buffer[(i+kernel_width) & STBTT__OVER_MASK] = pixels[i*stride_in_bytes];
+               pixels[i*stride_in_bytes] = (unsigned char) (total / kernel_width);
+            }
+            break;
+      }
+
+      for (; i < h; ++i) {
+         STBTT_assert(pixels[i*stride_in_bytes] == 0);
+         total -= buffer[i & STBTT__OVER_MASK];
+         pixels[i*stride_in_bytes] = (unsigned char) (total / kernel_width);
+      }
+
+      pixels += 1;
+   }
+}
+
+static double stbtt__oversample_shift(int oversample)
+{
+   if (!oversample)
+      return 0.0f;
+
+   // The prefilter is a box filter of width "oversample",
+   // which shifts phase by (oversample - 1)/2 pixels in
+   // oversampled space. We want to shift in the opposite
+   // direction to counter this.
+   return (double)-(oversample - 1) / (2.0f * (double)oversample);
+}
+
+// rects array must be big enough to accommodate all characters in the given ranges
+STBTT_DEF int stbtt_PackFontRangesGatherRects(stbtt_pack_context *spc, const stbtt_fontinfo *info, stbtt_pack_range *ranges, int num_ranges, stbrp_rect *rects)
+{
+   int i,j,k;
+   int missing_glyph_added = 0;
+
+   k=0;
+   for (i=0; i < num_ranges; ++i) {
+      double fh = ranges[i].font_size;
+      double scale = fh > 0 ? stbtt_ScaleForPixelHeight(info, fh) : stbtt_ScaleForMappingEmToPixels(info, -fh);
+      ranges[i].h_oversample = (unsigned char) spc->h_oversample;
+      ranges[i].v_oversample = (unsigned char) spc->v_oversample;
+      for (j=0; j < ranges[i].num_chars; ++j) {
+         int x0,y0,x1,y1;
+         int codepoint = ranges[i].array_of_unicode_codepoints == NULL ? ranges[i].first_unicode_codepoint_in_range + j : ranges[i].array_of_unicode_codepoints[j];
+         int glyph = stbtt_FindGlyphIndex(info, codepoint);
+         if (glyph == 0 && (spc->skip_missing || missing_glyph_added)) {
+            rects[k].w = rects[k].h = 0;
+         } else {
+            stbtt_GetGlyphBitmapBoxSubpixel(info,glyph,
+                                            scale * spc->h_oversample,
+                                            scale * spc->v_oversample,
+                                            0,0,
+                                            &x0,&y0,&x1,&y1);
+            rects[k].w = (stbrp_coord) (x1-x0 + spc->padding + spc->h_oversample-1);
+            rects[k].h = (stbrp_coord) (y1-y0 + spc->padding + spc->v_oversample-1);
+            if (glyph == 0)
+               missing_glyph_added = 1;
+         }
+         ++k;
+      }
+   }
+
+   return k;
+}
+
+STBTT_DEF void stbtt_MakeGlyphBitmapSubpixelPrefilter(const stbtt_fontinfo *info, unsigned char *output, int out_w, int out_h, int out_stride, double scale_x, double scale_y, double shift_x, double shift_y, int prefilter_x, int prefilter_y, double *sub_x, double *sub_y, int glyph)
+{
+   stbtt_MakeGlyphBitmapSubpixel(info,
+                                 output,
+                                 out_w - (prefilter_x - 1),
+                                 out_h - (prefilter_y - 1),
+                                 out_stride,
+                                 scale_x,
+                                 scale_y,
+                                 shift_x,
+                                 shift_y,
+                                 glyph);
+
+   if (prefilter_x > 1)
+      stbtt__h_prefilter(output, out_w, out_h, out_stride, prefilter_x);
+
+   if (prefilter_y > 1)
+      stbtt__v_prefilter(output, out_w, out_h, out_stride, prefilter_y);
+
+   *sub_x = stbtt__oversample_shift(prefilter_x);
+   *sub_y = stbtt__oversample_shift(prefilter_y);
+}
+
+// rects array must be big enough to accommodate all characters in the given ranges
+STBTT_DEF int stbtt_PackFontRangesRenderIntoRects(stbtt_pack_context *spc, const stbtt_fontinfo *info, stbtt_pack_range *ranges, int num_ranges, stbrp_rect *rects)
+{
+   int i,j,k, missing_glyph = -1, return_value = 1;
+
+   // save current values
+   int old_h_over = spc->h_oversample;
+   int old_v_over = spc->v_oversample;
+
+   k = 0;
+   for (i=0; i < num_ranges; ++i) {
+      double fh = ranges[i].font_size;
+      double scale = fh > 0 ? stbtt_ScaleForPixelHeight(info, fh) : stbtt_ScaleForMappingEmToPixels(info, -fh);
+      double recip_h,recip_v,sub_x,sub_y;
+      spc->h_oversample = ranges[i].h_oversample;
+      spc->v_oversample = ranges[i].v_oversample;
+      recip_h = 1.0f / spc->h_oversample;
+      recip_v = 1.0f / spc->v_oversample;
+      sub_x = stbtt__oversample_shift(spc->h_oversample);
+      sub_y = stbtt__oversample_shift(spc->v_oversample);
+      for (j=0; j < ranges[i].num_chars; ++j) {
+         stbrp_rect *r = &rects[k];
+         if (r->was_packed && r->w != 0 && r->h != 0) {
+            stbtt_packedchar *bc = &ranges[i].chardata_for_range[j];
+            int advance, lsb, x0,y0,x1,y1;
+            int codepoint = ranges[i].array_of_unicode_codepoints == NULL ? ranges[i].first_unicode_codepoint_in_range + j : ranges[i].array_of_unicode_codepoints[j];
+            int glyph = stbtt_FindGlyphIndex(info, codepoint);
+            stbrp_coord pad = (stbrp_coord) spc->padding;
+
+            // pad on left and top
+            r->x += pad;
+            r->y += pad;
+            r->w -= pad;
+            r->h -= pad;
+            stbtt_GetGlyphHMetrics(info, glyph, &advance, &lsb);
+            stbtt_GetGlyphBitmapBox(info, glyph,
+                                    scale * spc->h_oversample,
+                                    scale * spc->v_oversample,
+                                    &x0,&y0,&x1,&y1);
+            stbtt_MakeGlyphBitmapSubpixel(info,
+                                          spc->pixels + r->x + r->y*spc->stride_in_bytes,
+                                          r->w - spc->h_oversample+1,
+                                          r->h - spc->v_oversample+1,
+                                          spc->stride_in_bytes,
+                                          scale * spc->h_oversample,
+                                          scale * spc->v_oversample,
+                                          0,0,
+                                          glyph);
+
+            if (spc->h_oversample > 1)
+               stbtt__h_prefilter(spc->pixels + r->x + r->y*spc->stride_in_bytes,
+                                  r->w, r->h, spc->stride_in_bytes,
+                                  spc->h_oversample);
+
+            if (spc->v_oversample > 1)
+               stbtt__v_prefilter(spc->pixels + r->x + r->y*spc->stride_in_bytes,
+                                  r->w, r->h, spc->stride_in_bytes,
+                                  spc->v_oversample);
+
+            bc->x0       = (stbtt_int16)  r->x;
+            bc->y0       = (stbtt_int16)  r->y;
+            bc->x1       = (stbtt_int16) (r->x + r->w);
+            bc->y1       = (stbtt_int16) (r->y + r->h);
+            bc->xadvance =                scale * advance;
+            bc->xoff     =       (double)  x0 * recip_h + sub_x;
+            bc->yoff     =       (double)  y0 * recip_v + sub_y;
+            bc->xoff2    =                (x0 + r->w) * recip_h + sub_x;
+            bc->yoff2    =                (y0 + r->h) * recip_v + sub_y;
+
+            if (glyph == 0)
+               missing_glyph = j;
+         } else if (spc->skip_missing) {
+            return_value = 0;
+         } else if (r->was_packed && r->w == 0 && r->h == 0 && missing_glyph >= 0) {
+            ranges[i].chardata_for_range[j] = ranges[i].chardata_for_range[missing_glyph];
+         } else {
+            return_value = 0; // if any fail, report failure
+         }
+
+         ++k;
+      }
+   }
+
+   // restore original values
+   spc->h_oversample = old_h_over;
+   spc->v_oversample = old_v_over;
+
+   return return_value;
+}
+
+STBTT_DEF void stbtt_PackFontRangesPackRects(stbtt_pack_context *spc, stbrp_rect *rects, int num_rects)
+{
+   stbrp_pack_rects((stbrp_context *) spc->pack_info, rects, num_rects);
+}
+
+STBTT_DEF int stbtt_PackFontRanges(stbtt_pack_context *spc, const unsigned char *fontdata, int font_index, stbtt_pack_range *ranges, int num_ranges)
+{
+   stbtt_fontinfo info;
+   int i,j,n, return_value = 1;
+   //stbrp_context *context = (stbrp_context *) spc->pack_info;
+   stbrp_rect    *rects;
+
+   // flag all characters as NOT packed
+   for (i=0; i < num_ranges; ++i)
+      for (j=0; j < ranges[i].num_chars; ++j)
+         ranges[i].chardata_for_range[j].x0 =
+         ranges[i].chardata_for_range[j].y0 =
+         ranges[i].chardata_for_range[j].x1 =
+         ranges[i].chardata_for_range[j].y1 = 0;
+
+   n = 0;
+   for (i=0; i < num_ranges; ++i)
+      n += ranges[i].num_chars;
+
+   rects = (stbrp_rect *) STBTT_malloc(sizeof(*rects) * n, spc->user_allocator_context);
+   if (rects == NULL)
+      return 0;
+
+   info.userdata = spc->user_allocator_context;
+   stbtt_InitFont(&info, fontdata, stbtt_GetFontOffsetForIndex(fontdata,font_index));
+
+   n = stbtt_PackFontRangesGatherRects(spc, &info, ranges, num_ranges, rects);
+
+   stbtt_PackFontRangesPackRects(spc, rects, n);
+
+   return_value = stbtt_PackFontRangesRenderIntoRects(spc, &info, ranges, num_ranges, rects);
+
+   STBTT_free(rects, spc->user_allocator_context);
+   return return_value;
+}
+
+STBTT_DEF int stbtt_PackFontRange(stbtt_pack_context *spc, const unsigned char *fontdata, int font_index, double font_size,
+            int first_unicode_codepoint_in_range, int num_chars_in_range, stbtt_packedchar *chardata_for_range)
+{
+   stbtt_pack_range range;
+   range.first_unicode_codepoint_in_range = first_unicode_codepoint_in_range;
+   range.array_of_unicode_codepoints = NULL;
+   range.num_chars                   = num_chars_in_range;
+   range.chardata_for_range          = chardata_for_range;
+   range.font_size                   = font_size;
+   return stbtt_PackFontRanges(spc, fontdata, font_index, &range, 1);
+}
+
+STBTT_DEF void stbtt_GetScaledFontVMetrics(const unsigned char *fontdata, int index, double size, double *ascent, double *descent, double *lineGap)
+{
+   int i_ascent, i_descent, i_lineGap;
+   double scale;
+   stbtt_fontinfo info;
+   stbtt_InitFont(&info, fontdata, stbtt_GetFontOffsetForIndex(fontdata, index));
+   scale = size > 0 ? stbtt_ScaleForPixelHeight(&info, size) : stbtt_ScaleForMappingEmToPixels(&info, -size);
+   stbtt_GetFontVMetrics(&info, &i_ascent, &i_descent, &i_lineGap);
+   *ascent  = (double) i_ascent  * scale;
+   *descent = (double) i_descent * scale;
+   *lineGap = (double) i_lineGap * scale;
+}
+
+STBTT_DEF void stbtt_GetPackedQuad(const stbtt_packedchar *chardata, int pw, int ph, int char_index, double *xpos, double *ypos, stbtt_aligned_quad *q, int align_to_integer)
+{
+   double ipw = 1.0f / pw, iph = 1.0f / ph;
+   const stbtt_packedchar *b = chardata + char_index;
+
+   if (align_to_integer) {
+      double x = (double) STBTT_ifloor((*xpos + b->xoff) + 0.5f);
+      double y = (double) STBTT_ifloor((*ypos + b->yoff) + 0.5f);
+      q->x0 = x;
+      q->y0 = y;
+      q->x1 = x + b->xoff2 - b->xoff;
+      q->y1 = y + b->yoff2 - b->yoff;
+   } else {
+      q->x0 = *xpos + b->xoff;
+      q->y0 = *ypos + b->yoff;
+      q->x1 = *xpos + b->xoff2;
+      q->y1 = *ypos + b->yoff2;
+   }
+
+   q->s0 = b->x0 * ipw;
+   q->t0 = b->y0 * iph;
+   q->s1 = b->x1 * ipw;
+   q->t1 = b->y1 * iph;
+
+   *xpos += b->xadvance;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// sdf computation
+//
+
+#define STBTT_min(a,b)  ((a) < (b) ? (a) : (b))
+#define STBTT_max(a,b)  ((a) < (b) ? (b) : (a))
+
+static int stbtt__ray_intersect_bezier(double orig[2], double ray[2], double q0[2], double q1[2], double q2[2], double hits[2][2])
+{
+   double q0perp = q0[1]*ray[0] - q0[0]*ray[1];
+   double q1perp = q1[1]*ray[0] - q1[0]*ray[1];
+   double q2perp = q2[1]*ray[0] - q2[0]*ray[1];
+   double roperp = orig[1]*ray[0] - orig[0]*ray[1];
+
+   double a = q0perp - 2*q1perp + q2perp;
+   double b = q1perp - q0perp;
+   double c = q0perp - roperp;
+
+   double s0 = 0., s1 = 0.;
+   int num_s = 0;
+
+   if (a != 0.0) {
+      double discr = b*b - a*c;
+      if (discr > 0.0) {
+         double rcpna = -1 / a;
+         double d = (double) STBTT_sqrt(discr);
+         s0 = (b+d) * rcpna;
+         s1 = (b-d) * rcpna;
+         if (s0 >= 0.0 && s0 <= 1.0)
+            num_s = 1;
+         if (d > 0.0 && s1 >= 0.0 && s1 <= 1.0) {
+            if (num_s == 0) s0 = s1;
+            ++num_s;
+         }
+      }
+   } else {
+      // 2*b*s + c = 0
+      // s = -c / (2*b)
+      s0 = c / (-2 * b);
+      if (s0 >= 0.0 && s0 <= 1.0)
+         num_s = 1;
+   }
+
+   if (num_s == 0)
+      return 0;
+   else {
+      double rcp_len2 = 1 / (ray[0]*ray[0] + ray[1]*ray[1]);
+      double rayn_x = ray[0] * rcp_len2, rayn_y = ray[1] * rcp_len2;
+
+      double q0d =   q0[0]*rayn_x +   q0[1]*rayn_y;
+      double q1d =   q1[0]*rayn_x +   q1[1]*rayn_y;
+      double q2d =   q2[0]*rayn_x +   q2[1]*rayn_y;
+      double rod = orig[0]*rayn_x + orig[1]*rayn_y;
+
+      double q10d = q1d - q0d;
+      double q20d = q2d - q0d;
+      double q0rd = q0d - rod;
+
+      hits[0][0] = q0rd + s0*(2.0f - 2.0f*s0)*q10d + s0*s0*q20d;
+      hits[0][1] = a*s0+b;
+
+      if (num_s > 1) {
+         hits[1][0] = q0rd + s1*(2.0f - 2.0f*s1)*q10d + s1*s1*q20d;
+         hits[1][1] = a*s1+b;
+         return 2;
+      } else {
+         return 1;
+      }
+   }
+}
+
+static int equal(double *a, double *b)
+{
+   return (a[0] == b[0] && a[1] == b[1]);
+}
+
+static int stbtt__compute_crossings_x(double x, double y, int nverts, stbtt_vertex *verts)
+{
+   int i;
+   double orig[2], ray[2] = { 1, 0 };
+   double y_frac;
+   int winding = 0;
+
+   orig[0] = x;
+   orig[1] = y;
+
+   // make sure y never passes through a vertex of the shape
+   y_frac = (double) STBTT_fmod(y, 1.0f);
+   if (y_frac < 0.01f)
+      y += 0.01f;
+   else if (y_frac > 0.99f)
+      y -= 0.01f;
+   orig[1] = y;
+
+   // test a ray from (-infinity,y) to (x,y)
+   for (i=0; i < nverts; ++i) {
+      if (verts[i].type == STBTT_vline) {
+         int x0 = (int) verts[i-1].x, y0 = (int) verts[i-1].y;
+         int x1 = (int) verts[i  ].x, y1 = (int) verts[i  ].y;
+         if (y > STBTT_min(y0,y1) && y < STBTT_max(y0,y1) && x > STBTT_min(x0,x1)) {
+            double x_inter = (y - y0) / (y1 - y0) * (x1-x0) + x0;
+            if (x_inter < x)
+               winding += (y0 < y1) ? 1 : -1;
+         }
+      }
+      if (verts[i].type == STBTT_vcurve) {
+         int x0 = (int) verts[i-1].x , y0 = (int) verts[i-1].y ;
+         int x1 = (int) verts[i  ].cx, y1 = (int) verts[i  ].cy;
+         int x2 = (int) verts[i  ].x , y2 = (int) verts[i  ].y ;
+         int ax = STBTT_min(x0,STBTT_min(x1,x2)), ay = STBTT_min(y0,STBTT_min(y1,y2));
+         int by = STBTT_max(y0,STBTT_max(y1,y2));
+         if (y > ay && y < by && x > ax) {
+            double q0[2],q1[2],q2[2];
+            double hits[2][2];
+            q0[0] = (double)x0;
+            q0[1] = (double)y0;
+            q1[0] = (double)x1;
+            q1[1] = (double)y1;
+            q2[0] = (double)x2;
+            q2[1] = (double)y2;
+            if (equal(q0,q1) || equal(q1,q2)) {
+               x0 = (int)verts[i-1].x;
+               y0 = (int)verts[i-1].y;
+               x1 = (int)verts[i  ].x;
+               y1 = (int)verts[i  ].y;
+               if (y > STBTT_min(y0,y1) && y < STBTT_max(y0,y1) && x > STBTT_min(x0,x1)) {
+                  double x_inter = (y - y0) / (y1 - y0) * (x1-x0) + x0;
+                  if (x_inter < x)
+                     winding += (y0 < y1) ? 1 : -1;
+               }
+            } else {
+               int num_hits = stbtt__ray_intersect_bezier(orig, ray, q0, q1, q2, hits);
+               if (num_hits >= 1)
+                  if (hits[0][0] < 0)
+                     winding += (hits[0][1] < 0 ? -1 : 1);
+               if (num_hits >= 2)
+                  if (hits[1][0] < 0)
+                     winding += (hits[1][1] < 0 ? -1 : 1);
+            }
+         }
+      }
+   }
+   return winding;
+}
+
+static double stbtt__cuberoot( double x )
+{
+   if (x<0)
+      return -(double) STBTT_pow(-x,1.0f/3.0f);
+   else
+      return  (double) STBTT_pow( x,1.0f/3.0f);
+}
+
+// x^3 + c*x^2 + b*x + a = 0
+static int stbtt__solve_cubic(double a, double b, double c, double* r)
+{
+	double s = -a / 3;
+	double p = b - a*a / 3;
+	double q = a * (2*a*a - 9*b) / 27 + c;
+   double p3 = p*p*p;
+	double d = q*q + 4*p3 / 27;
+	if (d >= 0) {
+		double z = (double) STBTT_sqrt(d);
+		double u = (-q + z) / 2;
+		double v = (-q - z) / 2;
+		u = stbtt__cuberoot(u);
+		v = stbtt__cuberoot(v);
+		r[0] = s + u + v;
+		return 1;
+	} else {
+	   double u = (double) STBTT_sqrt(-p/3);
+	   double v = (double) STBTT_acos(-STBTT_sqrt(-27/p3) * q / 2) / 3; // p3 must be negative, since d is negative
+	   double m = (double) STBTT_cos(v);
+      double n = (double) STBTT_cos(v-3.141592/2)*1.732050808f;
+	   r[0] = s + u * 2 * m;
+	   r[1] = s - u * (m + n);
+	   r[2] = s - u * (m - n);
+
+      //STBTT_assert( STBTT_fabs(((r[0]+a)*r[0]+b)*r[0]+c) < 0.05f);  // these asserts may not be safe at all scales, though they're in bezier t parameter units so maybe?
+      //STBTT_assert( STBTT_fabs(((r[1]+a)*r[1]+b)*r[1]+c) < 0.05f);
+      //STBTT_assert( STBTT_fabs(((r[2]+a)*r[2]+b)*r[2]+c) < 0.05f);
+   	return 3;
+   }
+}
+
+STBTT_DEF unsigned char * stbtt_GetGlyphSDF(const stbtt_fontinfo *info, double scale, int glyph, int padding, unsigned char onedge_value, double pixel_dist_scale, int *width, int *height, int *xoff, int *yoff)
+{
+   double scale_x = scale, scale_y = scale;
+   int ix0,iy0,ix1,iy1;
+   int w,h;
+   unsigned char *data;
+
+   if (scale == 0) return NULL;
+
+   stbtt_GetGlyphBitmapBoxSubpixel(info, glyph, scale, scale, 0.0f,0.0f, &ix0,&iy0,&ix1,&iy1);
+
+   // if empty, return NULL
+   if (ix0 == ix1 || iy0 == iy1)
+      return NULL;
+
+   ix0 -= padding;
+   iy0 -= padding;
+   ix1 += padding;
+   iy1 += padding;
+
+   w = (ix1 - ix0);
+   h = (iy1 - iy0);
+
+   if (width ) *width  = w;
+   if (height) *height = h;
+   if (xoff  ) *xoff   = ix0;
+   if (yoff  ) *yoff   = iy0;
+
+   // invert for y-downwards bitmaps
+   scale_y = -scale_y;
+
+   {
+      int x,y,i,j;
+      double *precompute;
+      stbtt_vertex *verts;
+      int num_verts = stbtt_GetGlyphShape(info, glyph, &verts);
+      data = (unsigned char *) STBTT_malloc(w * h, info->userdata);
+      precompute = (double *) STBTT_malloc(num_verts * sizeof(double), info->userdata);
+
+      for (i=0,j=num_verts-1; i < num_verts; j=i++) {
+         if (verts[i].type == STBTT_vline) {
+            double x0 = verts[i].x*scale_x, y0 = verts[i].y*scale_y;
+            double x1 = verts[j].x*scale_x, y1 = verts[j].y*scale_y;
+            double dist = (double) STBTT_sqrt((x1-x0)*(x1-x0) + (y1-y0)*(y1-y0));
+            precompute[i] = (dist == 0) ? 0.0f : 1.0f / dist;
+         } else if (verts[i].type == STBTT_vcurve) {
+            double x2 = verts[j].x *scale_x, y2 = verts[j].y *scale_y;
+            double x1 = verts[i].cx*scale_x, y1 = verts[i].cy*scale_y;
+            double x0 = verts[i].x *scale_x, y0 = verts[i].y *scale_y;
+            double bx = x0 - 2*x1 + x2, by = y0 - 2*y1 + y2;
+            double len2 = bx*bx + by*by;
+            if (len2 != 0.0f)
+               precompute[i] = 1.0f / (bx*bx + by*by);
+            else
+               precompute[i] = 0.0f;
+         } else
+            precompute[i] = 0.0f;
+      }
+
+      for (y=iy0; y < iy1; ++y) {
+         for (x=ix0; x < ix1; ++x) {
+            double val;
+            double min_dist = 999999.0f;
+            double sx = (double) x + 0.5f;
+            double sy = (double) y + 0.5f;
+            double x_gspace = (sx / scale_x);
+            double y_gspace = (sy / scale_y);
+
+            int winding = stbtt__compute_crossings_x(x_gspace, y_gspace, num_verts, verts); // @OPTIMIZE: this could just be a rasterization, but needs to be line vs. non-tesselated curves so a new path
+
+            for (i=0; i < num_verts; ++i) {
+               double x0 = verts[i].x*scale_x, y0 = verts[i].y*scale_y;
+
+               // check against every point here rather than inside line/curve primitives -- @TODO: wrong if multiple 'moves' in a row produce a garbage point, and given culling, probably more efficient to do within line/curve
+               double dist2 = (x0-sx)*(x0-sx) + (y0-sy)*(y0-sy);
+               if (dist2 < min_dist*min_dist)
+                  min_dist = (double) STBTT_sqrt(dist2);
+
+               if (verts[i].type == STBTT_vline) {
+                  double x1 = verts[i-1].x*scale_x, y1 = verts[i-1].y*scale_y;
+
+                  // coarse culling against bbox
+                  //if (sx > STBTT_min(x0,x1)-min_dist && sx < STBTT_max(x0,x1)+min_dist &&
+                  //    sy > STBTT_min(y0,y1)-min_dist && sy < STBTT_max(y0,y1)+min_dist)
+                  double dist = (double) STBTT_fabs((x1-x0)*(y0-sy) - (y1-y0)*(x0-sx)) * precompute[i];
+                  STBTT_assert(i != 0);
+                  if (dist < min_dist) {
+                     // check position along line
+                     // x' = x0 + t*(x1-x0), y' = y0 + t*(y1-y0)
+                     // minimize (x'-sx)*(x'-sx)+(y'-sy)*(y'-sy)
+                     double dx = x1-x0, dy = y1-y0;
+                     double px = x0-sx, py = y0-sy;
+                     // minimize (px+t*dx)^2 + (py+t*dy)^2 = px*px + 2*px*dx*t + t^2*dx*dx + py*py + 2*py*dy*t + t^2*dy*dy
+                     // derivative: 2*px*dx + 2*py*dy + (2*dx*dx+2*dy*dy)*t, set to 0 and solve
+                     double t = -(px*dx + py*dy) / (dx*dx + dy*dy);
+                     if (t >= 0.0f && t <= 1.0f)
+                        min_dist = dist;
+                  }
+               } else if (verts[i].type == STBTT_vcurve) {
+                  double x2 = verts[i-1].x *scale_x, y2 = verts[i-1].y *scale_y;
+                  double x1 = verts[i  ].cx*scale_x, y1 = verts[i  ].cy*scale_y;
+                  double box_x0 = STBTT_min(STBTT_min(x0,x1),x2);
+                  double box_y0 = STBTT_min(STBTT_min(y0,y1),y2);
+                  double box_x1 = STBTT_max(STBTT_max(x0,x1),x2);
+                  double box_y1 = STBTT_max(STBTT_max(y0,y1),y2);
+                  // coarse culling against bbox to avoid computing cubic unnecessarily
+                  if (sx > box_x0-min_dist && sx < box_x1+min_dist && sy > box_y0-min_dist && sy < box_y1+min_dist) {
+                     int num=0;
+                     double ax = x1-x0, ay = y1-y0;
+                     double bx = x0 - 2*x1 + x2, by = y0 - 2*y1 + y2;
+                     double mx = x0 - sx, my = y0 - sy;
+                     double res[3]={0},px,py,t,it;
+                     double a_inv = precompute[i];
+                     if (a_inv == 0.0) { // if a_inv is 0, it's 2nd degree so use quadratic formula
+                        double a = 3*(ax*bx + ay*by);
+                        double b = 2*(ax*ax + ay*ay) + (mx*bx+my*by);
+                        double c = mx*ax+my*ay;
+                        if (a == 0.0) { // if a is 0, it's linear
+                           if (b != 0.0) {
+                              res[num++] = -c/b;
+                           }
+                        } else {
+                           double discriminant = b*b - 4*a*c;
+                           if (discriminant < 0)
+                              num = 0;
+                           else {
+                              double root = (double) STBTT_sqrt(discriminant);
+                              res[0] = (-b - root)/(2*a);
+                              res[1] = (-b + root)/(2*a);
+                              num = 2; // don't bother distinguishing 1-solution case, as code below will still work
+                           }
+                        }
+                     } else {
+                        double b = 3*(ax*bx + ay*by) * a_inv; // could precompute this as it doesn't depend on sample point
+                        double c = (2*(ax*ax + ay*ay) + (mx*bx+my*by)) * a_inv;
+                        double d = (mx*ax+my*ay) * a_inv;
+                        num = stbtt__solve_cubic(b, c, d, res);
+                     }
+                     if (num >= 1 && res[0] >= 0.0f && res[0] <= 1.0f) {
+                        t = res[0], it = 1.0f - t;
+                        px = it*it*x0 + 2*t*it*x1 + t*t*x2;
+                        py = it*it*y0 + 2*t*it*y1 + t*t*y2;
+                        dist2 = (px-sx)*(px-sx) + (py-sy)*(py-sy);
+                        if (dist2 < min_dist * min_dist)
+                           min_dist = (double) STBTT_sqrt(dist2);
+                     }
+                     if (num >= 2 && res[1] >= 0.0f && res[1] <= 1.0f) {
+                        t = res[1], it = 1.0f - t;
+                        px = it*it*x0 + 2*t*it*x1 + t*t*x2;
+                        py = it*it*y0 + 2*t*it*y1 + t*t*y2;
+                        dist2 = (px-sx)*(px-sx) + (py-sy)*(py-sy);
+                        if (dist2 < min_dist * min_dist)
+                           min_dist = (double) STBTT_sqrt(dist2);
+                     }
+                     if (num >= 3 && res[2] >= 0.0f && res[2] <= 1.0f) {
+                        t = res[2], it = 1.0f - t;
+                        px = it*it*x0 + 2*t*it*x1 + t*t*x2;
+                        py = it*it*y0 + 2*t*it*y1 + t*t*y2;
+                        dist2 = (px-sx)*(px-sx) + (py-sy)*(py-sy);
+                        if (dist2 < min_dist * min_dist)
+                           min_dist = (double) STBTT_sqrt(dist2);
+                     }
+                  }
+               }
+            }
+            if (winding == 0)
+               min_dist = -min_dist;  // if outside the shape, value is negative
+            val = onedge_value + pixel_dist_scale * min_dist;
+            if (val < 0)
+               val = 0;
+            else if (val > 255)
+               val = 255;
+            data[(y-iy0)*w+(x-ix0)] = (unsigned char) val;
+         }
+      }
+      STBTT_free(precompute, info->userdata);
+      STBTT_free(verts, info->userdata);
+   }
+   return data;
+}
+
+STBTT_DEF unsigned char * stbtt_GetCodepointSDF(const stbtt_fontinfo *info, double scale, int codepoint, int padding, unsigned char onedge_value, double pixel_dist_scale, int *width, int *height, int *xoff, int *yoff)
+{
+   return stbtt_GetGlyphSDF(info, scale, stbtt_FindGlyphIndex(info, codepoint), padding, onedge_value, pixel_dist_scale, width, height, xoff, yoff);
+}
+
+STBTT_DEF void stbtt_FreeSDF(unsigned char *bitmap, void *userdata)
+{
+   STBTT_free(bitmap, userdata);
+}
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// font name matching -- recommended not to use this
+//
+
+// check if a utf8 string contains a prefix which is the utf16 string; if so return length of matching utf8 string
+static stbtt_int32 stbtt__CompareUTF8toUTF16_bigendian_prefix(stbtt_uint8 *s1, stbtt_int32 len1, stbtt_uint8 *s2, stbtt_int32 len2)
+{
+   stbtt_int32 i=0;
+
+   // convert utf16 to utf8 and compare the results while converting
+   while (len2) {
+      stbtt_uint16 ch = s2[0]*256 + s2[1];
+      if (ch < 0x80) {
+         if (i >= len1) return -1;
+         if (s1[i++] != ch) return -1;
+      } else if (ch < 0x800) {
+         if (i+1 >= len1) return -1;
+         if (s1[i++] != 0xc0 + (ch >> 6)) return -1;
+         if (s1[i++] != 0x80 + (ch & 0x3f)) return -1;
+      } else if (ch >= 0xd800 && ch < 0xdc00) {
+         stbtt_uint32 c;
+         stbtt_uint16 ch2 = s2[2]*256 + s2[3];
+         if (i+3 >= len1) return -1;
+         c = ((ch - 0xd800) << 10) + (ch2 - 0xdc00) + 0x10000;
+         if (s1[i++] != 0xf0 + (c >> 18)) return -1;
+         if (s1[i++] != 0x80 + ((c >> 12) & 0x3f)) return -1;
+         if (s1[i++] != 0x80 + ((c >>  6) & 0x3f)) return -1;
+         if (s1[i++] != 0x80 + ((c      ) & 0x3f)) return -1;
+         s2 += 2; // plus another 2 below
+         len2 -= 2;
+      } else if (ch >= 0xdc00 && ch < 0xe000) {
+         return -1;
+      } else {
+         if (i+2 >= len1) return -1;
+         if (s1[i++] != 0xe0 + (ch >> 12)) return -1;
+         if (s1[i++] != 0x80 + ((ch >> 6) & 0x3f)) return -1;
+         if (s1[i++] != 0x80 + ((ch     ) & 0x3f)) return -1;
+      }
+      s2 += 2;
+      len2 -= 2;
+   }
+   return i;
+}
+
+static int stbtt_CompareUTF8toUTF16_bigendian_internal(char *s1, int len1, char *s2, int len2)
+{
+   return len1 == stbtt__CompareUTF8toUTF16_bigendian_prefix((stbtt_uint8*) s1, len1, (stbtt_uint8*) s2, len2);
+}
+
+// returns results in whatever encoding you request... but note that 2-byte encodings
+// will be BIG-ENDIAN... use stbtt_CompareUTF8toUTF16_bigendian() to compare
+STBTT_DEF const char *stbtt_GetFontNameString(const stbtt_fontinfo *font, int *length, int platformID, int encodingID, int languageID, int nameID)
+{
+   stbtt_int32 i,count,stringOffset;
+   stbtt_uint8 *fc = font->data;
+   stbtt_uint32 offset = font->fontstart;
+   stbtt_uint32 nm = stbtt__find_table(fc, offset, "name");
+   if (!nm) return NULL;
+
+   count = ttUSHORT(fc+nm+2);
+   stringOffset = nm + ttUSHORT(fc+nm+4);
+   for (i=0; i < count; ++i) {
+      stbtt_uint32 loc = nm + 6 + 12 * i;
+      if (platformID == ttUSHORT(fc+loc+0) && encodingID == ttUSHORT(fc+loc+2)
+          && languageID == ttUSHORT(fc+loc+4) && nameID == ttUSHORT(fc+loc+6)) {
+         *length = ttUSHORT(fc+loc+8);
+         return (const char *) (fc+stringOffset+ttUSHORT(fc+loc+10));
+      }
+   }
+   return NULL;
+}
+
+static int stbtt__matchpair(stbtt_uint8 *fc, stbtt_uint32 nm, stbtt_uint8 *name, stbtt_int32 nlen, stbtt_int32 target_id, stbtt_int32 next_id)
+{
+   stbtt_int32 i;
+   stbtt_int32 count = ttUSHORT(fc+nm+2);
+   stbtt_int32 stringOffset = nm + ttUSHORT(fc+nm+4);
+
+   for (i=0; i < count; ++i) {
+      stbtt_uint32 loc = nm + 6 + 12 * i;
+      stbtt_int32 id = ttUSHORT(fc+loc+6);
+      if (id == target_id) {
+         // find the encoding
+         stbtt_int32 platform = ttUSHORT(fc+loc+0), encoding = ttUSHORT(fc+loc+2), language = ttUSHORT(fc+loc+4);
+
+         // is this a Unicode encoding?
+         if (platform == 0 || (platform == 3 && encoding == 1) || (platform == 3 && encoding == 10)) {
+            stbtt_int32 slen = ttUSHORT(fc+loc+8);
+            stbtt_int32 off = ttUSHORT(fc+loc+10);
+
+            // check if there's a prefix match
+            stbtt_int32 matchlen = stbtt__CompareUTF8toUTF16_bigendian_prefix(name, nlen, fc+stringOffset+off,slen);
+            if (matchlen >= 0) {
+               // check for target_id+1 immediately following, with same encoding & language
+               if (i+1 < count && ttUSHORT(fc+loc+12+6) == next_id && ttUSHORT(fc+loc+12) == platform && ttUSHORT(fc+loc+12+2) == encoding && ttUSHORT(fc+loc+12+4) == language) {
+                  slen = ttUSHORT(fc+loc+12+8);
+                  off = ttUSHORT(fc+loc+12+10);
+                  if (slen == 0) {
+                     if (matchlen == nlen)
+                        return 1;
+                  } else if (matchlen < nlen && name[matchlen] == ' ') {
+                     ++matchlen;
+                     if (stbtt_CompareUTF8toUTF16_bigendian_internal((char*) (name+matchlen), nlen-matchlen, (char*)(fc+stringOffset+off),slen))
+                        return 1;
+                  }
+               } else {
+                  // if nothing immediately following
+                  if (matchlen == nlen)
+                     return 1;
+               }
+            }
+         }
+
+         // @TODO handle other encodings
+      }
+   }
+   return 0;
+}
+
+static int stbtt__matches(stbtt_uint8 *fc, stbtt_uint32 offset, stbtt_uint8 *name, stbtt_int32 flags)
+{
+   stbtt_int32 nlen = (stbtt_int32) STBTT_strlen((char *) name);
+   stbtt_uint32 nm,hd;
+   if (!stbtt__isfont(fc+offset)) return 0;
+
+   // check italics/bold/underline flags in macStyle...
+   if (flags) {
+      hd = stbtt__find_table(fc, offset, "head");
+      if ((ttUSHORT(fc+hd+44) & 7) != (flags & 7)) return 0;
+   }
+
+   nm = stbtt__find_table(fc, offset, "name");
+   if (!nm) return 0;
+
+   if (flags) {
+      // if we checked the macStyle flags, then just check the family and ignore the subfamily
+      if (stbtt__matchpair(fc, nm, name, nlen, 16, -1))  return 1;
+      if (stbtt__matchpair(fc, nm, name, nlen,  1, -1))  return 1;
+      if (stbtt__matchpair(fc, nm, name, nlen,  3, -1))  return 1;
+   } else {
+      if (stbtt__matchpair(fc, nm, name, nlen, 16, 17))  return 1;
+      if (stbtt__matchpair(fc, nm, name, nlen,  1,  2))  return 1;
+      if (stbtt__matchpair(fc, nm, name, nlen,  3, -1))  return 1;
+   }
+
+   return 0;
+}
+
+static int stbtt_FindMatchingFont_internal(unsigned char *font_collection, char *name_utf8, stbtt_int32 flags)
+{
+   stbtt_int32 i;
+   for (i=0;;++i) {
+      stbtt_int32 off = stbtt_GetFontOffsetForIndex(font_collection, i);
+      if (off < 0) return off;
+      if (stbtt__matches((stbtt_uint8 *) font_collection, off, (stbtt_uint8*) name_utf8, flags))
+         return off;
+   }
+}
+
+#if defined(__GNUC__) || defined(__clang__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wcast-qual"
+#endif
+
+STBTT_DEF int stbtt_BakeFontBitmap(const unsigned char *data, int offset,
+                                double pixel_height, unsigned char *pixels, int pw, int ph,
+                                int first_char, int num_chars, stbtt_bakedchar *chardata)
+{
+   return stbtt_BakeFontBitmap_internal((unsigned char *) data, offset, pixel_height, pixels, pw, ph, first_char, num_chars, chardata);
+}
+
+STBTT_DEF int stbtt_GetFontOffsetForIndex(const unsigned char *data, int index)
+{
+   return stbtt_GetFontOffsetForIndex_internal((unsigned char *) data, index);
+}
+
+STBTT_DEF int stbtt_GetNumberOfFonts(const unsigned char *data)
+{
+   return stbtt_GetNumberOfFonts_internal((unsigned char *) data);
+}
+
+STBTT_DEF int stbtt_InitFont(stbtt_fontinfo *info, const unsigned char *data, int offset)
+{
+   return stbtt_InitFont_internal(info, (unsigned char *) data, offset);
+}
+
+STBTT_DEF int stbtt_FindMatchingFont(const unsigned char *fontdata, const char *name, int flags)
+{
+   return stbtt_FindMatchingFont_internal((unsigned char *) fontdata, (char *) name, flags);
+}
+
+STBTT_DEF int stbtt_CompareUTF8toUTF16_bigendian(const char *s1, int len1, const char *s2, int len2)
+{
+   return stbtt_CompareUTF8toUTF16_bigendian_internal((char *) s1, len1, (char *) s2, len2);
+}
+
+#if defined(__GNUC__) || defined(__clang__)
+#pragma GCC diagnostic pop
+#endif
+
+#endif // STB_TRUETYPE_IMPLEMENTATION
+
+
+// FULL VERSION HISTORY
+//
+//   1.19 (2018-02-11) OpenType GPOS kerning (horizontal only), STBTT_fmod
+//   1.18 (2018-01-29) add missing function
+//   1.17 (2017-07-23) make more arguments const; doc fix
+//   1.16 (2017-07-12) SDF support
+//   1.15 (2017-03-03) make more arguments const
+//   1.14 (2017-01-16) num-fonts-in-TTC function
+//   1.13 (2017-01-02) support OpenType fonts, certain Apple fonts
+//   1.12 (2016-10-25) suppress warnings about casting away const with -Wcast-qual
+//   1.11 (2016-04-02) fix unused-variable warning
+//   1.10 (2016-04-02) allow user-defined fabs() replacement
+//                     fix memory leak if fontsize=0.0
+//                     fix warning from duplicate typedef
+//   1.09 (2016-01-16) warning fix; avoid crash on outofmem; use alloc userdata for PackFontRanges
+//   1.08 (2015-09-13) document stbtt_Rasterize(); fixes for vertical & horizontal edges
+//   1.07 (2015-08-01) allow PackFontRanges to accept arrays of sparse codepoints;
+//                     allow PackFontRanges to pack and render in separate phases;
+//                     fix stbtt_GetFontOFfsetForIndex (never worked for non-0 input?);
+//                     fixed an assert() bug in the new rasterizer
+//                     replace assert() with STBTT_assert() in new rasterizer
+//   1.06 (2015-07-14) performance improvements (~35% faster on x86 and x64 on test machine)
+//                     also more precise AA rasterizer, except if shapes overlap
+//                     remove need for STBTT_sort
+//   1.05 (2015-04-15) fix misplaced definitions for STBTT_STATIC
+//   1.04 (2015-04-15) typo in example
+//   1.03 (2015-04-12) STBTT_STATIC, fix memory leak in new packing, various fixes
+//   1.02 (2014-12-10) fix various warnings & compile issues w/ stb_rect_pack, C++
+//   1.01 (2014-12-08) fix subpixel position when oversampling to exactly match
+//                        non-oversampled; STBTT_POINT_SIZE for packed case only
+//   1.00 (2014-12-06) add new PackBegin etc. API, w/ support for oversampling
+//   0.99 (2014-09-18) fix multiple bugs with subpixel rendering (ryg)
+//   0.9  (2014-08-07) support certain mac/iOS fonts without an MS platformID
+//   0.8b (2014-07-07) fix a warning
+//   0.8  (2014-05-25) fix a few more warnings
+//   0.7  (2013-09-25) bugfix: subpixel glyph bug fixed in 0.5 had come back
+//   0.6c (2012-07-24) improve documentation
+//   0.6b (2012-07-20) fix a few more warnings
+//   0.6  (2012-07-17) fix warnings; added stbtt_ScaleForMappingEmToPixels,
+//                        stbtt_GetFontBoundingBox, stbtt_IsGlyphEmpty
+//   0.5  (2011-12-09) bugfixes:
+//                        subpixel glyph renderer computed wrong bounding box
+//                        first vertex of shape can be off-curve (FreeSans)
+//   0.4b (2011-12-03) fixed an error in the font baking example
+//   0.4  (2011-12-01) kerning, subpixel rendering (tor)
+//                    bugfixes for:
+//                        codepoint-to-glyph conversion using table fmt=12
+//                        codepoint-to-glyph conversion using table fmt=4
+//                        stbtt_GetBakedQuad with non-square texture (Zer)
+//                    updated Hello World! sample to use kerning and subpixel
+//                    fixed some warnings
+//   0.3  (2009-06-24) cmap fmt=12, compound shapes (MM)
+//                    userdata, malloc-from-userdata, non-zero fill (stb)
+//   0.2  (2009-03-11) Fix unsigned/signed char warnings
+//   0.1  (2009-03-09) First public release
+//
+
+/*
+------------------------------------------------------------------------------
+This software is available under 2 licenses -- choose whichever you prefer.
+------------------------------------------------------------------------------
+ALTERNATIVE A - MIT License
+Copyright (c) 2017 Sean Barrett
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+------------------------------------------------------------------------------
+ALTERNATIVE B - Public Domain (www.unlicense.org)
+This is free and unencumbered software released into the public domain.
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+software, either in source code form or as a compiled binary, for any purpose,
+commercial or non-commercial, and by any means.
+In jurisdictions that recognize copyright laws, the author or authors of this
+software dedicate any and all copyright interest in the software to the public
+domain. We make this dedication for the benefit of the public at large and to
+the detriment of our heirs and successors. We intend this dedication to be an
+overt act of relinquishment in perpetuity of all present and future rights to
+this software under copyright law.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+------------------------------------------------------------------------------
+*/
diff --git a/libtesla/include/tesla.hpp b/libtesla/include/tesla.hpp
index 0016eca..bf15f1c 100644
--- a/libtesla/include/tesla.hpp
+++ b/libtesla/include/tesla.hpp
@@ -1,3659 +1,3659 @@
-/**
- * Copyright (C) 2020 werwolv
- *
- * This file is part of libtesla.
- *
- * libtesla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 2 of the License, or
- * (at your option) any later version.
- *
- * libtesla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with libtesla.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#pragma once
-
-#include <switch.h>
-
-#include <stdlib.h>
-#include <strings.h>
-#include <math.h>
-
-#include <algorithm>
-#include <cstring>
-#include <cwctype>
-#include <string>
-#include <functional>
-#include <type_traits>
-#include <mutex>
-#include <memory>
-#include <chrono>
-#include <list>
-#include <stack>
-#include <map>
-#include <filesystem>
-
-// Define this makro before including tesla.hpp in your main file. If you intend
-// to use the tesla.hpp header in more than one source file, only define it once!
-// #define TESLA_INIT_IMPL
-
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wunused-function"
-
-#ifdef TESLA_INIT_IMPL
-    #define STB_TRUETYPE_IMPLEMENTATION
-#endif
-#include "stb_truetype.h"
-
-#pragma GCC diagnostic pop
-
-#define ELEMENT_BOUNDS(elem) elem->getX(), elem->getY(), elem->getWidth(), elem->getHeight()
-
-#define ASSERT_EXIT(x) if (R_FAILED(x)) std::exit(1)
-#define ASSERT_FATAL(x) if (Result res = x; R_FAILED(res)) fatalThrow(res)
-
-#define PACKED __attribute__((packed))
-#define ALWAYS_INLINE inline __attribute__((always_inline))
-
-/// Evaluates an expression that returns a result, and returns the result if it would fail.
-#define TSL_R_TRY(resultExpr)           \
-    ({                                  \
-        const auto result = resultExpr; \
-        if (R_FAILED(result)) {         \
-            return result;              \
-        }                               \
-    })
-
-using namespace std::literals::string_literals;
-using namespace std::literals::chrono_literals;
-
-namespace tsl {
-
-    // Constants
-
-    namespace cfg {
-
-        constexpr u32 ScreenWidth = 1920;       ///< Width of the Screen
-        constexpr u32 ScreenHeight = 1080;      ///< Height of the Screen
-
-        extern u16 LayerWidth;                  ///< Width of the Tesla layer
-        extern u16 LayerHeight;                 ///< Height of the Tesla layer
-        extern u16 LayerPosX;                   ///< X position of the Tesla layer
-        extern u16 LayerPosY;                   ///< Y position of the Tesla layer
-        extern u16 FramebufferWidth;            ///< Width of the framebuffer
-        extern u16 FramebufferHeight;           ///< Height of the framebuffer
-        extern u64 launchCombo;                 ///< Overlay activation key combo
-
-    }
-
-    /**
-     * @brief RGBA4444 Color structure
-     */
-    struct Color {
-
-        union {
-            struct {
-                u16 r: 4, g: 4, b: 4, a: 4;
-            } PACKED;
-            u16 rgba;
-        };
-
-        constexpr inline Color(u16 raw): rgba(raw) {}
-        constexpr inline Color(u8 r, u8 g, u8 b, u8 a): r(r), g(g), b(b), a(a) {}
-    };
-
-    namespace style {
-        constexpr u32 ListItemDefaultHeight         = 70;       ///< Standard list item height
-        constexpr u32 TrackBarDefaultHeight         = 90;       ///< Standard track bar height
-        constexpr u8  ListItemHighlightSaturation   = 6;        ///< Maximum saturation of Listitem highlights
-        constexpr u8  ListItemHighlightLength       = 22;       ///< Maximum length of Listitem highlights
-
-        namespace color {
-            constexpr Color ColorFrameBackground  = { 0x0, 0x0, 0x0, 0xD };   ///< Overlay frame background color
-            constexpr Color ColorTransparent      = { 0x0, 0x0, 0x0, 0x0 };   ///< Transparent color
-            constexpr Color ColorHighlight        = { 0x0, 0xF, 0xD, 0xF };   ///< Greenish highlight color
-            constexpr Color ColorFrame            = { 0x7, 0x7, 0x7, 0xF };   ///< Outer boarder color
-            constexpr Color ColorHandle           = { 0x5, 0x5, 0x5, 0xF };   ///< Track bar handle color
-            constexpr Color ColorText             = { 0xF, 0xF, 0xF, 0xF };   ///< Standard text color
-            constexpr Color ColorDescription      = { 0xA, 0xA, 0xA, 0xF };   ///< Description text color
-            constexpr Color ColorHeaderBar        = { 0xC, 0xC, 0xC, 0xF };   ///< Category header rectangle color
-            constexpr Color ColorClickAnimation   = { 0x0, 0x2, 0x2, 0xF };   ///< Element click animation color
-        }
-    }
-
-    // Declarations
-
-    /**
-     * @brief Direction in which focus moved before landing on
-     *        the currently focused element
-     */
-    enum class FocusDirection {
-        None,                       ///< Focus was placed on the element programatically without user input
-        Up,                         ///< Focus moved upwards
-        Down,                       ///< Focus moved downwards
-        Left,                       ///< Focus moved from left to rigth
-        Right                       ///< Focus moved from right to left
-    };
-
-    /**
-     * @brief Current input controll mode
-     *
-     */
-    enum class InputMode {
-        Controller,                 ///< Input from controller
-        Touch,                      ///< Touch input
-        TouchScroll                 ///< Moving/scrolling touch input
-    };
-
-    class Overlay;
-    namespace elm { class Element; }
-
-    namespace impl {
-
-        /**
-         * @brief Overlay launch parameters
-         */
-        enum class LaunchFlags : u8 {
-            None = 0,                       ///< Do nothing special at launch
-            CloseOnExit        = BIT(0)     ///< Close the overlay the last Gui gets poped from the stack
-        };
-
-        [[maybe_unused]] static constexpr LaunchFlags operator|(LaunchFlags lhs, LaunchFlags rhs) {
-            return static_cast<LaunchFlags>(u8(lhs) | u8(rhs));
-        }
-
-        /**
-         * @brief Combo key mapping
-         */
-        struct KeyInfo {
-            u64 key;
-            const char* name;
-            const char* glyph;
-        };
-
-        /**
-         * @brief Combo key mappings
-         *
-         * Ordered as they should be displayed
-         */
-        constexpr std::array<KeyInfo, 18> KEYS_INFO = {{
-            { HidNpadButton_L, "L", "\uE0A4" }, { HidNpadButton_R, "R", "\uE0A5" },
-            { HidNpadButton_ZL, "ZL", "\uE0A6" }, { HidNpadButton_ZR, "ZR", "\uE0A7" },
-            { HidNpadButton_AnySL, "SL", "\uE0A8" }, { HidNpadButton_AnySR, "SR", "\uE0A9" },
-            { HidNpadButton_Left, "DLEFT", "\uE07B" }, { HidNpadButton_Up, "DUP", "\uE079" }, { HidNpadButton_Right, "DRIGHT", "\uE07C" }, { HidNpadButton_Down, "DDOWN", "\uE07A" },
-            { HidNpadButton_A, "A", "\uE0A0" }, { HidNpadButton_B, "B", "\uE0A1" }, { HidNpadButton_X, "X", "\uE0A2" }, { HidNpadButton_Y, "Y", "\uE0A3" },
-            { HidNpadButton_StickL, "LS", "\uE08A" }, { HidNpadButton_StickR, "RS", "\uE08B" },
-            { HidNpadButton_Minus, "MINUS", "\uE0B6" }, { HidNpadButton_Plus, "PLUS", "\uE0B5" }
-        }};
-
-    }
-
-    [[maybe_unused]] static void goBack();
-
-    [[maybe_unused]] static void setNextOverlay(const std::string& ovlPath, std::string args = "");
-
-    template<typename TOverlay, impl::LaunchFlags launchFlags = impl::LaunchFlags::CloseOnExit>
-    int loop(int argc, char** argv);
-
-    // Helpers
-
-    namespace hlp {
-
-        /**
-         * @brief Wrapper for service initialization
-         *
-         * @param f wrapped function
-         */
-        template<typename F>
-        static inline void doWithSmSession(F f) {
-            smInitialize();
-            f();
-            smExit();
-        }
-
-        /**
-         * @brief Wrapper for sd card access using stdio
-         * @note Consider using raw fs calls instead as they are faster and need less space
-         *
-         * @param f wrapped function
-         */
-        template<typename F>
-        static inline void doWithSDCardHandle(F f) {
-            fsdevMountSdmc();
-            f();
-            fsdevUnmountDevice("sdmc");
-        }
-
-        /**
-         * @brief Guard that will execute a passed function at the end of the current scope
-         *
-         * @param f wrapped function
-         */
-        template<typename F>
-        class ScopeGuard {
-            ScopeGuard(const ScopeGuard&) = delete;
-            ScopeGuard& operator=(const ScopeGuard&) = delete;
-            private:
-                F f;
-                bool canceled = false;
-            public:
-                ALWAYS_INLINE ScopeGuard(F f) : f(std::move(f)) { }
-                ALWAYS_INLINE ~ScopeGuard() { if (!canceled) { f(); } }
-                void dismiss() { canceled = true; }
-        };
-
-        /**
-         * @brief libnx hid:sys shim that gives or takes away frocus to or from the process with the given aruid
-         *
-         * @param enable Give focus or take focus
-         * @param aruid Aruid of the process to focus/unfocus
-         * @return Result Result
-         */
-        static Result hidsysEnableAppletToGetInput(bool enable, u64 aruid) {
-            const struct {
-                u8 permitInput;
-                u64 appletResourceUserId;
-            } in = { enable != 0, aruid };
-
-            return serviceDispatchIn(hidsysGetServiceSession(), 503, in);
-        }
-
-        static Result viAddToLayerStack(ViLayer *layer, ViLayerStack stack) {
-            const struct {
-                u32 stack;
-                u64 layerId;
-            } in = { stack, layer->layer_id };
-
-            return serviceDispatchIn(viGetSession_IManagerDisplayService(), 6000, in);
-        }
-
-        /**
-         * @brief Toggles focus between the Tesla overlay and the rest of the system
-         *
-         * @param enabled Focus Tesla?
-         */
-        static void requestForeground(bool enabled) {
-            u64 applicationAruid = 0, appletAruid = 0;
-
-            for (u64 programId = 0x0100000000001000UL; programId < 0x0100000000001020UL; programId++) {
-                pmdmntGetProcessId(&appletAruid, programId);
-
-                if (appletAruid != 0)
-                    hidsysEnableAppletToGetInput(!enabled, appletAruid);
-            }
-
-            pmdmntGetApplicationProcessId(&applicationAruid);
-            hidsysEnableAppletToGetInput(!enabled, applicationAruid);
-
-            hidsysEnableAppletToGetInput(true, 0);
-        }
-
-        /**
-         * @brief Splits a string at the given delimeters
-         *
-         * @param str String to split
-         * @param delim Delimeter
-         * @return Vector containing the split tokens
-         */
-        static std::vector<std::string> split(const std::string& str, char delim = ' ') {
-            std::vector<std::string> out;
-
-            std::size_t current, previous = 0;
-            current = str.find(delim);
-            while (current != std::string::npos) {
-                out.push_back(str.substr(previous, current - previous));
-                previous = current + 1;
-                current = str.find(delim, previous);
-            }
-            out.push_back(str.substr(previous, current - previous));
-
-            return out;
-        }
-
-        namespace ini {
-
-            /**
-             * @brief Ini file type
-             */
-            using IniData = std::map<std::string, std::map<std::string, std::string>>;
-
-            /**
-             * @brief Tesla config file
-             */
-            static const char* CONFIG_FILE = "/config/tesla/config.ini";
-
-            /**
-             * @brief Parses a ini string
-             *
-             * @param str String to parse
-             * @return Parsed data
-             */
-            static IniData parseIni(const std::string &str) {
-                IniData iniData;
-
-                auto lines = split(str, '\n');
-
-                std::string lastHeader = "";
-                for (auto& line : lines) {
-                    line.erase(std::remove_if(line.begin(), line.end(), ::isspace), line.end());
-
-                    if (line[0] == '[' && line[line.size() - 1] == ']') {
-                        lastHeader = line.substr(1, line.size() - 2);
-                        iniData.emplace(lastHeader, std::map<std::string, std::string>{});
-                    }
-                    else if (auto keyValuePair = split(line, '='); keyValuePair.size() == 2) {
-                        iniData[lastHeader].emplace(keyValuePair[0], keyValuePair[1]);
-                    }
-                }
-
-                return iniData;
-            }
-
-            /**
-             * @brief Unparses ini data into a string
-             *
-             * @param iniData Ini data
-             * @return Ini string
-             */
-            static std::string unparseIni(IniData const &iniData) {
-                std::string string;
-                bool addSectionGap = false;
-                for (auto &section : iniData) {
-                    if (addSectionGap)
-                        string += "\n";
-                    string += "["s + section.first + "]\n"s;
-                    for (auto &keyValue : section.second) {
-                        string += keyValue.first + "="s + keyValue.second + "\n"s;
-                    }
-                }
-                return string;
-            }
-
-            /**
-             * @brief Read Tesla settings file
-             *
-             * @return Settings data
-             */
-            static IniData readOverlaySettings() {
-                /* Open Sd card filesystem. */
-                FsFileSystem fsSdmc;
-                if (R_FAILED(fsOpenSdCardFileSystem(&fsSdmc)))
-                    return {};
-                hlp::ScopeGuard fsGuard([&] { fsFsClose(&fsSdmc); });
-
-                /* Open config file. */
-                FsFile fileConfig;
-                if (R_FAILED(fsFsOpenFile(&fsSdmc, CONFIG_FILE, FsOpenMode_Read, &fileConfig)))
-                    return {};
-                hlp::ScopeGuard fileGuard([&] { fsFileClose(&fileConfig); });
-
-                /* Get config file size. */
-                s64 configFileSize;
-                if (R_FAILED(fsFileGetSize(&fileConfig, &configFileSize)))
-                    return {};
-
-                /* Read and parse config file. */
-                std::string configFileData(configFileSize, '\0');
-                u64 readSize;
-                Result rc = fsFileRead(&fileConfig, 0, configFileData.data(), configFileSize, FsReadOption_None, &readSize);
-                if (R_FAILED(rc) || readSize != static_cast<u64>(configFileSize))
-                    return {};
-
-                return parseIni(configFileData);
-            }
-
-            /**
-             * @brief Replace Tesla settings file with new data
-             *
-             * @param iniData new data
-             */
-            static void writeOverlaySettings(IniData const &iniData) {
-                /* Open Sd card filesystem. */
-                FsFileSystem fsSdmc;
-                if (R_FAILED(fsOpenSdCardFileSystem(&fsSdmc)))
-                    return;
-                hlp::ScopeGuard fsGuard([&] { fsFsClose(&fsSdmc); });
-
-                /* Open config file. */
-                FsFile fileConfig;
-                if (R_FAILED(fsFsOpenFile(&fsSdmc, CONFIG_FILE, FsOpenMode_Write, &fileConfig)))
-                    return;
-                hlp::ScopeGuard fileGuard([&] { fsFileClose(&fileConfig); });
-
-                std::string iniString = unparseIni(iniData);
-
-                fsFileWrite(&fileConfig, 0, iniString.c_str(), iniString.length(), FsWriteOption_Flush);
-            }
-
-            /**
-             * @brief Merge and save changes into Tesla settings file
-             *
-             * @param changes setting values to add or update
-             */
-            static void updateOverlaySettings(IniData const &changes) {
-                hlp::ini::IniData iniData = hlp::ini::readOverlaySettings();
-                for (auto &section : changes) {
-                    for (auto &keyValue : section.second) {
-                        iniData[section.first][keyValue.first] = keyValue.second;
-                    }
-                }
-                writeOverlaySettings(iniData);
-            }
-
-        }
-
-        /**
-         * @brief Decodes a key string into it's key code
-         *
-         * @param value Key string
-         * @return Key code
-         */
-        static u64 stringToKeyCode(const std::string &value) {
-            for (auto &keyInfo : impl::KEYS_INFO) {
-                if (strcasecmp(value.c_str(), keyInfo.name) == 0)
-                    return keyInfo.key;
-            }
-            return 0;
-        }
-
-        /**
-         * @brief Decodes a combo string into key codes
-         *
-         * @param value Combo string
-         * @return Key codes
-         */
-        static u64 comboStringToKeys(const std::string &value) {
-            u64 keyCombo = 0x00;
-            for (std::string key : hlp::split(value, '+')) {
-                keyCombo |= hlp::stringToKeyCode(key);
-            }
-            return keyCombo;
-        }
-
-        /**
-         * @brief Encodes key codes into a combo string
-         *
-         * @param keys Key codes
-         * @return Combo string
-         */
-        static std::string keysToComboString(u64 keys) {
-            std::string str;
-            for (auto &keyInfo : impl::KEYS_INFO) {
-                if (keys & keyInfo.key) {
-                    if (!str.empty())
-                        str.append("+");
-                    str.append(keyInfo.name);
-                }
-            }
-            return str;
-        }
-
-    }
-
-    // Renderer
-
-    namespace gfx {
-
-        extern "C" u64 __nx_vi_layer_id;
-
-        struct ScissoringConfig {
-            s32 x, y, w, h;
-        };
-
-        /**
-         * @brief Manages the Tesla layer and draws raw data to the screen
-         */
-        class Renderer final {
-        public:
-            Renderer& operator=(Renderer&) = delete;
-
-            friend class tsl::Overlay;
-
-            /**
-             * @brief Handles opacity of drawn colors for fadeout. Pass all colors through this function in order to apply opacity properly
-             *
-             * @param c Original color
-             * @return Color with applied opacity
-             */
-            static Color a(const Color &c) {
-                return (c.rgba & 0x0FFF) | (static_cast<u8>(c.a * Renderer::s_opacity) << 12);
-            }
-
-            /**
-             * @brief Enables scissoring, discarding of any draw outside the given boundaries
-             *
-             * @param x x pos
-             * @param y y pos
-             * @param w Width
-             * @param h Height
-             */
-            inline void enableScissoring(s32 x, s32 y, s32 w, s32 h) {
-                this->m_scissoringStack.emplace(x, y, w, h);
-            }
-
-            /**
-             * @brief Disables scissoring
-             */
-            inline void disableScissoring() {
-                this->m_scissoringStack.pop();
-            }
-
-
-            // Drawing functions
-
-            /**
-             * @brief Draw a single pixel onto the screen
-             *
-             * @param x X pos
-             * @param y Y pos
-             * @param color Color
-             */
-            inline void setPixel(s32 x, s32 y, Color color) {
-                if (x < 0 || y < 0 || x >= cfg::FramebufferWidth || y >= cfg::FramebufferHeight)
-                    return;
-
-                u32 offset = this->getPixelOffset(x, y);
-
-                if (offset != UINT32_MAX)
-                    static_cast<Color*>(this->getCurrentFramebuffer())[offset] = color;
-            }
-
-            /**
-             * @brief Blends two colors
-             *
-             * @param src Source color
-             * @param dst Destination color
-             * @param alpha Opacity
-             * @return Blended color
-             */
-            inline u8 blendColor(u8 src, u8 dst, u8 alpha) {
-                u8 oneMinusAlpha = 0x0F - alpha;
-
-                return (dst * alpha + src * oneMinusAlpha) / double(0xF);
-            }
-
-            /**
-             * @brief Draws a single source blended pixel onto the screen
-             *
-             * @param x X pos
-             * @param y Y pos
-             * @param color Color
-             */
-            inline void setPixelBlendSrc(s32 x, s32 y, Color color) {
-                if (x < 0 || y < 0 || x >= cfg::FramebufferWidth || y >= cfg::FramebufferHeight)
-                    return;
-
-                u32 offset = this->getPixelOffset(x, y);
-
-                if (offset == UINT32_MAX)
-                    return;
-
-                Color src((static_cast<u16*>(this->getCurrentFramebuffer()))[offset]);
-                Color dst(color);
-                Color end(0);
-
-                end.r = this->blendColor(src.r, dst.r, dst.a);
-                end.g = this->blendColor(src.g, dst.g, dst.a);
-                end.b = this->blendColor(src.b, dst.b, dst.a);
-                end.a = src.a;
-
-                this->setPixel(x, y, end);
-            }
-
-            /**
-             * @brief Draws a single destination blended pixel onto the screen
-             *
-             * @param x X pos
-             * @param y Y pos
-             * @param color Color
-             */
-            inline void setPixelBlendDst(s32 x, s32 y, Color color) {
-                if (x < 0 || y < 0 || x >= cfg::FramebufferWidth || y >= cfg::FramebufferHeight)
-                    return;
-
-                u32 offset = this->getPixelOffset(x, y);
-
-                if (offset == UINT32_MAX)
-                    return;
-
-                Color src((static_cast<u16*>(this->getCurrentFramebuffer()))[offset]);
-                Color dst(color);
-                Color end(0);
-
-                end.r = this->blendColor(src.r, dst.r, dst.a);
-                end.g = this->blendColor(src.g, dst.g, dst.a);
-                end.b = this->blendColor(src.b, dst.b, dst.a);
-                end.a = std::min(dst.a + src.a, 0xF);
-
-                this->setPixel(x, y, end);
-            }
-
-            /**
-             * @brief Draws a rectangle of given sizes
-             *
-             * @param x X pos
-             * @param y Y pos
-             * @param w Width
-             * @param h Height
-             * @param color Color
-             */
-            inline void drawRect(s32 x, s32 y, s32 w, s32 h, Color color) {
-                for (s32 x1 = x; x1 < (x + w); x1++)
-                    for (s32 y1 = y; y1 < (y + h); y1++)
-                        this->setPixelBlendDst(x1, y1, color);
-            }
-
-            void drawCircle(s32 centerX, s32 centerY, u16 radius, bool filled, Color color) {
-                s32 x = radius;
-                s32 y = 0;
-                s32 radiusError = 0;
-                s32 xChange = 1 - (radius << 1);
-                s32 yChange = 0;
-
-                while (x >= y) {
-                    if(filled) {
-                        for (s32 i = centerX - x; i <= centerX + x; i++) {
-                            s32 y0 = centerY + y;
-                            s32 y1 = centerY - y;
-                            s32 x0 = i;
-
-                            this->setPixelBlendDst(x0, y0, color);
-                            this->setPixelBlendDst(x0, y1, color);
-                        }
-
-                        for (s32 i = centerX - y; i <= centerX + y; i++) {
-                            s32 y0 = centerY + x;
-                            s32 y1 = centerY - x;
-                            s32 x0 = i;
-
-                            this->setPixelBlendDst(x0, y0, color);
-                            this->setPixelBlendDst(x0, y1, color);
-                        }
-
-                        y++;
-                        radiusError += yChange;
-                        yChange += 2;
-                        if (((radiusError << 1) + xChange) > 0) {
-                            x--;
-                            radiusError += xChange;
-                            xChange += 2;
-                        }
-                    } else {
-                        this->setPixelBlendDst(centerX + x, centerY + y, color);
-                        this->setPixelBlendDst(centerX + y, centerY + x, color);
-                        this->setPixelBlendDst(centerX - y, centerY + x, color);
-                        this->setPixelBlendDst(centerX - x, centerY + y, color);
-                        this->setPixelBlendDst(centerX - x, centerY - y, color);
-                        this->setPixelBlendDst(centerX - y, centerY - x, color);
-                        this->setPixelBlendDst(centerX + y, centerY - x, color);
-                        this->setPixelBlendDst(centerX + x, centerY - y, color);
-
-                        if(radiusError <= 0) {
-                            y++;
-                            radiusError += 2 * y + 1;
-                        } else {
-                            x--;
-                            radiusError -= 2 * x + 1;
-                        }
-                    }
-                }
-            }
-
-            /**
-             * @brief Draws a RGBA8888 bitmap from memory
-             *
-             * @param x X start position
-             * @param y Y start position
-             * @param w Bitmap width
-             * @param h Bitmap height
-             * @param bmp Pointer to bitmap data
-             */
-            void drawBitmap(s32 x, s32 y, s32 w, s32 h, const u8 *bmp) {
-                for (s32 y1 = 0; y1 < h; y1++) {
-                    for (s32 x1 = 0; x1 < w; x1++) {
-                        const Color color = { static_cast<u8>(bmp[0] >> 4), static_cast<u8>(bmp[1] >> 4), static_cast<u8>(bmp[2] >> 4), static_cast<u8>(bmp[3] >> 4) };
-                        setPixelBlendSrc(x + x1, y + y1, a(color));
-                        bmp += 4;
-                    }
-                }
-            }
-
-            /**
-             * @brief Fills the entire layer with a given color
-             *
-             * @param color Color
-             */
-            inline void fillScreen(Color color) {
-                std::fill_n(static_cast<Color*>(this->getCurrentFramebuffer()), this->getFramebufferSize() / sizeof(Color), color);
-            }
-
-            /**
-             * @brief Clears the layer (With transparency)
-             *
-             */
-            inline void clearScreen() {
-                this->fillScreen({ 0x00, 0x00, 0x00, 0x00 });
-            }
-
-            /**
-             * @brief Draws a string
-             *
-             * @param string String to draw
-             * @param monospace Draw string in monospace font
-             * @param x X pos
-             * @param y Y pos
-             * @param fontSize Height of the text drawn in pixels
-             * @param color Text color. Use transparent color to skip drawing and only get the string's dimensions
-             * @return Dimensions of drawn string
-             */
-            std::pair<u32, u32> drawString(const char* string, bool monospace, s32 x, s32 y, double fontSize, Color color, ssize_t maxWidth = 0) {
-                s32 maxX = x;
-                s32 currX = x;
-                s32 currY = y;
-
-                struct Glyph {
-                    stbtt_fontinfo *currFont;
-                    double currFontSize;
-                    int bounds[4];
-                    int xAdvance;
-                    u8 *glyphBmp;
-                    int width, height;
-                };
-
-                static std::unordered_map<u64, Glyph> s_glyphCache;
-
-                do {
-                    if (maxWidth > 0 && maxWidth < (currX - x))
-                        break;
-
-                    u32 currCharacter;
-                    ssize_t codepointWidth = decode_utf8(&currCharacter, reinterpret_cast<const u8*>(string));
-
-                    if (codepointWidth <= 0)
-                        break;
-
-                    string += codepointWidth;
-
-                    if (currCharacter == '\n') {
-                        maxX = std::max(currX, maxX);
-
-                        currX = x;
-                        currY += fontSize;
-
-                        continue;
-                    }
-
-                    u64 key = (static_cast<u64>(currCharacter) << 32) | static_cast<u64>(monospace) << 31 | static_cast<u64>(std::bit_cast<u64>(fontSize));
-
-                    Glyph *glyph = nullptr;
-
-                    auto it = s_glyphCache.find(key);
-                    if (it == s_glyphCache.end()) {
-                        /* Cache glyph */
-                        glyph = &s_glyphCache.emplace(key, Glyph()).first->second;
-
-                        if (stbtt_FindGlyphIndex(&this->m_extFont, currCharacter))
-                            glyph->currFont = &this->m_extFont;
-                        else if(this->m_hasLocalFont && stbtt_FindGlyphIndex(&this->m_stdFont, currCharacter)==0)
-                            glyph->currFont = &this->m_localFont;
-                        else
-                            glyph->currFont = &this->m_stdFont;
-
-                        glyph->currFontSize = stbtt_ScaleForPixelHeight(glyph->currFont, fontSize);
-
-                        stbtt_GetCodepointBitmapBoxSubpixel(glyph->currFont, currCharacter, glyph->currFontSize, glyph->currFontSize,
-                                                            0, 0, &glyph->bounds[0], &glyph->bounds[1], &glyph->bounds[2], &glyph->bounds[3]);
-
-                        int yAdvance = 0;
-                        stbtt_GetCodepointHMetrics(glyph->currFont, monospace ? 'W' : currCharacter, &glyph->xAdvance, &yAdvance);
-
-                        glyph->glyphBmp = stbtt_GetCodepointBitmap(glyph->currFont, glyph->currFontSize, glyph->currFontSize, currCharacter, &glyph->width, &glyph->height, nullptr, nullptr);
-                    } else {
-                        /* Use cached glyph */
-                        glyph = &it->second;
-                    }
-
-                    if (glyph->glyphBmp != nullptr && !std::iswspace(currCharacter) && fontSize > 0 && color.a != 0x0) {
-
-                        auto x = currX + glyph->bounds[0];
-                        auto y = currY + glyph->bounds[1];
-                        for (s32 bmpY = 0; bmpY < glyph->height; bmpY++) {
-                            for (s32 bmpX = 0; bmpX < glyph->width; bmpX++) {
-                                auto bmpColor = glyph->glyphBmp[glyph->width * bmpY + bmpX] >> 4;
-                                if (bmpColor == 0xF) {
-                                    this->setPixel(x + bmpX, y + bmpY, color);
-                                } else if (bmpColor != 0x0) {
-                                    Color tmpColor = color;
-                                    tmpColor.a = bmpColor * (double(tmpColor.a) / 0xF);
-                                    this->setPixelBlendDst(x + bmpX, y + bmpY, tmpColor);
-                                }
-                            }
-                        }
-
-                    }
-
-                    currX += static_cast<s32>(glyph->xAdvance * glyph->currFontSize);
-
-                } while (*string != '\0');
-
-                maxX = std::max(currX, maxX);
-
-                return { maxX - x, currY - y };
-            }
-
-            /**
-             * @brief Limit a strings length and end it with "…"
-             *
-             * @param string String to truncate
-             * @param maxLength Maximum length of string
-             */
-            std::string limitStringLength(std::string string, bool monospace, double fontSize, s32 maxLength) {
-                if (string.size() < 2)
-                    return string;
-
-                s32 currX = 0;
-                ssize_t strPos = 0;
-                ssize_t codepointWidth;
-
-                do {
-                    u32 currCharacter;
-                    codepointWidth = decode_utf8(&currCharacter, reinterpret_cast<const u8*>(&string[strPos]));
-
-                    if (codepointWidth <= 0)
-                        break;
-
-                    strPos += codepointWidth;
-
-                    stbtt_fontinfo *currFont = nullptr;
-
-                    if (stbtt_FindGlyphIndex(&this->m_extFont, currCharacter))
-                        currFont = &this->m_extFont;
-                    else if(this->m_hasLocalFont && stbtt_FindGlyphIndex(&this->m_stdFont, currCharacter)==0)
-                        currFont = &this->m_localFont;
-                    else
-                        currFont = &this->m_stdFont;
-
-                    double currFontSize = stbtt_ScaleForPixelHeight(currFont, fontSize);
-
-                    int xAdvance = 0, yAdvance = 0;
-                    stbtt_GetCodepointHMetrics(currFont, monospace ? 'W' : currCharacter, &xAdvance, &yAdvance);
-
-                    currX += static_cast<s32>(xAdvance * currFontSize);
-
-                } while (string[strPos] != '\0' && string[strPos] != '\n' && currX < maxLength);
-
-                string = string.substr(0, strPos - codepointWidth) + "…";
-                string.shrink_to_fit();
-
-                return string;
-            }
-
-        private:
-            Renderer() {}
-
-            /**
-             * @brief Gets the renderer instance
-             *
-             * @return Renderer
-             */
-            static Renderer& get() {
-                static Renderer renderer;
-
-                return renderer;
-            }
-
-            /**
-             * @brief Sets the opacity of the layer
-             *
-             * @param opacity Opacity
-             */
-            static void setOpacity(double opacity) {
-                opacity = std::clamp(opacity, (double)0.0, (double)1.0);
-
-                Renderer::s_opacity = opacity;
-            }
-
-            bool m_initialized = false;
-            ViDisplay m_display;
-            ViLayer m_layer;
-            Event m_vsyncEvent;
-
-            NWindow m_window;
-            Framebuffer m_framebuffer;
-            void *m_currentFramebuffer = nullptr;
-
-            std::stack<ScissoringConfig> m_scissoringStack;
-
-            stbtt_fontinfo m_stdFont, m_localFont, m_extFont;
-            bool m_hasLocalFont = false;
-
-            static inline double s_opacity = 1.0F;
-
-            /**
-             * @brief Get the current framebuffer address
-             *
-             * @return Framebuffer address
-             */
-            inline void* getCurrentFramebuffer() {
-                return this->m_currentFramebuffer;
-            }
-
-            /**
-             * @brief Get the next framebuffer address
-             *
-             * @return Next framebuffer address
-             */
-            inline void* getNextFramebuffer() {
-                return static_cast<u8*>(this->m_framebuffer.buf) + this->getNextFramebufferSlot() * this->getFramebufferSize();
-            }
-
-            /**
-             * @brief Get the framebuffer size
-             *
-             * @return Framebuffer size
-             */
-            inline size_t getFramebufferSize() {
-                return this->m_framebuffer.fb_size;
-            }
-
-            /**
-             * @brief Get the number of framebuffers in use
-             *
-             * @return Number of framebuffers
-             */
-            inline size_t getFramebufferCount() {
-                return this->m_framebuffer.num_fbs;
-            }
-
-            /**
-             * @brief Get the currently used framebuffer's slot
-             *
-             * @return Slot
-             */
-            inline u8 getCurrentFramebufferSlot() {
-                return this->m_window.cur_slot;
-            }
-
-            /**
-             * @brief Get the next framebuffer's slot
-             *
-             * @return Next slot
-             */
-            inline u8 getNextFramebufferSlot() {
-                return (this->getCurrentFramebufferSlot() + 1) % this->getFramebufferCount();
-            }
-
-            /**
-             * @brief Waits for the vsync event
-             *
-             */
-            inline void waitForVSync() {
-                eventWait(&this->m_vsyncEvent, UINT64_MAX);
-            }
-
-            /**
-             * @brief Decodes a x and y coordinate into a offset into the swizzled framebuffer
-             *
-             * @param x X pos
-             * @param y Y Pos
-             * @return Offset
-             */
-            u32 getPixelOffset(s32 x, s32 y) {
-                if (!this->m_scissoringStack.empty()) {
-                    auto currScissorConfig = this->m_scissoringStack.top();
-                    if (x < currScissorConfig.x ||
-                        y < currScissorConfig.y ||
-                        x > currScissorConfig.x + currScissorConfig.w ||
-                        y > currScissorConfig.y + currScissorConfig.h)
-                            return UINT32_MAX;
-                }
-
-                u32 tmpPos = ((y & 127) / 16) + (x / 32 * 8) + ((y / 16 / 8) * (((cfg::FramebufferWidth / 2) / 16 * 8)));
-                tmpPos *= 16 * 16 * 4;
-
-                tmpPos += ((y % 16) / 8) * 512 + ((x % 32) / 16) * 256 + ((y % 8) / 2) * 64 + ((x % 16) / 8) * 32 + (y % 2) * 16 + (x % 8) * 2;
-
-                return tmpPos / 2;
-            }
-
-            /**
-             * @brief Initializes the renderer and layers
-             *
-             */
-            void init() {
-
-                cfg::LayerPosX = 0;
-                cfg::LayerPosY = 0;
-                cfg::FramebufferWidth  = 448;
-                cfg::FramebufferHeight = 720;
-                cfg::LayerWidth  = cfg::ScreenHeight * (double(cfg::FramebufferWidth) / double(cfg::FramebufferHeight));
-                cfg::LayerHeight = cfg::ScreenHeight;
-
-                if (this->m_initialized)
-                    return;
-
-                tsl::hlp::doWithSmSession([this]{
-                    ASSERT_FATAL(viInitialize(ViServiceType_Manager));
-                    ASSERT_FATAL(viOpenDefaultDisplay(&this->m_display));
-                    ASSERT_FATAL(viGetDisplayVsyncEvent(&this->m_display, &this->m_vsyncEvent));
-                    ASSERT_FATAL(viCreateManagedLayer(&this->m_display, static_cast<ViLayerFlags>(0), 0, &__nx_vi_layer_id));
-                    ASSERT_FATAL(viCreateLayer(&this->m_display, &this->m_layer));
-                    ASSERT_FATAL(viSetLayerScalingMode(&this->m_layer, ViScalingMode_FitToLayer));
-
-                    if (s32 layerZ = 0; R_SUCCEEDED(viGetZOrderCountMax(&this->m_display, &layerZ)) && layerZ > 0)
-                        ASSERT_FATAL(viSetLayerZ(&this->m_layer, layerZ));
-
-                    ASSERT_FATAL(tsl::hlp::viAddToLayerStack(&this->m_layer, ViLayerStack_Default));
-                    ASSERT_FATAL(tsl::hlp::viAddToLayerStack(&this->m_layer, ViLayerStack_Screenshot));
-                    ASSERT_FATAL(tsl::hlp::viAddToLayerStack(&this->m_layer, ViLayerStack_Recording));
-                    ASSERT_FATAL(tsl::hlp::viAddToLayerStack(&this->m_layer, ViLayerStack_Arbitrary));
-                    ASSERT_FATAL(tsl::hlp::viAddToLayerStack(&this->m_layer, ViLayerStack_LastFrame));
-                    ASSERT_FATAL(tsl::hlp::viAddToLayerStack(&this->m_layer, ViLayerStack_Null));
-                    ASSERT_FATAL(tsl::hlp::viAddToLayerStack(&this->m_layer, ViLayerStack_ApplicationForDebug));
-                    ASSERT_FATAL(tsl::hlp::viAddToLayerStack(&this->m_layer, ViLayerStack_Lcd));
-                    //ASSERT_FATAL(tsl::hlp::viAddToLayerStack(&this->m_layer, 8));
-
-                    ASSERT_FATAL(viSetLayerSize(&this->m_layer, cfg::LayerWidth, cfg::LayerHeight));
-                    ASSERT_FATAL(viSetLayerPosition(&this->m_layer, cfg::LayerPosX, cfg::LayerPosY));
-                    ASSERT_FATAL(nwindowCreateFromLayer(&this->m_window, &this->m_layer));
-                    ASSERT_FATAL(framebufferCreate(&this->m_framebuffer, &this->m_window, cfg::FramebufferWidth, cfg::FramebufferHeight, PIXEL_FORMAT_RGBA_4444, 2));
-                    ASSERT_FATAL(setInitialize());
-                    ASSERT_FATAL(this->initFonts());
-                    setExit();
-                });
-
-                this->m_initialized = true;
-            }
-
-            /**
-             * @brief Exits the renderer and layer
-             *
-             */
-            void exit() {
-                if (!this->m_initialized)
-                    return;
-
-                framebufferClose(&this->m_framebuffer);
-                nwindowClose(&this->m_window);
-                viDestroyManagedLayer(&this->m_layer);
-                viCloseDisplay(&this->m_display);
-                eventClose(&this->m_vsyncEvent);
-                viExit();
-            }
-
-            /**
-             * @brief Initializes Nintendo's shared fonts. Default and Extended
-             *
-             * @return Result
-             */
-            Result initFonts() {
-                static PlFontData stdFontData, localFontData, extFontData;
-
-                // Nintendo's default font
-                TSL_R_TRY(plGetSharedFontByType(&stdFontData, PlSharedFontType_Standard));
-
-                u8 *fontBuffer = reinterpret_cast<u8*>(stdFontData.address);
-                stbtt_InitFont(&this->m_stdFont, fontBuffer, stbtt_GetFontOffsetForIndex(fontBuffer, 0));
-
-                u64 languageCode;
-                if (R_SUCCEEDED(setGetSystemLanguage(&languageCode))) {
-                    // Check if need localization font
-                    SetLanguage setLanguage;
-                    TSL_R_TRY(setMakeLanguage(languageCode, &setLanguage));
-                    this->m_hasLocalFont = true;
-                    switch (setLanguage) {
-                    case SetLanguage_ZHCN:
-                    case SetLanguage_ZHHANS:
-                        TSL_R_TRY(plGetSharedFontByType(&localFontData, PlSharedFontType_ChineseSimplified));
-                        break;
-                    case SetLanguage_KO:
-                        TSL_R_TRY(plGetSharedFontByType(&localFontData, PlSharedFontType_KO));
-                        break;
-                    case SetLanguage_ZHTW:
-                    case SetLanguage_ZHHANT:
-                        TSL_R_TRY(plGetSharedFontByType(&localFontData, PlSharedFontType_ChineseTraditional));
-                        break;
-                    default:
-                        this->m_hasLocalFont = false;
-                        break;
-                    }
-
-                    if (this->m_hasLocalFont) {
-                        fontBuffer = reinterpret_cast<u8*>(localFontData.address);
-                        stbtt_InitFont(&this->m_localFont, fontBuffer, stbtt_GetFontOffsetForIndex(fontBuffer, 0));
-                    }
-                }
-
-                // Nintendo's extended font containing a bunch of icons
-                TSL_R_TRY(plGetSharedFontByType(&extFontData, PlSharedFontType_NintendoExt));
-
-                fontBuffer = reinterpret_cast<u8*>(extFontData.address);
-                stbtt_InitFont(&this->m_extFont, fontBuffer, stbtt_GetFontOffsetForIndex(fontBuffer, 0));
-
-                return 0;
-            }
-
-            /**
-             * @brief Start a new frame
-             * @warning Don't call this more than once before calling \ref endFrame
-             */
-            inline void startFrame() {
-                this->m_currentFramebuffer = framebufferBegin(&this->m_framebuffer, nullptr);
-            }
-
-            /**
-             * @brief End the current frame
-             * @warning Don't call this before calling \ref startFrame once
-             */
-            inline void endFrame() {
-                this->waitForVSync();
-                framebufferEnd(&this->m_framebuffer);
-
-                this->m_currentFramebuffer = nullptr;
-            }
-        };
-
-    }
-
-    // Elements
-
-    namespace elm {
-
-        enum class TouchEvent {
-            Touch,
-            Hold,
-            Scroll,
-            Release
-        };
-
-        /**
-         * @brief The top level Element of the libtesla UI library
-         * @note When creating your own elements, extend from this or one of it's sub classes
-         */
-        class Element {
-        public:
-            Element() {}
-            virtual ~Element() { }
-
-            /**
-             * @brief Handles focus requesting
-             * @note This function should return the element to focus.
-             *       When this element should be focused, return `this`.
-             *       When one of it's child should be focused, return `this->child->requestFocus(oldFocus, direction)`
-             *       When this element is not focusable, return `nullptr`
-             *
-             * @param oldFocus Previously focused element
-             * @param direction Direction in which focus moved. \ref FocusDirection::None is passed for the initial load
-             * @return Element to focus
-             */
-            virtual Element* requestFocus(Element *oldFocus, FocusDirection direction) {
-                return nullptr;
-            }
-
-            /**
-             * @brief Function called when a joycon button got pressed
-             *
-             * @param keys Keys pressed in the last frame
-             * @return true when button press has been consumed
-             * @return false when button press should be passed on to the parent
-             */
-            virtual bool onClick(u64 keys) {
-                return m_clickListener(keys);
-            }
-
-            /**
-             * @brief Called once per frame with the latest HID inputs
-             *
-             * @param keysDown Buttons pressed in the last frame
-             * @param keysHeld Buttons held down longer than one frame
-             * @param touchInput Last touch position
-             * @param leftJoyStick Left joystick position
-             * @param rightJoyStick Right joystick position
-             * @return Weather or not the input has been consumed
-             */
-            virtual bool handleInput(u64 keysDown, u64 keysHeld, const HidTouchState &touchPos, HidAnalogStickState joyStickPosLeft, HidAnalogStickState joyStickPosRight) {
-                return false;
-            }
-
-            /**
-             * @brief Function called when the element got touched
-             * @todo Not yet implemented
-             *
-             * @param x X pos
-             * @param y Y pos
-             * @return true when touch input has been consumed
-             * @return false when touch input should be passed on to the parent
-             */
-            virtual bool onTouch(TouchEvent event, s32 currX, s32 currY, s32 prevX, s32 prevY, s32 initialX, s32 initialY) {
-                return false;
-            }
-
-            /**
-             * @brief Called once per frame to draw the element
-             * @warning Do not call this yourself. Use \ref Element::frame(gfx::Renderer *renderer)
-             *
-             * @param renderer Renderer
-             */
-            virtual void draw(gfx::Renderer *renderer) = 0;
-
-            /**
-             * @brief Called when the underlying Gui gets created and after calling \ref Gui::invalidate() to calculate positions and boundaries of the element
-             * @warning Do not call this yourself. Use \ref Element::invalidate()
-             *
-             * @param parentX Parent X pos
-             * @param parentY Parent Y pos
-             * @param parentWidth Parent Width
-             * @param parentHeight Parent Height
-             */
-            virtual void layout(u16 parentX, u16 parentY, u16 parentWidth, u16 parentHeight) = 0;
-
-            /**
-             * @brief Draws highlighting and the element itself
-             * @note When drawing children of a element in \ref Element::draw(gfx::Renderer *renderer), use `this->child->frame(renderer)` instead of calling draw directly
-             *
-             * @param renderer
-             */
-            void frame(gfx::Renderer *renderer) {
-                renderer->enableScissoring(0, 0, tsl::cfg::FramebufferWidth, tsl::cfg::FramebufferHeight);
-
-                if (this->m_focused)
-                    this->drawFocusBackground(renderer);
-
-                renderer->disableScissoring();
-
-                this->draw(renderer);
-
-                renderer->enableScissoring(0, 0, tsl::cfg::FramebufferWidth, tsl::cfg::FramebufferHeight);
-
-                if (this->m_focused)
-                    this->drawHighlight(renderer);
-
-                renderer->disableScissoring();
-            }
-
-            /**
-             * @brief Forces a layout recreation of a element
-             *
-             */
-            void invalidate() {
-                const auto& parent = this->getParent();
-
-                if (parent == nullptr)
-                    this->layout(0, 0, cfg::FramebufferWidth, cfg::FramebufferHeight);
-                else
-                    this->layout(ELEMENT_BOUNDS(parent));
-            }
-
-            /**
-             * @brief Shake the highlight in the given direction to signal that the focus cannot move there
-             *
-             * @param direction Direction to shake highlight in
-             */
-            void shakeHighlight(FocusDirection direction) {
-                this->m_highlightShaking = true;
-                this->m_highlightShakingDirection = direction;
-                this->m_highlightShakingStartTime = std::chrono::system_clock::now();
-            }
-
-            /**
-             * @brief Triggers the blue click animation to signal a element has been clicked on
-             *
-             */
-            void triggerClickAnimation() {
-                this->m_clickAnimationProgress = tsl::style::ListItemHighlightLength;
-            }
-
-            /**
-             * @brief Resets the click animation progress, canceling the animation
-             */
-            void resetClickAnimation() {
-                this->m_clickAnimationProgress = 0;
-            }
-
-            /**
-             * @brief Draws the blue highlight animation when clicking on a button
-             * @note Override this if you have a element that e.g requires a non-rectangular animation or a different color
-             *
-             * @param renderer Renderer
-             */
-            virtual void drawClickAnimation(gfx::Renderer *renderer) {
-                Color animColor = tsl::style::color::ColorClickAnimation;
-                u8 saturation = tsl::style::ListItemHighlightSaturation * (double(this->m_clickAnimationProgress) / double(tsl::style::ListItemHighlightLength));
-
-                animColor.g = saturation;
-                animColor.b = saturation;
-
-                renderer->drawRect(ELEMENT_BOUNDS(this), a(animColor));
-            }
-
-            /**
-             * @brief Draws the back background when a element is highlighted
-             * @note Override this if you have a element that e.g requires a non-rectangular focus
-             *
-             * @param renderer Renderer
-             */
-            virtual void drawFocusBackground(gfx::Renderer *renderer) {
-                renderer->drawRect(ELEMENT_BOUNDS(this), a(0xF000));
-
-                if (this->m_clickAnimationProgress > 0) {
-                    this->drawClickAnimation(renderer);
-                    this->m_clickAnimationProgress--;
-                }
-            }
-
-            /**
-             * @brief Draws the blue boarder when a element is highlighted
-             * @note Override this if you have a element that e.g requires a non-rectangular focus
-             *
-             * @param renderer Renderer
-             */
-            virtual void drawHighlight(gfx::Renderer *renderer) {
-                static double counter = 0;
-                const double progress = (std::sin(counter) + 1) / 2;
-                Color highlightColor = {   static_cast<u8>((0x2 - 0x8) * progress + 0x8),
-                                                static_cast<u8>((0x8 - 0xF) * progress + 0xF),
-                                                static_cast<u8>((0xC - 0xF) * progress + 0xF),
-                                                0xF };
-
-                counter += 0.1F;
-
-                s32 x = 0, y = 0;
-
-                if (this->m_highlightShaking) {
-                    auto t = (std::chrono::system_clock::now() - this->m_highlightShakingStartTime);
-                    if (t >= 100ms)
-                        this->m_highlightShaking = false;
-                    else {
-                        s32 amplitude = std::rand() % 5 + 5;
-
-                        switch (this->m_highlightShakingDirection) {
-                            case FocusDirection::Up:
-                                y -= shakeAnimation(t, amplitude);
-                                break;
-                            case FocusDirection::Down:
-                                y += shakeAnimation(t, amplitude);
-                                break;
-                            case FocusDirection::Left:
-                                x -= shakeAnimation(t, amplitude);
-                                break;
-                            case FocusDirection::Right:
-                                x += shakeAnimation(t, amplitude);
-                                break;
-                            default:
-                                break;
-                        }
-
-                        x = std::clamp(x, -amplitude, amplitude);
-                        y = std::clamp(y, -amplitude, amplitude);
-                    }
-                }
-
-                renderer->drawRect(this->getX() + x - 4, this->getY() + y - 4, this->getWidth() + 8, 4, a(highlightColor));
-                renderer->drawRect(this->getX() + x - 4, this->getY() + y + this->getHeight(), this->getWidth() + 8, 4, a(highlightColor));
-                renderer->drawRect(this->getX() + x - 4, this->getY() + y, 4, this->getHeight(), a(highlightColor));
-                renderer->drawRect(this->getX() + x + this->getWidth(), this->getY() + y, 4, this->getHeight(), a(highlightColor));
-
-            }
-
-            /**
-             * @brief Sets the boundaries of this view
-             *
-             * @param x Start X pos
-             * @param y Start Y pos
-             * @param width Width
-             * @param height Height
-             */
-            void setBoundaries(s32 x, s32 y, s32 width, s32 height) {
-                this->m_x = x;
-                this->m_y = y;
-                this->m_width = width;
-                this->m_height = height;
-            }
-
-            /**
-             * @brief Adds a click listener to the element
-             *
-             * @param clickListener Click listener called with keys that were pressed last frame. Callback should return true if keys got consumed
-             */
-            virtual void setClickListener(std::function<bool(u64 keys)> clickListener) {
-                this->m_clickListener = clickListener;
-            }
-
-            /**
-             * @brief Gets the element's X position
-             *
-             * @return X position
-             */
-            inline s32 getX() { return this->m_x; }
-            /**
-             * @brief Gets the element's Y position
-             *
-             * @return Y position
-             */
-            inline s32 getY() { return this->m_y; }
-            /**
-             * @brief Gets the element's Width
-             *
-             * @return Width
-             */
-            inline s32 getWidth() { return this->m_width;  }
-            /**
-             * @brief Gets the element's Height
-             *
-             * @return Height
-             */
-            inline s32 getHeight() { return this->m_height; }
-
-            inline s32 getTopBound() { return this->getY(); }
-            inline s32 getLeftBound() { return this->getX(); }
-            inline s32 getRightBound() { return this->getX() + this->getWidth(); }
-            inline s32 getBottomBound() { return this->getY() + this->getHeight(); }
-
-            /**
-             * @brief Check if the coordinates are in the elements bounds
-             *
-             * @return true if coordinates are in bounds, false otherwise
-             */
-            bool inBounds(s32 touchX, s32 touchY) {
-                return touchX >= this->getLeftBound() && touchX <= this->getRightBound() && touchY >= this->getTopBound() && touchY <= this->getBottomBound();
-            }
-
-            /**
-             * @brief Sets the element's parent
-             * @note This is required to handle focus and button downpassing properly
-             *
-             * @param parent Parent
-             */
-            inline void setParent(Element *parent) { this->m_parent = parent; }
-
-            /**
-             * @brief Get the element's parent
-             *
-             * @return Parent
-             */
-            inline Element* getParent() { return this->m_parent; }
-
-            /**
-             * @brief Marks this element as focused or unfocused to draw the highlight
-             *
-             * @param focused Focused
-             */
-            virtual inline void setFocused(bool focused) {
-                this->m_focused = focused;
-                this->m_clickAnimationProgress = 0;
-            }
-
-
-            static InputMode getInputMode() { return Element::s_inputMode; }
-
-            static void setInputMode(InputMode mode) { Element::s_inputMode = mode; }
-
-        protected:
-            constexpr static inline auto a = &gfx::Renderer::a;
-            bool m_focused = false;
-            u8 m_clickAnimationProgress = 0;
-
-            // Highlight shake animation
-            bool m_highlightShaking = false;
-            std::chrono::system_clock::time_point m_highlightShakingStartTime;
-            FocusDirection m_highlightShakingDirection;
-
-            static inline InputMode s_inputMode;
-
-            /**
-             * @brief Shake animation callculation based on a damped sine wave
-             *
-             * @param t Passed time
-             * @param a Amplitude
-             * @return Damped sine wave output
-             */
-            int shakeAnimation(std::chrono::system_clock::duration t, double a) {
-                double w = 0.2F;
-                double tau = 0.05F;
-
-                int t_ = t.count() / 1'000'000;
-
-                return roundf(a * exp(-(tau * t_) * sin(w * t_)));
-            }
-
-        private:
-            friend class Gui;
-
-            s32 m_x = 0, m_y = 0, m_width = 0, m_height = 0;
-            Element *m_parent = nullptr;
-
-            std::function<bool(u64 keys)> m_clickListener = [](u64) { return false; };
-
-        };
-
-        /**
-         * @brief A Element that exposes the renderer directly to draw custom views easily
-         */
-        class CustomDrawer : public Element {
-        public:
-            /**
-             * @brief Constructor
-             * @note This element should only be used to draw static things the user cannot interact with e.g info text, images, etc.
-             *
-             * @param renderFunc Callback that will be called once every frame to draw this view
-             */
-            CustomDrawer(std::function<void(gfx::Renderer* r, s32 x, s32 y, s32 w, s32 h)> renderFunc) : Element(), m_renderFunc(renderFunc) {}
-            virtual ~CustomDrawer() {}
-
-            virtual void draw(gfx::Renderer* renderer) override {
-                renderer->enableScissoring(ELEMENT_BOUNDS(this));
-                this->m_renderFunc(renderer, ELEMENT_BOUNDS(this));
-                renderer->disableScissoring();
-            }
-
-            virtual void layout(u16 parentX, u16 parentY, u16 parentWidth, u16 parentHeight) override {
-
-            }
-
-        private:
-            std::function<void(gfx::Renderer*, s32 x, s32 y, s32 w, s32 h)> m_renderFunc;
-        };
-
-
-        /**
-         * @brief The base frame which can contain another view
-         *
-         */
-        class OverlayFrame : public Element {
-        public:
-            /**
-             * @brief Constructor
-             *
-             * @param title Name of the Overlay drawn bolt at the top
-             * @param subtitle Subtitle drawn bellow the title e.g version number
-             */
-            OverlayFrame(const std::string& title, const std::string& subtitle) : Element(), m_title(title), m_subtitle(subtitle) {}
-            virtual ~OverlayFrame() {
-                if (this->m_contentElement != nullptr)
-                    delete this->m_contentElement;
-            }
-
-            virtual void draw(gfx::Renderer *renderer) override {
-                renderer->fillScreen(a(tsl::style::color::ColorFrameBackground));
-                renderer->drawRect(tsl::cfg::FramebufferWidth - 1, 0, 1, tsl::cfg::FramebufferHeight, a(0xF222));
-
-                renderer->drawString(this->m_title.c_str(), false, 20, 50, 30, a(tsl::style::color::ColorText));
-                renderer->drawString(this->m_subtitle.c_str(), false, 20, 70, 15, a(tsl::style::color::ColorDescription));
-
-                renderer->drawRect(15, tsl::cfg::FramebufferHeight - 73, tsl::cfg::FramebufferWidth - 30, 1, a(tsl::style::color::ColorText));
-
-                renderer->drawString("\uE0E1  Back     \uE0E0  OK", false, 30, 693, 23, a(tsl::style::color::ColorText));
-
-                if (this->m_contentElement != nullptr)
-                    this->m_contentElement->frame(renderer);
-            }
-
-            virtual void layout(u16 parentX, u16 parentY, u16 parentWidth, u16 parentHeight) override {
-                this->setBoundaries(parentX, parentY, parentWidth, parentHeight);
-
-                if (this->m_contentElement != nullptr) {
-                    this->m_contentElement->setBoundaries(parentX + 35, parentY + 125, parentWidth - 85, parentHeight - 73 - 125);
-                    this->m_contentElement->invalidate();
-                }
-            }
-
-            virtual Element* requestFocus(Element *oldFocus, FocusDirection direction) override {
-                if (this->m_contentElement != nullptr)
-                    return this->m_contentElement->requestFocus(oldFocus, direction);
-                else
-                    return nullptr;
-            }
-
-            virtual bool onTouch(TouchEvent event, s32 currX, s32 currY, s32 prevX, s32 prevY, s32 initialX, s32 initialY) {
-                // Discard touches outside bounds
-                if (!this->m_contentElement->inBounds(currX, currY))
-                    return false;
-
-                if (this->m_contentElement != nullptr)
-                    return this->m_contentElement->onTouch(event, currX, currY, prevX, prevY, initialX, initialY);
-                else return false;
-            }
-
-            /**
-             * @brief Sets the content of the frame
-             *
-             * @param content Element
-             */
-            void setContent(Element *content) {
-                if (this->m_contentElement != nullptr)
-                    delete this->m_contentElement;
-
-                this->m_contentElement = content;
-
-                if (content != nullptr) {
-                    this->m_contentElement->setParent(this);
-                    this->invalidate();
-                }
-            }
-
-            /**
-             * @brief Changes the title of the menu
-             *
-             * @param title Title to change to
-             */
-            void setTitle(const std::string &title) {
-                this->m_title = title;
-            }
-
-            /**
-             * @brief Changes the subtitle of the menu
-             *
-             * @param title Subtitle to change to
-             */
-            void setSubtitle(const std::string &subtitle) {
-                this->m_subtitle = subtitle;
-            }
-
-        protected:
-            Element *m_contentElement = nullptr;
-
-            std::string m_title, m_subtitle;
-        };
-
-        /**
-         * @brief The base frame which can contain another view with a customizable header
-         *
-         */
-        class HeaderOverlayFrame : public Element {
-        public:
-            HeaderOverlayFrame(u16 headerHeight = 175) : Element(), m_headerHeight(headerHeight) {}
-            virtual ~HeaderOverlayFrame() {
-                if (this->m_contentElement != nullptr)
-                    delete this->m_contentElement;
-
-                if (this->m_header != nullptr)
-                    delete this->m_header;
-            }
-
-            virtual void draw(gfx::Renderer *renderer) override {
-                renderer->fillScreen(a(tsl::style::color::ColorFrameBackground));
-                renderer->drawRect(tsl::cfg::FramebufferWidth - 1, 0, 1, tsl::cfg::FramebufferHeight, a(0xF222));
-
-                renderer->drawRect(15, tsl::cfg::FramebufferHeight - 73, tsl::cfg::FramebufferWidth - 30, 1, a(tsl::style::color::ColorText));
-
-                renderer->drawString("\uE0E1  Back     \uE0E0  OK", false, 30, 693, 23, a(tsl::style::color::ColorText));
-
-                if (this->m_header != nullptr)
-                    this->m_header->frame(renderer);
-
-                if (this->m_contentElement != nullptr)
-                    this->m_contentElement->frame(renderer);
-            }
-
-            virtual void layout(u16 parentX, u16 parentY, u16 parentWidth, u16 parentHeight) override {
-                this->setBoundaries(parentX, parentY, parentWidth, parentHeight);
-
-                if (this->m_contentElement != nullptr) {
-                    this->m_contentElement->setBoundaries(parentX + 35, parentY + this->m_headerHeight, parentWidth - 85, parentHeight - 73 - this->m_headerHeight);
-                    this->m_contentElement->invalidate();
-                }
-
-                if (this->m_header != nullptr) {
-                    this->m_header->setBoundaries(parentX, parentY, parentWidth, this->m_headerHeight);
-                    this->m_header->invalidate();
-                }
-            }
-
-            virtual bool onTouch(TouchEvent event, s32 currX, s32 currY, s32 prevX, s32 prevY, s32 initialX, s32 initialY) {
-                // Discard touches outside bounds
-                if (!this->m_contentElement->inBounds(currX, currY))
-                    return false;
-
-                if (this->m_contentElement != nullptr)
-                    return this->m_contentElement->onTouch(event, currX, currY, prevX, prevY, initialX, initialY);
-                else return false;
-            }
-
-            virtual Element* requestFocus(Element *oldFocus, FocusDirection direction) override {
-                if (this->m_contentElement != nullptr)
-                    return this->m_contentElement->requestFocus(oldFocus, direction);
-                else
-                    return nullptr;
-            }
-
-            /**
-             * @brief Sets the content of the frame
-             *
-             * @param content Element
-             */
-            void setContent(Element *content) {
-                if (this->m_contentElement != nullptr)
-                    delete this->m_contentElement;
-
-                this->m_contentElement = content;
-
-                if (content != nullptr) {
-                    this->m_contentElement->setParent(this);
-                    this->invalidate();
-                }
-            }
-
-            /**
-             * @brief Sets the header of the frame
-             *
-             * @param header Header custom drawer
-             */
-            void setHeader(CustomDrawer *header) {
-                if (this->m_header != nullptr)
-                    delete this->m_header;
-
-                this->m_header = header;
-
-                if (header != nullptr) {
-                    this->m_header->setParent(this);
-                    this->invalidate();
-                }
-            }
-
-        protected:
-            Element *m_contentElement = nullptr;
-            CustomDrawer *m_header = nullptr;
-
-            u16 m_headerHeight;
-        };
-
-        /**
-         * @brief Single color rectangle element mainly used for debugging to visualize boundaries
-         *
-         */
-        class DebugRectangle : public Element {
-        public:
-            /**
-             * @brief Constructor
-             *
-             * @param color Color of the rectangle
-             */
-            DebugRectangle(Color color) : Element(), m_color(color) {}
-            virtual ~DebugRectangle() {}
-
-            virtual void draw(gfx::Renderer *renderer) override {
-                renderer->drawRect(ELEMENT_BOUNDS(this), a(this->m_color));
-            }
-
-            virtual void layout(u16 parentX, u16 parentY, u16 parentWidth, u16 parentHeight) override {}
-
-        private:
-            Color m_color;
-        };
-
-
-        /**
-         * @brief A List containing list items
-         *
-         */
-        class List : public Element {
-        public:
-            /**
-             * @brief Constructor
-             *
-             */
-            List() : Element() {}
-            virtual ~List() {
-                for (auto& item : this->m_items)
-                    delete item;
-            }
-
-            virtual void draw(gfx::Renderer *renderer) override {
-                if (this->m_clearList) {
-                    for (auto& item : this->m_items)
-                        delete item;
-
-                    this->m_items.clear();
-                    this->m_offset = 0;
-                    this->m_focusedIndex = 0;
-                    this->invalidate();
-                    this->m_clearList = false;
-                }
-
-                for (auto [index, element] : this->m_itemsToAdd) {
-                    element->invalidate();
-                    if (index >= 0 && (this->m_items.size() > static_cast<size_t>(index))) {
-                        const auto& it = this->m_items.cbegin() + static_cast<size_t>(index);
-                        this->m_items.insert(it, element);
-                    } else {
-                        this->m_items.push_back(element);
-                    }
-                    this->invalidate();
-                    this->updateScrollOffset();
-                }
-                this->m_itemsToAdd.clear();
-
-                for (auto element : this->m_itemsToRemove) {
-                    for (auto it = m_items.cbegin(); it != m_items.cend(); ++it) {
-                        if (*it == element) {
-                            this->m_items.erase(it);
-                            if (this->m_focusedIndex >= (it - this->m_items.cbegin())) {
-                                this->m_focusedIndex--;
-                            }
-                            this->invalidate();
-                            this->updateScrollOffset();
-                            delete element;
-                            break;
-                        }
-                    }
-                }
-                this->m_itemsToRemove.clear();
-
-                renderer->enableScissoring(this->getLeftBound(), this->getTopBound() - 5, this->getWidth(), this->getHeight() + 4);
-
-                for (auto &entry : this->m_items) {
-                    if (entry->getBottomBound() > this->getTopBound() && entry->getTopBound() < this->getBottomBound()) {
-                        entry->frame(renderer);
-                    }
-                }
-
-                renderer->disableScissoring();
-
-                if (this->m_listHeight > this->getHeight()) {
-                    double scrollbarHeight = static_cast<double>(this->getHeight() * this->getHeight()) / this->m_listHeight;
-                    double scrollbarOffset = (static_cast<double>(this->m_offset)) / static_cast<double>(this->m_listHeight - this->getHeight()) * (this->getHeight() - std::ceil(scrollbarHeight));
-
-                    renderer->drawRect(this->getRightBound() + 10, this->getY() + scrollbarOffset, 5, scrollbarHeight - 50, a(tsl::style::color::ColorHandle));
-                    renderer->drawCircle(this->getRightBound() + 12, this->getY() + scrollbarOffset, 2, true, a(tsl::style::color::ColorHandle));
-                    renderer->drawCircle(this->getRightBound() + 12, this->getY() + scrollbarOffset + scrollbarHeight - 50, 2, true, a(tsl::style::color::ColorHandle));
-
-                    double prevOffset = this->m_offset;
-
-                    if (Element::getInputMode() == InputMode::Controller)
-                        this->m_offset += ((this->m_nextOffset) - this->m_offset) * 0.1F;
-                    else if (Element::getInputMode() == InputMode::TouchScroll)
-                        this->m_offset += ((this->m_nextOffset) - this->m_offset);
-
-                    if (static_cast<u32>(prevOffset) != static_cast<u32>(this->m_offset))
-                        this->invalidate();
-                }
-
-            }
-
-            virtual void layout(u16 parentX, u16 parentY, u16 parentWidth, u16 parentHeight) override {
-                s32 y = this->getY() - this->m_offset;
-
-                this->m_listHeight = 0;
-                for (auto &entry : this->m_items)
-                    this->m_listHeight += entry->getHeight();
-
-                for (auto &entry : this->m_items) {
-                    entry->setBoundaries(this->getX(), y, this->getWidth(), entry->getHeight());
-                    entry->invalidate();
-                    y += entry->getHeight();
-                }
-            }
-
-            virtual bool onTouch(TouchEvent event, s32 currX, s32 currY, s32 prevX, s32 prevY, s32 initialX, s32 initialY) {
-                bool handled = false;
-
-                // Discard touches out of bounds
-                if (!this->inBounds(currX, currY))
-                    return false;
-
-                // Direct touches to all children
-                for (auto &item : this->m_items)
-                    handled |= item->onTouch(event, currX, currY, prevX, prevY, initialX, initialY);
-
-                if (handled)
-                    return true;
-
-                // Handle scrolling
-                if (event != TouchEvent::Release && Element::getInputMode() == InputMode::TouchScroll) {
-                    if (prevX != 0 && prevY != 0)
-                        this->m_nextOffset += (prevY - currY);
-
-                    if (this->m_nextOffset < 0)
-                        this->m_nextOffset = 0;
-
-                    if (this->m_nextOffset > (this->m_listHeight - this->getHeight()) + 50)
-                        this->m_nextOffset = (this->m_listHeight - this->getHeight() + 50);
-
-                    return true;
-                }
-
-                return false;
-            }
-
-            /**
-             * @brief Adds a new item to the list before the next frame starts
-             *
-             * @param element Element to add
-             * @param index Index in the list where the item should be inserted. -1 or greater list size will insert it at the end
-             * @param height Height of the element. Don't set this parameter for libtesla to try and figure out the size based on the type
-             */
-            void addItem(Element *element, u16 height = 0, ssize_t index = -1) {
-                if (element != nullptr) {
-                    if (height != 0)
-                        element->setBoundaries(this->getX(), this->getY(), this->getWidth(), height);
-
-                    element->setParent(this);
-                    element->invalidate();
-
-                    this->m_itemsToAdd.emplace_back(index, element);
-                }
-            }
-
-            /**
-             * @brief Removes an item form the list and deletes it
-             * @note Item will only be deleted if it was found in the list
-             *
-             * @param element Element to remove from list. Call \ref Gui::removeFocus before.
-             */
-            virtual void removeItem(Element *element) {
-                if (element != nullptr)
-                    this->m_itemsToRemove.emplace_back(element);
-            }
-
-            /**
-             * @brief Try to remove an item from the list
-             *
-             * @param index Index of element in list. Call \ref Gui::removeFocus before.
-             */
-            virtual void removeIndex(size_t index) {
-                if (index < this->m_items.size())
-                    removeItem(this->m_items[index]);
-            }
-
-            /**
-             * @brief Removes all children from the list later on
-             * @warning When clearing a list, make sure none of the its children are focused. Call \ref Gui::removeFocus before.
-             */
-            void clear() {
-                this->m_clearList = true;
-            }
-
-            virtual Element* requestFocus(Element *oldFocus, FocusDirection direction) override {
-                Element *newFocus = nullptr;
-
-                if (this->m_clearList || this->m_itemsToAdd.size() > 0)
-                    return nullptr;
-
-                if (direction == FocusDirection::None) {
-                    u16 i = 0;
-
-                    if (oldFocus == nullptr) {
-                        s32 elementHeight = 0;
-                        while (elementHeight < this->m_offset && i < this->m_items.size() - 1) {
-                            i++;
-                            elementHeight += this->m_items[i]->getHeight();
-                        }
-                    }
-
-                    for (; i < this->m_items.size(); i++) {
-                        newFocus = this->m_items[i]->requestFocus(oldFocus, direction);
-
-                        if (newFocus != nullptr) {
-                            this->m_focusedIndex = i;
-
-                            this->updateScrollOffset();
-                            return newFocus;
-                        }
-                    }
-                } else {
-                    if (direction == FocusDirection::Down) {
-
-                        for (u16 i = this->m_focusedIndex + 1; i < this->m_items.size(); i++) {
-                            newFocus = this->m_items[i]->requestFocus(oldFocus, direction);
-
-                            if (newFocus != nullptr && newFocus != oldFocus) {
-                                this->m_focusedIndex = i;
-
-                                this->updateScrollOffset();
-                                return newFocus;
-                            }
-                        }
-
-                        return oldFocus;
-                    } else if (direction == FocusDirection::Up) {
-                        if (this->m_focusedIndex > 0) {
-
-                            for (u16 i = this->m_focusedIndex - 1; i >= 0; i--) {
-                                if (i > this->m_items.size() || this->m_items[i] == nullptr)
-                                    return oldFocus;
-                                else
-                                    newFocus = this->m_items[i]->requestFocus(oldFocus, direction);
-
-                                if (newFocus != nullptr && newFocus != oldFocus) {
-                                    this->m_focusedIndex = i;
-
-                                    this->updateScrollOffset();
-                                    return newFocus;
-                                }
-                            }
-                        }
-
-                        return oldFocus;
-                    }
-                }
-
-                return oldFocus;
-            }
-
-            /**
-             * @brief Gets the item at the index in the list
-             *
-             * @param index Index position in list
-             * @return Element from list. nullptr for if the index is out of bounds
-             */
-            virtual Element* getItemAtIndex(u32 index) {
-                if (this->m_items.size() <= index)
-                    return nullptr;
-
-                return this->m_items[index];
-            }
-
-            /**
-             * @brief Gets the index in the list of the element passed in
-             *
-             * @param element Element to check
-             * @return Index in list. -1 for if the element isn't a member of the list
-             */
-            virtual s32 getIndexInList(Element *element) {
-                auto it = std::find(this->m_items.begin(), this->m_items.end(), element);
-
-                if (it == this->m_items.end())
-                    return -1;
-
-                return it - this->m_items.begin();
-            }
-
-            virtual void setFocusedIndex(u32 index) {
-                if (this->m_items.size() > index) {
-                    m_focusedIndex = index;
-                    this->updateScrollOffset();
-                }
-            }
-
-        protected:
-            std::vector<Element*> m_items;
-            u16 m_focusedIndex = 0;
-
-            double m_offset = 0, m_nextOffset = 0;
-            s32 m_listHeight = 0;
-
-            bool m_clearList = false;
-            std::vector<Element *> m_itemsToRemove;
-            std::vector<std::pair<ssize_t, Element *>> m_itemsToAdd;
-
-        private:
-
-            virtual void updateScrollOffset() {
-                if (this->getInputMode() != InputMode::Controller)
-                    return;
-
-                if (this->m_listHeight <= this->getHeight()) {
-                    this->m_nextOffset = 0;
-                    this->m_offset = 0;
-
-                    return;
-                }
-
-                this->m_nextOffset = 0;
-                for (u16 i = 0; i < this->m_focusedIndex; i++)
-                    this->m_nextOffset += this->m_items[i]->getHeight();
-
-                this->m_nextOffset -= this->getHeight() / 3;
-
-                if (this->m_nextOffset < 0)
-                    this->m_nextOffset = 0;
-
-                if (this->m_nextOffset > (this->m_listHeight - this->getHeight()) + 50)
-                    this->m_nextOffset = (this->m_listHeight - this->getHeight() + 50);
-            }
-        };
-
-        /**
-         * @brief A item that goes into a list
-         *
-         */
-        class ListItem : public Element {
-        public:
-            /**
-             * @brief Constructor
-             *
-             * @param text Initial description text
-             */
-            ListItem(const std::string& text, const std::string& value = "")
-                : Element(), m_text(text), m_value(value) {
-            }
-            virtual ~ListItem() {}
-
-            virtual void draw(gfx::Renderer *renderer) override {
-                if (this->m_touched && Element::getInputMode() == InputMode::Touch) {
-                    renderer->drawRect(ELEMENT_BOUNDS(this), a(tsl::style::color::ColorClickAnimation));
-                }
-
-                if (this->m_maxWidth == 0) {
-                    if (this->m_value.length() > 0) {
-                        auto [valueWidth, valueHeight] = renderer->drawString(this->m_value.c_str(), false, 0, 0, 20, tsl::style::color::ColorTransparent);
-                        this->m_maxWidth = this->getWidth() - valueWidth - 70;
-                    } else {
-                        this->m_maxWidth = this->getWidth() - 40;
-                    }
-
-                    auto [width, height] = renderer->drawString(this->m_text.c_str(), false, 0, 0, 23, tsl::style::color::ColorTransparent);
-                    this->m_trunctuated = width > this->m_maxWidth;
-
-                    if (this->m_trunctuated) {
-                        this->m_scrollText = this->m_text + "        ";
-                        auto [width, height] = renderer->drawString(this->m_scrollText.c_str(), false, 0, 0, 23, tsl::style::color::ColorTransparent);
-                        this->m_scrollText += this->m_text;
-                        this->m_textWidth = width;
-                        this->m_ellipsisText = renderer->limitStringLength(this->m_text, false, 22, this->m_maxWidth);
-                    } else {
-                        this->m_textWidth = width;
-                    }
-                }
-
-                renderer->drawRect(this->getX(), this->getY(), this->getWidth(), 1, a(tsl::style::color::ColorFrame));
-                renderer->drawRect(this->getX(), this->getTopBound(), this->getWidth(), 1, a(tsl::style::color::ColorFrame));
-
-                if (this->m_trunctuated) {
-                    if (this->m_focused) {
-                        renderer->enableScissoring(this->getX(), this->getY(), this->m_maxWidth + 40, this->getHeight());
-                        renderer->drawString(this->m_scrollText.c_str(), false, this->getX() + 20 - this->m_scrollOffset, this->getY() + 45, 23, tsl::style::color::ColorText);
-                        renderer->disableScissoring();
-                        if (this->m_scrollAnimationCounter == 90) {
-                            if (this->m_scrollOffset == this->m_textWidth) {
-                                this->m_scrollOffset = 0;
-                                this->m_scrollAnimationCounter = 0;
-                            } else {
-                                this->m_scrollOffset++;
-                            }
-                        } else {
-                            this->m_scrollAnimationCounter++;
-                        }
-                    } else {
-                        renderer->drawString(this->m_ellipsisText.c_str(), false, this->getX() + 20, this->getY() + 45, 23, a(tsl::style::color::ColorText));
-                    }
-                } else {
-                    renderer->drawString(this->m_text.c_str(), false, this->getX() + 20, this->getY() + 45, 23, a(tsl::style::color::ColorText));
-                }
-
-                renderer->drawString(this->m_value.c_str(), false, this->getX() + this->m_maxWidth + 45, this->getY() + 45, 20, this->m_faint ? a(tsl::style::color::ColorDescription) : a(tsl::style::color::ColorHighlight));
-            }
-
-            virtual void layout(u16 parentX, u16 parentY, u16 parentWidth, u16 parentHeight) override {
-                this->setBoundaries(this->getX(), this->getY(), this->getWidth(), tsl::style::ListItemDefaultHeight);
-            }
-
-            virtual bool onClick(u64 keys) override {
-                if (keys & HidNpadButton_A)
-                    this->triggerClickAnimation();
-                else if (keys & (HidNpadButton_AnyUp | HidNpadButton_AnyDown | HidNpadButton_AnyLeft | HidNpadButton_AnyRight))
-                    this->m_clickAnimationProgress = 0;
-
-                return Element::onClick(keys);
-            }
-
-
-            virtual bool onTouch(TouchEvent event, s32 currX, s32 currY, s32 prevX, s32 prevY, s32 initialX, s32 initialY) override {
-                if (event == TouchEvent::Touch)
-                    this->m_touched = this->inBounds(currX, currY);
-
-                if (event == TouchEvent::Release && this->m_touched) {
-                    this->m_touched = false;
-
-                    if (Element::getInputMode() == InputMode::Touch) {
-                        bool handled = this->onClick(HidNpadButton_A);
-
-                        this->m_clickAnimationProgress = 0;
-                        return handled;
-                    }
-                }
-
-
-                return false;
-            }
-
-
-            virtual void setFocused(bool state) override {
-                this->m_scroll = false;
-                this->m_scrollOffset = 0;
-                this->m_scrollAnimationCounter = 0;
-                Element::setFocused(state);
-            }
-
-            virtual Element* requestFocus(Element *oldFocus, FocusDirection direction) override {
-                return this;
-            }
-
-            /**
-             * @brief Sets the left hand description text of the list item
-             *
-             * @param text Text
-             */
-            inline void setText(const std::string& text) {
-                this->m_text = text;
-                this->m_scrollText = "";
-                this->m_ellipsisText = "";
-                this->m_maxWidth = 0;
-            }
-
-            /**
-             * @brief Sets the right hand value text of the list item
-             *
-             * @param value Text
-             * @param faint Should the text be drawn in a glowing green or a faint gray
-             */
-            inline void setValue(const std::string& value, bool faint = false) {
-                this->m_value = value;
-                this->m_faint = faint;
-                this->m_maxWidth = 0;
-            }
-
-            /**
-             * @brief Gets the left hand description text of the list item
-             *
-             * @return Text
-             */
-            inline const std::string& getText() const {
-                return this->m_text;
-            }
-
-            /**
-             * @brief Gets the right hand value text of the list item
-             *
-             * @return Value
-             */
-            inline const std::string& getValue() {
-                return this->m_value;
-            }
-
-        protected:
-            std::string m_text;
-            std::string m_value = "";
-            std::string m_scrollText = "";
-            std::string m_ellipsisText = "";
-
-            bool m_scroll = false;
-            bool m_trunctuated = false;
-            bool m_faint = false;
-
-            bool m_touched = false;
-
-            u16 m_maxScroll = 0;
-            u16 m_scrollOffset = 0;
-            u32 m_maxWidth = 0;
-            u32 m_textWidth = 0;
-            u16 m_scrollAnimationCounter = 0;
-        };
-
-        /**
-         * @brief A toggleable list item that changes the state from On to Off when the A button gets pressed
-         *
-         */
-        class ToggleListItem : public ListItem {
-        public:
-            /**
-             * @brief Constructor
-             *
-             * @param text Initial description text
-             * @param initialState Is the toggle set to On or Off initially
-             * @param onValue Value drawn if the toggle is on
-             * @param offValue Value drawn if the toggle is off
-             */
-            ToggleListItem(const std::string& text, bool initialState, const std::string& onValue = "On", const std::string& offValue = "Off")
-                : ListItem(text), m_state(initialState), m_onValue(onValue), m_offValue(offValue) {
-
-                this->setState(this->m_state);
-            }
-
-            virtual ~ToggleListItem() {}
-
-            virtual bool onClick(u64 keys) override {
-                if (keys & HidNpadButton_A) {
-                    this->m_state = !this->m_state;
-
-                    this->setState(this->m_state);
-                    this->m_stateChangedListener(this->m_state);
-
-                    return ListItem::onClick(keys);
-                }
-
-                return false;
-            }
-
-            /**
-             * @brief Gets the current state of the toggle
-             *
-             * @return State
-             */
-            virtual inline bool getState() {
-                return this->m_state;
-            }
-
-            /**
-             * @brief Sets the current state of the toggle. Updates the Value
-             *
-             * @param state State
-             */
-            virtual void setState(bool state) {
-                this->m_state = state;
-
-                if (state)
-                    this->setValue(this->m_onValue, false);
-                else
-                    this->setValue(this->m_offValue, true);
-            }
-
-            /**
-             * @brief Adds a listener that gets called whenever the state of the toggle changes
-             *
-             * @param stateChangedListener Listener with the current state passed in as parameter
-             */
-            void setStateChangedListener(std::function<void(bool)> stateChangedListener) {
-                this->m_stateChangedListener = stateChangedListener;
-            }
-
-        protected:
-            bool m_state = true;
-            std::string m_onValue, m_offValue;
-
-            std::function<void(bool)> m_stateChangedListener = [](bool){};
-        };
-
-        class CategoryHeader : public Element {
-        public:
-            CategoryHeader(const std::string &title, bool hasSeparator = false) : m_text(title), m_hasSeparator(hasSeparator) {}
-            virtual ~CategoryHeader() {}
-
-            virtual void draw(gfx::Renderer *renderer) override {
-                renderer->drawRect(this->getX() - 2, this->getBottomBound() - 30, 5, 23, a(tsl::style::color::ColorHeaderBar));
-                renderer->drawString(this->m_text.c_str(), false, this->getX() + 13, this->getBottomBound() - 12, 15, a(tsl::style::color::ColorText));
-
-                if (this->m_hasSeparator)
-                    renderer->drawRect(this->getX(), this->getBottomBound(), this->getWidth(), 1, a(tsl::style::color::ColorFrame));
-            }
-
-            virtual void layout(u16 parentX, u16 parentY, u16 parentWidth, u16 parentHeight) override {
-                // Check if the CategoryHeader is part of a list and if it's the first entry in it, half it's height
-                if (List *list = dynamic_cast<List*>(this->getParent()); list != nullptr) {
-                    if (list->getIndexInList(this) == 0) {
-                        this->setBoundaries(this->getX(), this->getY(), this->getWidth(), tsl::style::ListItemDefaultHeight / 2);
-                        return;
-                    }
-                }
-
-                this->setBoundaries(this->getX(), this->getY(), this->getWidth(), tsl::style::ListItemDefaultHeight);
-            }
-
-            virtual bool onClick(u64 keys) {
-                return false;
-            }
-
-            virtual Element* requestFocus(Element *oldFocus, FocusDirection direction) override {
-                return nullptr;
-            }
-
-            inline void setText(const std::string &text) {
-                this->m_text = text;
-            }
-
-            inline const std::string& getText() const {
-                return this->m_text;
-            }
-
-        private:
-            std::string m_text;
-            bool m_hasSeparator;
-        };
-
-        /**
-         * @brief A customizable analog trackbar going from 0% to 100% (like the brightness slider)
-         *
-         */
-        class TrackBar : public Element {
-        public:
-            /**
-             * @brief Constructor
-             *
-             * @param icon Icon shown next to the track bar
-             */
-            TrackBar(const char icon[3]) : m_icon(icon) { }
-
-            virtual ~TrackBar() {}
-
-            virtual Element* requestFocus(Element *oldFocus, FocusDirection direction) {
-                return this;
-            }
-
-            virtual bool handleInput(u64 keysDown, u64 keysHeld, const HidTouchState &touchPos, HidAnalogStickState leftJoyStick, HidAnalogStickState rightJoyStick) override {
-                if (keysHeld & HidNpadButton_AnyLeft && keysHeld & HidNpadButton_AnyRight)
-                    return true;
-
-                if (keysHeld & HidNpadButton_AnyLeft) {
-                    if (this->m_value > 0) {
-                        this->m_value--;
-                        this->m_valueChangedListener(this->m_value);
-                        return true;
-                    }
-                }
-
-                if (keysHeld & HidNpadButton_AnyRight) {
-                    if (this->m_value < 100) {
-                        this->m_value++;
-                        this->m_valueChangedListener(this->m_value);
-                        return true;
-                    }
-                }
-
-                return false;
-            }
-
-            virtual bool onTouch(TouchEvent event, s32 currX, s32 currY, s32 prevX, s32 prevY, s32 initialX, s32 initialY) override {
-                if (event == TouchEvent::Release) {
-                    this->m_interactionLocked = false;
-                    return false;
-                }
-
-
-                if (!this->m_interactionLocked && this->inBounds(initialX, initialY)) {
-                    if (currX > this->getLeftBound() + 50 && currX < this->getRightBound() && currY > this->getTopBound() && currY < this->getBottomBound()) {
-                        s16 newValue = (static_cast<double>(currX - (this->getX() + 60)) / static_cast<double>(this->getWidth() - 95)) * 100;
-
-                        if (newValue < 0) {
-                            newValue = 0;
-                        } else if (newValue > 100) {
-                            newValue = 100;
-                        }
-
-                        if (newValue != this->m_value) {
-                            this->m_value = newValue;
-                            this->m_valueChangedListener(this->getProgress());
-                        }
-
-                        return true;
-                    }
-                }
-                else
-                    this->m_interactionLocked = true;
-
-                return false;
-            }
-
-            virtual void draw(gfx::Renderer *renderer) override {
-                renderer->drawRect(this->getX(), this->getY(), this->getWidth(), 1, a(tsl::style::color::ColorFrame));
-                renderer->drawRect(this->getX(), this->getBottomBound(), this->getWidth(), 1, a(tsl::style::color::ColorFrame));
-
-                renderer->drawString(this->m_icon, false, this->getX() + 15, this->getY() + 50, 23, a(tsl::style::color::ColorText));
-
-                u16 handlePos = (this->getWidth() - 95) * static_cast<double>(this->m_value) / 100;
-                renderer->drawCircle(this->getX() + 60, this->getY() + 42, 2, true, a(tsl::style::color::ColorHighlight));
-                renderer->drawCircle(this->getX() + 60 + this->getWidth() - 95, this->getY() + 42, 2, true, a(tsl::style::color::ColorFrame));
-                renderer->drawRect(this->getX() + 60 + handlePos, this->getY() + 40, this->getWidth() - 95 - handlePos, 5, a(tsl::style::color::ColorFrame));
-                renderer->drawRect(this->getX() + 60, this->getY() + 40, handlePos, 5, a(tsl::style::color::ColorHighlight));
-
-                renderer->drawCircle(this->getX() + 62 + handlePos, this->getY() + 42, 18, true, a(tsl::style::color::ColorHandle));
-                renderer->drawCircle(this->getX() + 62 + handlePos, this->getY() + 42, 18, false, a(tsl::style::color::ColorFrame));
-            }
-
-            virtual void layout(u16 parentX, u16 parentY, u16 parentWidth, u16 parentHeight) override {
-                this->setBoundaries(this->getX(), this->getY(), this->getWidth(), tsl::style::TrackBarDefaultHeight);
-            }
-
-            virtual void drawFocusBackground(gfx::Renderer *renderer) {
-                // No background drawn here in HOS
-            }
-
-            virtual void drawHighlight(gfx::Renderer *renderer) override {
-                static double counter = 0;
-                const double progress = (std::sin(counter) + 1) / 2;
-                Color highlightColor = {   static_cast<u8>((0x2 - 0x8) * progress + 0x8),
-                                                static_cast<u8>((0x8 - 0xF) * progress + 0xF),
-                                                static_cast<u8>((0xC - 0xF) * progress + 0xF),
-                                                static_cast<u8>((0x6 - 0xD) * progress + 0xD) };
-
-                counter += 0.1F;
-
-                u16 handlePos = (this->getWidth() - 95) * static_cast<double>(this->m_value) / 100;
-
-                s32 x = 0;
-                s32 y = 0;
-
-                if (Element::m_highlightShaking) {
-                    auto t = (std::chrono::system_clock::now() - Element::m_highlightShakingStartTime);
-                    if (t >= 100ms)
-                        Element::m_highlightShaking = false;
-                    else {
-                        s32 amplitude = std::rand() % 5 + 5;
-
-                        switch (Element::m_highlightShakingDirection) {
-                            case FocusDirection::Up:
-                                y -= shakeAnimation(t, amplitude);
-                                break;
-                            case FocusDirection::Down:
-                                y += shakeAnimation(t, amplitude);
-                                break;
-                            case FocusDirection::Left:
-                                x -= shakeAnimation(t, amplitude);
-                                break;
-                            case FocusDirection::Right:
-                                x += shakeAnimation(t, amplitude);
-                                break;
-                            default:
-                                break;
-                        }
-
-                        x = std::clamp(x, -amplitude, amplitude);
-                        y = std::clamp(y, -amplitude, amplitude);
-                    }
-                }
-
-                for (u8 i = 16; i <= 19; i++) {
-                    renderer->drawCircle(this->getX() + 62 + x + handlePos, this->getY() + 42 + y, i, false, a(highlightColor));
-                }
-            }
-
-            /**
-             * @brief Gets the current value of the trackbar
-             *
-             * @return State
-             */
-            virtual inline u8 getProgress() {
-                return this->m_value;
-            }
-
-            /**
-             * @brief Sets the current state of the toggle. Updates the Value
-             *
-             * @param state State
-             */
-            virtual void setProgress(u8 value) {
-                this->m_value = value;
-            }
-
-            /**
-             * @brief Adds a listener that gets called whenever the state of the toggle changes
-             *
-             * @param stateChangedListener Listener with the current state passed in as parameter
-             */
-            void setValueChangedListener(std::function<void(u8)> valueChangedListener) {
-                this->m_valueChangedListener = valueChangedListener;
-            }
-
-        protected:
-            const char *m_icon = nullptr;
-            s16 m_value = 0;
-            bool m_interactionLocked = false;
-
-            std::function<void(u8)> m_valueChangedListener = [](u8){};
-        };
-
-
-        /**
-         * @brief A customizable analog trackbar going from 0% to 100% but using discrete steps (Like the volume slider)
-         *
-         */
-        class StepTrackBar : public TrackBar {
-        public:
-            /**
-             * @brief Constructor
-             *
-             * @param icon Icon shown next to the track bar
-             * @param numSteps Number of steps the track bar has
-             */
-            StepTrackBar(const char icon[3], size_t numSteps)
-                : TrackBar(icon), m_numSteps(numSteps) { }
-
-            virtual ~StepTrackBar() {}
-
-            virtual bool handleInput(u64 keysDown, u64 keysHeld, const HidTouchState &touchPos, HidAnalogStickState leftJoyStick, HidAnalogStickState rightJoyStick) override {
-                static u32 tick = 0;
-
-                if (keysHeld & HidNpadButton_AnyLeft && keysHeld & HidNpadButton_AnyRight) {
-                    tick = 0;
-                    return true;
-                }
-
-                if (keysHeld & (HidNpadButton_AnyLeft | HidNpadButton_AnyRight)) {
-                    if ((tick == 0 || tick > 20) && (tick % 3) == 0) {
-                        if (keysHeld & HidNpadButton_AnyLeft && this->m_value > 0) {
-                            this->m_value = std::max(this->m_value - (100 / (this->m_numSteps - 1)), 0);
-                        } else if (keysHeld & HidNpadButton_AnyRight && this->m_value < 100) {
-                            this->m_value = std::min(this->m_value + (100 / (this->m_numSteps - 1)), 100);
-                        } else {
-                            return false;
-                        }
-                        this->m_valueChangedListener(this->getProgress());
-                    }
-                    tick++;
-                    return true;
-                } else {
-                    tick = 0;
-                }
-
-                return false;
-            }
-
-            virtual bool onTouch(TouchEvent event, s32 currX, s32 currY, s32 prevX, s32 prevY, s32 initialX, s32 initialY) override {
-                if (this->inBounds(initialX, initialY)) {
-                    if (currY > this->getTopBound() && currY < this->getBottomBound()) {
-                        s16 newValue = (static_cast<double>(currX - (this->getX() + 60)) / static_cast<double>(this->getWidth() - 95)) * 100;
-
-                        if (newValue < 0) {
-                            newValue = 0;
-                        } else if (newValue > 100) {
-                            newValue = 100;
-                        } else {
-                            newValue = std::round(newValue / (100.0F / (this->m_numSteps - 1))) * (100.0F / (this->m_numSteps - 1));
-                        }
-
-                        if (newValue != this->m_value) {
-                            this->m_value = newValue;
-                            this->m_valueChangedListener(this->getProgress());
-                        }
-
-                        return true;
-                    }
-                }
-
-                return false;
-            }
-
-            /**
-             * @brief Gets the current value of the trackbar
-             *
-             * @return State
-             */
-            virtual inline u8 getProgress() override {
-                return this->m_value / (100 / (this->m_numSteps - 1));
-            }
-
-            /**
-             * @brief Sets the current state of the toggle. Updates the Value
-             *
-             * @param state State
-             */
-            virtual void setProgress(u8 value) override {
-                value = std::min(value, u8(this->m_numSteps - 1));
-                this->m_value = value * (100 / (this->m_numSteps - 1));
-            }
-
-        protected:
-            u8 m_numSteps = 1;
-        };
-
-
-        /**
-         * @brief A customizable trackbar with multiple discrete steps with specific names. Name gets displayed above the bar
-         *
-         */
-        class NamedStepTrackBar : public StepTrackBar {
-        public:
-            /**
-             * @brief Constructor
-             *
-             * @param icon Icon shown next to the track bar
-             * @param stepDescriptions Step names displayed above the track bar
-             */
-            NamedStepTrackBar(const char icon[3], std::initializer_list<std::string> stepDescriptions)
-                : StepTrackBar(icon, stepDescriptions.size()), m_stepDescriptions(stepDescriptions.begin(), stepDescriptions.end()) { }
-
-            virtual ~NamedStepTrackBar() {}
-
-            virtual void draw(gfx::Renderer *renderer) override {
-
-                u16 trackBarWidth = this->getWidth() - 95;
-                u16 stepWidth = trackBarWidth / (this->m_numSteps - 1);
-
-                for (u8 i = 0; i < this->m_numSteps; i++) {
-                    renderer->drawRect(this->getX() + 60 + stepWidth * i, this->getY() + 50, 1, 10, a(tsl::style::color::ColorFrame));
-                }
-
-                u8 currentDescIndex = std::clamp(this->m_value / (100 / (this->m_numSteps - 1)), 0, this->m_numSteps - 1);
-
-                auto [descWidth, descHeight] = renderer->drawString(this->m_stepDescriptions[currentDescIndex].c_str(), false, 0, 0, 15, tsl::style::color::ColorTransparent);
-                renderer->drawString(this->m_stepDescriptions[currentDescIndex].c_str(), false, ((this->getX() + 60) + (this->getWidth() - 95) / 2) - (descWidth / 2), this->getY() + 20, 15, a(tsl::style::color::ColorDescription));
-
-                StepTrackBar::draw(renderer);
-            }
-
-        protected:
-            std::vector<std::string> m_stepDescriptions;
-        };
-
-    }
-
-    // GUI
-
-    /**
-     * @brief The top level Gui class
-     * @note The main menu and every sub menu are a separate Gui. Create your own Gui class that extends from this one to create your own menus
-     *
-     */
-    class Gui {
-    public:
-        Gui() { }
-
-        virtual ~Gui() {
-            if (this->m_topElement != nullptr)
-                delete this->m_topElement;
-        }
-
-        /**
-         * @brief Creates all elements present in this Gui
-         * @note Implement this function and let it return a heap allocated element used as the top level element. This is usually some kind of frame e.g \ref OverlayFrame
-         *
-         * @return Top level element
-         */
-        virtual elm::Element* createUI() = 0;
-
-        /**
-         * @brief Called once per frame to update values
-         *
-         */
-        virtual void update() {}
-
-        /**
-         * @brief Called once per frame with the latest HID inputs
-         *
-         * @param keysDown Buttons pressed in the last frame
-         * @param keysHeld Buttons held down longer than one frame
-         * @param touchInput Last touch position
-         * @param leftJoyStick Left joystick position
-         * @param rightJoyStick Right joystick position
-         * @return Weather or not the input has been consumed
-         */
-        virtual bool handleInput(u64 keysDown, u64 keysHeld, const HidTouchState &touchPos, HidAnalogStickState leftJoyStick, HidAnalogStickState rightJoyStick) {
-            return false;
-        }
-
-        /**
-         * @brief Gets the top level element
-         *
-         * @return Top level element
-         */
-        elm::Element* getTopElement() {
-            return this->m_topElement;
-        }
-
-        /**
-         * @brief Get the currently focused element
-         *
-         * @return Focused element
-         */
-        elm::Element* getFocusedElement() {
-            return this->m_focusedElement;
-        }
-
-        /**
-         * @brief Requests focus to a element
-         * @note Use this function when focusing a element outside of a element's requestFocus function
-         *
-         * @param element Element to focus
-         * @param direction Focus direction
-         */
-        void requestFocus(elm::Element *element, FocusDirection direction, bool shake = true) {
-            elm::Element *oldFocus = this->m_focusedElement;
-
-            if (element != nullptr) {
-                this->m_focusedElement = element->requestFocus(oldFocus, direction);
-
-                if (oldFocus != nullptr)
-                    oldFocus->setFocused(false);
-
-                if (this->m_focusedElement != nullptr) {
-                    this->m_focusedElement->setFocused(true);
-                }
-            }
-
-            if (shake && oldFocus == this->m_focusedElement && this->m_focusedElement != nullptr)
-                this->m_focusedElement->shakeHighlight(direction);
-        }
-
-        /**
-         * @brief Removes focus from a element
-         *
-         * @param element Element to remove focus from. Pass nullptr to remove the focus unconditionally
-         */
-        void removeFocus(elm::Element* element = nullptr) {
-            if (element == nullptr || element == this->m_focusedElement) {
-                if (this->m_focusedElement != nullptr) {
-                    this->m_focusedElement->setFocused(false);
-                    this->m_focusedElement = nullptr;
-                }
-            }
-        }
-
-        void restoreFocus() {
-            this->m_initialFocusSet = false;
-        }
-
-    protected:
-        constexpr static inline auto a = &gfx::Renderer::a;
-
-    private:
-        elm::Element *m_focusedElement = nullptr;
-        elm::Element *m_topElement = nullptr;
-
-        bool m_initialFocusSet = false;
-
-        friend class Overlay;
-        friend class gfx::Renderer;
-
-        /**
-         * @brief Draws the Gui
-         *
-         * @param renderer
-         */
-        void draw(gfx::Renderer *renderer) {
-            if (this->m_topElement != nullptr)
-                this->m_topElement->draw(renderer);
-        }
-
-        bool initialFocusSet() {
-            return this->m_initialFocusSet;
-        }
-
-        void markInitialFocusSet() {
-            this->m_initialFocusSet = true;
-        }
-
-    };
-
-
-    // Overlay
-
-    /**
-     * @brief The top level Overlay class
-     * @note Every Tesla overlay should have exactly one Overlay class initializing services and loading the default Gui
-     */
-    class Overlay {
-    protected:
-        /**
-         * @brief Constructor
-         * @note Called once when the Overlay gets loaded
-         */
-        Overlay() {}
-    public:
-        /**
-         * @brief Deconstructor
-         * @note Called once when the Overlay exits
-         *
-         */
-        virtual ~Overlay() {}
-
-        /**
-         * @brief Initializes services
-         * @note Called once at the start to initializes services. You have a sm session available during this call, no need to initialize sm yourself
-         */
-        virtual void initServices() {}
-
-        /**
-         * @brief Exits services
-         * @note Make sure to exit all services you initialized in \ref Overlay::initServices() here to prevent leaking handles
-         */
-        virtual void exitServices() {}
-
-        /**
-         * @brief Called before overlay changes from invisible to visible state
-         *
-         */
-        virtual void onShow() {}
-
-        /**
-         * @brief Called before overlay changes from visible to invisible state
-         *
-         */
-        virtual void onHide() {}
-
-        /**
-         * @brief Loads the default Gui
-         * @note This function should return the initial Gui to load using the \ref Gui::initially<T>(Args.. args) function
-         *       e.g `return initially<GuiMain>();`
-         *
-         * @return Default Gui
-         */
-        virtual std::unique_ptr<tsl::Gui> loadInitialGui() = 0;
-
-        /**
-         * @brief Gets a reference to the current Gui on top of the Gui stack
-         *
-         * @return Current Gui reference
-         */
-        std::unique_ptr<tsl::Gui>& getCurrentGui() {
-            return this->m_guiStack.top();
-        }
-
-        /**
-         * @brief Shows the Gui
-         *
-         */
-        void show() {
-            if (this->m_disableNextAnimation) {
-                this->m_animationCounter = 5;
-                this->m_disableNextAnimation = false;
-            }
-            else {
-                this->m_fadeInAnimationPlaying = true;
-                this->m_animationCounter = 0;
-            }
-
-            this->onShow();
-
-            if (auto& currGui = this->getCurrentGui(); currGui != nullptr)
-                currGui->restoreFocus();
-        }
-
-        /**
-         * @brief Hides the Gui
-         *
-         */
-        void hide() {
-            if (this->m_disableNextAnimation) {
-                this->m_animationCounter = 0;
-                this->m_disableNextAnimation = false;
-            }
-            else {
-                this->m_fadeOutAnimationPlaying = true;
-                this->m_animationCounter = 5;
-            }
-
-            this->onHide();
-        }
-
-        /**
-         * @brief Returns whether fade animation is playing
-         *
-         * @return whether fade animation is playing
-         */
-        bool fadeAnimationPlaying() {
-            return this->m_fadeInAnimationPlaying || this->m_fadeOutAnimationPlaying;
-        }
-
-        /**
-         * @brief Closes the Gui
-         * @note This makes the Tesla overlay exit and return back to the Tesla-Menu
-         *
-         */
-        void close() {
-            this->m_shouldClose = true;
-        }
-
-        /**
-         * @brief Gets the Overlay instance
-         *
-         * @return Overlay instance
-         */
-        static inline Overlay* const get() {
-            return Overlay::s_overlayInstance;
-        }
-
-        /**
-         * @brief Creates the initial Gui of an Overlay and moves the object to the Gui stack
-         *
-         * @tparam T
-         * @tparam Args
-         * @param args
-         * @return constexpr std::unique_ptr<T>
-         */
-        template<typename T, typename ... Args>
-        constexpr inline std::unique_ptr<T> initially(Args&&... args) {
-            return std::make_unique<T>(args...);
-        }
-
-    private:
-        using GuiPtr = std::unique_ptr<tsl::Gui>;
-        std::stack<GuiPtr, std::list<GuiPtr>> m_guiStack;
-        static inline Overlay *s_overlayInstance = nullptr;
-
-        bool m_fadeInAnimationPlaying = true, m_fadeOutAnimationPlaying = false;
-        u8 m_animationCounter = 0;
-
-        bool m_shouldHide = false;
-        bool m_shouldClose = false;
-
-        bool m_disableNextAnimation = false;
-
-        bool m_closeOnExit;
-
-        /**
-         * @brief Initializes the Renderer
-         *
-         */
-        void initScreen() {
-            gfx::Renderer::get().init();
-        }
-
-        /**
-         * @brief Exits the Renderer
-         *
-         */
-        void exitScreen() {
-            gfx::Renderer::get().exit();
-        }
-
-        /**
-         * @brief Weather or not the Gui should get hidden
-         *
-         * @return should hide
-         */
-        bool shouldHide() {
-            return this->m_shouldHide;
-        }
-
-        /**
-         * @brief Weather or not hte Gui should get closed
-         *
-         * @return should close
-         */
-        bool shouldClose() {
-            return this->m_shouldClose;
-        }
-
-        /**
-         * @brief Handles fade in and fade out animations of the Overlay
-         *
-         */
-        void animationLoop() {
-            if (this->m_fadeInAnimationPlaying) {
-                this->m_animationCounter++;
-
-                if (this->m_animationCounter >= 5)
-                    this->m_fadeInAnimationPlaying = false;
-            }
-
-            if (this->m_fadeOutAnimationPlaying) {
-                this->m_animationCounter--;
-
-                if (this->m_animationCounter == 0) {
-                    this->m_fadeOutAnimationPlaying = false;
-                    this->m_shouldHide = true;
-                }
-            }
-
-            gfx::Renderer::setOpacity(0.2 * this->m_animationCounter);
-        }
-
-        /**
-         * @brief Main loop
-         *
-         */
-        void loop() {
-            auto& renderer = gfx::Renderer::get();
-
-            renderer.startFrame();
-
-            this->animationLoop();
-            this->getCurrentGui()->update();
-            this->getCurrentGui()->draw(&renderer);
-
-            renderer.endFrame();
-        }
-
-        /**
-         * @brief Called once per frame with the latest HID inputs
-         *
-         * @param keysDown Buttons pressed in the last frame
-         * @param keysHeld Buttons held down longer than one frame
-         * @param touchInput Last touch position
-         * @param leftJoyStick Left joystick position
-         * @param rightJoyStick Right joystick position
-         * @return Weather or not the input has been consumed
-         */
-        void handleInput(u64 keysDown, u64 keysHeld, bool touchDetected, const HidTouchState &touchPos, HidAnalogStickState joyStickPosLeft, HidAnalogStickState joyStickPosRight) {
-            static HidTouchState initialTouchPos = { 0 };
-            static HidTouchState oldTouchPos = { 0 };
-            static bool oldTouchDetected = false;
-            static elm::TouchEvent touchEvent;
-            static u32 repeatTick = 0;
-
-            auto& currentGui = this->getCurrentGui();
-
-            if (currentGui == nullptr)
-                return;
-
-            auto currentFocus = currentGui->getFocusedElement();
-            auto topElement = currentGui->getTopElement();
-
-            if (currentFocus == nullptr) {
-                if (keysDown & HidNpadButton_B) {
-                    if (!currentGui->handleInput(HidNpadButton_B, 0,{},{},{}))
-                        this->goBack();
-                    return;
-                }
-
-                if (topElement == nullptr)
-                    return;
-                else if (currentGui != nullptr) {
-                    if (!currentGui->initialFocusSet() || keysDown & (HidNpadButton_AnyUp | HidNpadButton_AnyDown | HidNpadButton_AnyLeft | HidNpadButton_AnyRight)) {
-                        currentGui->requestFocus(topElement, FocusDirection::None);
-                        currentGui->markInitialFocusSet();
-                        repeatTick = 1;
-                    }
-                }
-            }
-
-            bool handled = false;
-            elm::Element *parentElement = currentFocus;
-
-            while (!handled && parentElement != nullptr) {
-                handled = parentElement->onClick(keysDown);
-                parentElement = parentElement->getParent();
-            }
-
-            parentElement = currentFocus;
-            while (!handled && parentElement != nullptr) {
-                handled = parentElement->handleInput(keysDown, keysHeld, touchPos, joyStickPosLeft, joyStickPosRight);
-                parentElement = parentElement->getParent();
-            }
-
-            if (currentGui != this->getCurrentGui())
-                return;
-
-            handled = handled | currentGui->handleInput(keysDown, keysHeld, touchPos, joyStickPosLeft, joyStickPosRight);
-
-            if (!handled && currentFocus != nullptr) {
-                static bool shouldShake = true;
-
-                if ((((keysHeld & HidNpadButton_AnyUp) != 0) + ((keysHeld & HidNpadButton_AnyDown) != 0) + ((keysHeld & HidNpadButton_AnyLeft) != 0) + ((keysHeld & HidNpadButton_AnyRight) != 0)) == 1) {
-                    if ((repeatTick == 0 || repeatTick > 20) && (repeatTick % 4) == 0) {
-                        if (keysHeld & HidNpadButton_AnyUp)
-                            currentGui->requestFocus(currentFocus->getParent(), FocusDirection::Up, shouldShake);
-                        else if (keysHeld & HidNpadButton_AnyDown)
-                            currentGui->requestFocus(currentFocus->getParent(), FocusDirection::Down, shouldShake);
-                        else if (keysHeld & HidNpadButton_AnyLeft)
-                            currentGui->requestFocus(currentFocus->getParent(), FocusDirection::Left, shouldShake);
-                        else if (keysHeld & HidNpadButton_AnyRight)
-                            currentGui->requestFocus(currentFocus->getParent(), FocusDirection::Right, shouldShake);
-
-                        shouldShake = currentGui->getFocusedElement() != currentFocus;
-                    }
-                    repeatTick++;
-                } else {
-                    if (keysDown & HidNpadButton_B)
-                        this->goBack();
-                    repeatTick = 0;
-                    shouldShake = true;
-                }
-            }
-
-            if (!touchDetected && oldTouchDetected) {
-                if (currentGui != nullptr && topElement != nullptr)
-                    topElement->onTouch(elm::TouchEvent::Release, oldTouchPos.x, oldTouchPos.y, oldTouchPos.x, oldTouchPos.y, initialTouchPos.x, initialTouchPos.y);
-            }
-
-            if (touchDetected) {
-
-                u32 xDistance = std::abs(static_cast<s32>(initialTouchPos.x) - static_cast<s32>(touchPos.x));
-                u32 yDistance = std::abs(static_cast<s32>(initialTouchPos.y) - static_cast<s32>(touchPos.y));
-
-                xDistance *= xDistance;
-                yDistance *= yDistance;
-
-                if ((xDistance + yDistance) > 1000) {
-                    elm::Element::setInputMode(InputMode::TouchScroll);
-                    touchEvent = elm::TouchEvent::Scroll;
-                } else {
-                    if (touchEvent != elm::TouchEvent::Scroll)
-                        touchEvent = elm::TouchEvent::Hold;
-                }
-
-                if (!oldTouchDetected) {
-                    initialTouchPos = touchPos;
-                    elm::Element::setInputMode(InputMode::Touch);
-                    currentGui->removeFocus();
-                    touchEvent = elm::TouchEvent::Touch;
-                }
-
-
-                if (currentGui != nullptr && topElement != nullptr)
-                    topElement->onTouch(touchEvent, touchPos.x, touchPos.y, oldTouchPos.x, oldTouchPos.y, initialTouchPos.x, initialTouchPos.y);
-
-                oldTouchPos = touchPos;
-
-                // Hide overlay when touching out of bounds
-                if (touchPos.x >= cfg::FramebufferWidth) {
-                    if (tsl::elm::Element::getInputMode() == tsl::InputMode::Touch) {
-                        oldTouchPos = { 0 };
-                        initialTouchPos = { 0 };
-
-                        this->hide();
-                    }
-                }
-            } else {
-                if (oldTouchPos.x < 150U && oldTouchPos.y > cfg::FramebufferHeight - 73U)
-                    if (initialTouchPos.x < 150U && initialTouchPos.y > cfg::FramebufferHeight - 73U)
-                        if (!currentGui->handleInput(HidNpadButton_B, 0,{},{},{}))
-                            this->goBack();
-
-                elm::Element::setInputMode(InputMode::Controller);
-
-                oldTouchPos = { 0 };
-                initialTouchPos = { 0 };
-            }
-
-            oldTouchDetected = touchDetected;
-        }
-
-        /**
-         * @brief Clears the screen
-         *
-         */
-        void clearScreen() {
-            auto& renderer = gfx::Renderer::get();
-
-            renderer.startFrame();
-            renderer.clearScreen();
-            renderer.endFrame();
-        }
-
-        /**
-         * @brief Reset hide and close flags that were previously set by \ref Overlay::close() or \ref Overlay::hide()
-         *
-         */
-        void resetFlags() {
-            this->m_shouldHide = false;
-            this->m_shouldClose = false;
-        }
-
-        /**
-         * @brief Disables the next animation that would play
-         *
-         */
-        void disableNextAnimation() {
-            this->m_disableNextAnimation = true;
-        }
-
-        /**
-         * @brief Changes to a different Gui
-         *
-         * @param gui Gui to change to
-         * @return Reference to the Gui
-         */
-        std::unique_ptr<tsl::Gui>& changeTo(std::unique_ptr<tsl::Gui>&& gui) {
-            if (this->m_guiStack.top() != nullptr && this->m_guiStack.top()->m_focusedElement != nullptr)
-                this->m_guiStack.top()->m_focusedElement->resetClickAnimation();
-
-            gui->m_topElement = gui->createUI();
-
-            this->m_guiStack.push(std::move(gui));
-
-            return this->m_guiStack.top();
-        }
-
-        /**
-         * @brief Creates a new Gui and changes to it
-         *
-         * @tparam G Gui to create
-         * @tparam Args Arguments to pass to the Gui
-         * @param args Arguments to pass to the Gui
-         * @return Reference to the newly created Gui
-         */
-        template<typename G, typename ...Args>
-        std::unique_ptr<tsl::Gui>& changeTo(Args&&... args) {
-            return this->changeTo(std::make_unique<G>(std::forward<Args>(args)...));
-        }
-
-        /**
-         * @brief Pops the top Gui from the stack and goes back to the last one
-         * @note The Overlay gets closes once there are no more Guis on the stack
-         */
-        void goBack() {
-            if (!this->m_closeOnExit && this->m_guiStack.size() == 1) {
-                this->hide();
-                return;
-            }
-
-            if (!this->m_guiStack.empty())
-                this->m_guiStack.pop();
-
-            if (this->m_guiStack.empty())
-                this->close();
-        }
-
-        template<typename G, typename ...Args>
-        friend std::unique_ptr<tsl::Gui>& changeTo(Args&&... args);
-
-        friend void goBack();
-
-        template<typename, tsl::impl::LaunchFlags>
-        friend int loop(int argc, char** argv);
-
-        friend class tsl::Gui;
-    };
-
-
-    namespace impl {
-
-        /**
-         * @brief Data shared between the different threads
-         *
-         */
-        struct SharedThreadData {
-            bool running = false;
-
-            Event comboEvent = { 0 };
-
-            bool overlayOpen = false;
-
-            std::mutex dataMutex;
-            u64 keysDown = 0;
-            u64 keysDownPending = 0;
-            u64 keysHeld = 0;
-            HidTouchScreenState touchState = { 0 };
-            HidAnalogStickState joyStickPosLeft = { 0 }, joyStickPosRight = { 0 };
-        };
-
-
-        /**
-         * @brief Extract values from Tesla settings file
-         *
-         */
-        static void parseOverlaySettings() {
-            hlp::ini::IniData parsedConfig = hlp::ini::readOverlaySettings();
-
-            u64 decodedKeys = hlp::comboStringToKeys(parsedConfig["tesla"]["key_combo"]);
-            if (decodedKeys)
-                tsl::cfg::launchCombo = decodedKeys;
-        }
-
-        /**
-         * @brief Update and save launch combo keys
-         *
-         * @param keys the new combo keys
-         */
-        [[maybe_unused]] static void updateCombo(u64 keys) {
-            tsl::cfg::launchCombo = keys;
-            hlp::ini::updateOverlaySettings({
-                { "tesla", {
-                    { "key_combo", tsl::hlp::keysToComboString(keys) }
-                }}
-            });
-        }
-
-        /**
-         * @brief Background event polling loop thread
-         *
-         * @param args Used to pass in a pointer to a \ref SharedThreadData struct
-         */
-        static void backgroundEventPoller(void *args) {
-            SharedThreadData *shData = static_cast<SharedThreadData*>(args);
-
-            // To prevent focus glitchout, close the overlay immediately when the home button gets pressed
-            Event homeButtonPressEvent = {};
-            hidsysAcquireHomeButtonEventHandle(&homeButtonPressEvent, false);
-            eventClear(&homeButtonPressEvent);
-            hlp::ScopeGuard homeButtonEventGuard([&] { eventClose(&homeButtonPressEvent); });
-
-            // To prevent focus glitchout, close the overlay immediately when the power button gets pressed
-            Event powerButtonPressEvent = {};
-            hidsysAcquireSleepButtonEventHandle(&powerButtonPressEvent, false);
-            eventClear(&powerButtonPressEvent);
-            hlp::ScopeGuard powerButtonEventGuard([&] { eventClose(&powerButtonPressEvent); });
-
-            // Parse Tesla settings
-            impl::parseOverlaySettings();
-
-            // Configure input to take all controllers and up to 8
-            padConfigureInput(8, HidNpadStyleSet_NpadStandard | HidNpadStyleTag_NpadSystemExt);
-
-            // Initialize pad
-            PadState pad;
-            padInitializeAny(&pad);
-
-            // Initialize touch screen
-            hidInitializeTouchScreen();
-
-            // Drop all inputs from the previous overlay
-            padUpdate(&pad);
-
-            enum WaiterObject {
-                WaiterObject_HomeButton,
-                WaiterObject_PowerButton,
-
-                WaiterObject_Count
-            };
-
-            // Construct waiter
-            Waiter objects[2] = {
-                [WaiterObject_HomeButton] = waiterForEvent(&homeButtonPressEvent),
-                [WaiterObject_PowerButton] = waiterForEvent(&powerButtonPressEvent),
-            };
-
-            while (shData->running) {
-                // Scan for input changes
-                padUpdate(&pad);
-
-                // Read in HID values
-                {
-                    std::scoped_lock lock(shData->dataMutex);
-
-                    shData->keysDown = padGetButtonsDown(&pad);
-                    shData->keysHeld = padGetButtons(&pad);
-                    shData->joyStickPosLeft  = padGetStickPos(&pad, 0);
-                    shData->joyStickPosRight = padGetStickPos(&pad, 1);
-
-                    // Read in touch positions
-                    if (hidGetTouchScreenStates(&shData->touchState, 1) == 0)
-                        shData->touchState = { 0 };
-
-                    if (((shData->keysHeld & tsl::cfg::launchCombo) == tsl::cfg::launchCombo) && shData->keysDown & tsl::cfg::launchCombo) {
-                        if (shData->overlayOpen) {
-                            tsl::Overlay::get()->hide();
-                            shData->overlayOpen = false;
-                        }
-                        else
-                            eventFire(&shData->comboEvent);
-                    }
-
-                    shData->keysDownPending |= shData->keysDown;
-                }
-
-                //20 ms
-                s32 idx = 0;
-                Result rc = waitObjects(&idx, objects, WaiterObject_Count, 20'000'000ul);
-                if (R_SUCCEEDED(rc)) {
-                    if (shData->overlayOpen) {
-                        tsl::Overlay::get()->hide();
-                        shData->overlayOpen = false;
-                    }
-
-                    switch (idx) {
-                        case WaiterObject_HomeButton:
-                            eventClear(&homeButtonPressEvent);
-                            break;
-                        case WaiterObject_PowerButton:
-                            eventClear(&powerButtonPressEvent);
-                            break;
-                    }
-                } else if (rc != KERNELRESULT(TimedOut)) {
-                    ASSERT_FATAL(rc);
-                }
-            }
-        }
-
-    }
-
-    /**
-     * @brief Creates a new Gui and changes to it
-     *
-     * @tparam G Gui to create
-     * @tparam Args Arguments to pass to the Gui
-     * @param args Arguments to pass to the Gui
-     * @return Reference to the newly created Gui
-     */
-    template<typename G, typename ...Args>
-    std::unique_ptr<tsl::Gui>& changeTo(Args&&... args) {
-        return Overlay::get()->changeTo<G, Args...>(std::forward<Args>(args)...);
-    }
-
-    /**
-     * @brief Pops the top Gui from the stack and goes back to the last one
-     * @note The Overlay gets closed once there are no more Guis on the stack
-     */
-    static void goBack() {
-        Overlay::get()->goBack();
-    }
-
-    static void setNextOverlay(const std::string& ovlPath, std::string origArgs) {
-
-        std::string args = std::filesystem::path(ovlPath).filename();
-        args += " " + origArgs + " --skipCombo";
-
-        envSetNextLoad(ovlPath.c_str(), args.c_str());
-    }
-
-
-
-    /**
-     * @brief libtesla's main function
-     * @note Call it directly from main passing in argc and argv and returning it e.g `return tsl::loop<OverlayTest>(argc, argv);`
-     *
-     * @tparam TOverlay Your overlay class
-     * @tparam launchFlags \ref LaunchFlags
-     * @param argc argc
-     * @param argv argv
-     * @return int result
-     */
-    template<typename TOverlay, impl::LaunchFlags launchFlags>
-    static inline int loop(int argc, char** argv) {
-        static_assert(std::is_base_of_v<tsl::Overlay, TOverlay>, "tsl::loop expects a type derived from tsl::Overlay");
-
-        impl::SharedThreadData shData;
-
-        shData.running = true;
-
-        Thread backgroundThread;
-        threadCreate(&backgroundThread, impl::backgroundEventPoller, &shData, nullptr, 0x1000, 0x2c, -2);
-        threadStart(&backgroundThread);
-
-        eventCreate(&shData.comboEvent, false);
-
-        auto& overlay = tsl::Overlay::s_overlayInstance;
-        overlay = new TOverlay();
-        overlay->m_closeOnExit = (u8(launchFlags) & u8(impl::LaunchFlags::CloseOnExit)) == u8(impl::LaunchFlags::CloseOnExit);
-
-        tsl::hlp::doWithSmSession([&overlay]{ overlay->initServices(); });
-        overlay->initScreen();
-        overlay->changeTo(overlay->loadInitialGui());
-
-
-        // Argument parsing
-        for (u8 arg = 0; arg < argc; arg++) {
-            if (strcasecmp(argv[arg], "--skipCombo") == 0) {
-                eventFire(&shData.comboEvent);
-                overlay->disableNextAnimation();
-            }
-        }
-
-
-        while (shData.running) {
-
-            eventWait(&shData.comboEvent, UINT64_MAX);
-            eventClear(&shData.comboEvent);
-            shData.overlayOpen = true;
-
-
-            hlp::requestForeground(true);
-
-            overlay->show();
-            overlay->clearScreen();
-
-
-            while (shData.running) {
-                overlay->loop();
-
-                {
-                    std::scoped_lock lock(shData.dataMutex);
-                    if (!overlay->fadeAnimationPlaying()) {
-                        overlay->handleInput(shData.keysDownPending, shData.keysHeld, shData.touchState.count, shData.touchState.touches[0], shData.joyStickPosLeft, shData.joyStickPosRight);
-                    }
-                    shData.keysDownPending = 0;
-                }
-
-                if (overlay->shouldHide())
-                    break;
-
-                if (overlay->shouldClose())
-                    shData.running = false;
-            }
-
-            overlay->clearScreen();
-            overlay->resetFlags();
-
-            hlp::requestForeground(false);
-
-            shData.overlayOpen = false;
-            eventClear(&shData.comboEvent);
-        }
-
-        eventClose(&shData.comboEvent);
-
-        threadWaitForExit(&backgroundThread);
-        threadClose(&backgroundThread);
-
-        overlay->exitScreen();
-        overlay->exitServices();
-
-        delete overlay;
-
-        return 0;
-    }
-
-}
-
-
-#ifdef TESLA_INIT_IMPL
-
-namespace tsl::cfg {
-
-    u16 LayerWidth  = 0;
-    u16 LayerHeight = 0;
-    u16 LayerPosX   = 0;
-    u16 LayerPosY   = 0;
-    u16 FramebufferWidth  = 0;
-    u16 FramebufferHeight = 0;
-    u64 launchCombo = HidNpadButton_L | HidNpadButton_Down | HidNpadButton_StickR;
-}
-
-extern "C" {
-
-    u32 __nx_applet_type = AppletType_None;
-    u32 __nx_fs_num_sessions = 1;
-    u32  __nx_nv_transfermem_size = 0x40000;
-    ViLayerFlags __nx_vi_stray_layer_flags = (ViLayerFlags)0;
-
-    /**
-     * @brief libtesla service initializing function to override libnx's
-     *
-     */
-    void __appInit(void) {
-        tsl::hlp::doWithSmSession([]{
-            ASSERT_FATAL(fsInitialize());
-            ASSERT_FATAL(hidInitialize());                          // Controller inputs and Touch
-            if (hosversionAtLeast(16,0,0)) {
-                ASSERT_FATAL(plInitialize(PlServiceType_User));     // Font data. Use pl:u for 16.0.0+
-            } else {
-                ASSERT_FATAL(plInitialize(PlServiceType_System));   // Use pl:s for 15.0.1 and below to prevent qlaunch/overlaydisp session exhaustion
-            }
-            ASSERT_FATAL(pmdmntInitialize());                       // PID querying
-            ASSERT_FATAL(hidsysInitialize());                       // Focus control
-            ASSERT_FATAL(setsysInitialize());                       // Settings querying
-        });
-    }
-
-    /**
-     * @brief libtesla service exiting function to override libnx's
-     *
-     */
-    void __appExit(void) {
-        fsExit();
-        hidExit();
-        plExit();
-        pmdmntExit();
-        hidsysExit();
-        setsysExit();
-    }
-
-}
-
-#endif
+/**
+ * Copyright (C) 2020 werwolv
+ *
+ * This file is part of libtesla.
+ *
+ * libtesla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * libtesla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with libtesla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <switch.h>
+
+#include <stdlib.h>
+#include <strings.h>
+#include <math.h>
+
+#include <algorithm>
+#include <cstring>
+#include <cwctype>
+#include <string>
+#include <functional>
+#include <type_traits>
+#include <mutex>
+#include <memory>
+#include <chrono>
+#include <list>
+#include <stack>
+#include <map>
+#include <filesystem>
+
+// Define this makro before including tesla.hpp in your main file. If you intend
+// to use the tesla.hpp header in more than one source file, only define it once!
+// #define TESLA_INIT_IMPL
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-function"
+
+#ifdef TESLA_INIT_IMPL
+    #define STB_TRUETYPE_IMPLEMENTATION
+#endif
+#include "stb_truetype.h"
+
+#pragma GCC diagnostic pop
+
+#define ELEMENT_BOUNDS(elem) elem->getX(), elem->getY(), elem->getWidth(), elem->getHeight()
+
+#define ASSERT_EXIT(x) if (R_FAILED(x)) std::exit(1)
+#define ASSERT_FATAL(x) if (Result res = x; R_FAILED(res)) fatalThrow(res)
+
+#define PACKED __attribute__((packed))
+#define ALWAYS_INLINE inline __attribute__((always_inline))
+
+/// Evaluates an expression that returns a result, and returns the result if it would fail.
+#define TSL_R_TRY(resultExpr)           \
+    ({                                  \
+        const auto result = resultExpr; \
+        if (R_FAILED(result)) {         \
+            return result;              \
+        }                               \
+    })
+
+using namespace std::literals::string_literals;
+using namespace std::literals::chrono_literals;
+
+namespace tsl {
+
+    // Constants
+
+    namespace cfg {
+
+        constexpr u32 ScreenWidth = 1920;       ///< Width of the Screen
+        constexpr u32 ScreenHeight = 1080;      ///< Height of the Screen
+
+        extern u16 LayerWidth;                  ///< Width of the Tesla layer
+        extern u16 LayerHeight;                 ///< Height of the Tesla layer
+        extern u16 LayerPosX;                   ///< X position of the Tesla layer
+        extern u16 LayerPosY;                   ///< Y position of the Tesla layer
+        extern u16 FramebufferWidth;            ///< Width of the framebuffer
+        extern u16 FramebufferHeight;           ///< Height of the framebuffer
+        extern u64 launchCombo;                 ///< Overlay activation key combo
+
+    }
+
+    /**
+     * @brief RGBA4444 Color structure
+     */
+    struct Color {
+
+        union {
+            struct {
+                u16 r: 4, g: 4, b: 4, a: 4;
+            } PACKED;
+            u16 rgba;
+        };
+
+        constexpr inline Color(u16 raw): rgba(raw) {}
+        constexpr inline Color(u8 r, u8 g, u8 b, u8 a): r(r), g(g), b(b), a(a) {}
+    };
+
+    namespace style {
+        constexpr u32 ListItemDefaultHeight         = 70;       ///< Standard list item height
+        constexpr u32 TrackBarDefaultHeight         = 90;       ///< Standard track bar height
+        constexpr u8  ListItemHighlightSaturation   = 6;        ///< Maximum saturation of Listitem highlights
+        constexpr u8  ListItemHighlightLength       = 22;       ///< Maximum length of Listitem highlights
+
+        namespace color {
+            constexpr Color ColorFrameBackground  = { 0x0, 0x0, 0x0, 0xD };   ///< Overlay frame background color
+            constexpr Color ColorTransparent      = { 0x0, 0x0, 0x0, 0x0 };   ///< Transparent color
+            constexpr Color ColorHighlight        = { 0x0, 0xF, 0xD, 0xF };   ///< Greenish highlight color
+            constexpr Color ColorFrame            = { 0x7, 0x7, 0x7, 0xF };   ///< Outer boarder color
+            constexpr Color ColorHandle           = { 0x5, 0x5, 0x5, 0xF };   ///< Track bar handle color
+            constexpr Color ColorText             = { 0xF, 0xF, 0xF, 0xF };   ///< Standard text color
+            constexpr Color ColorDescription      = { 0xA, 0xA, 0xA, 0xF };   ///< Description text color
+            constexpr Color ColorHeaderBar        = { 0xC, 0xC, 0xC, 0xF };   ///< Category header rectangle color
+            constexpr Color ColorClickAnimation   = { 0x0, 0x2, 0x2, 0xF };   ///< Element click animation color
+        }
+    }
+
+    // Declarations
+
+    /**
+     * @brief Direction in which focus moved before landing on
+     *        the currently focused element
+     */
+    enum class FocusDirection {
+        None,                       ///< Focus was placed on the element programatically without user input
+        Up,                         ///< Focus moved upwards
+        Down,                       ///< Focus moved downwards
+        Left,                       ///< Focus moved from left to rigth
+        Right                       ///< Focus moved from right to left
+    };
+
+    /**
+     * @brief Current input controll mode
+     *
+     */
+    enum class InputMode {
+        Controller,                 ///< Input from controller
+        Touch,                      ///< Touch input
+        TouchScroll                 ///< Moving/scrolling touch input
+    };
+
+    class Overlay;
+    namespace elm { class Element; }
+
+    namespace impl {
+
+        /**
+         * @brief Overlay launch parameters
+         */
+        enum class LaunchFlags : u8 {
+            None = 0,                       ///< Do nothing special at launch
+            CloseOnExit        = BIT(0)     ///< Close the overlay the last Gui gets poped from the stack
+        };
+
+        [[maybe_unused]] static constexpr LaunchFlags operator|(LaunchFlags lhs, LaunchFlags rhs) {
+            return static_cast<LaunchFlags>(u8(lhs) | u8(rhs));
+        }
+
+        /**
+         * @brief Combo key mapping
+         */
+        struct KeyInfo {
+            u64 key;
+            const char* name;
+            const char* glyph;
+        };
+
+        /**
+         * @brief Combo key mappings
+         *
+         * Ordered as they should be displayed
+         */
+        constexpr std::array<KeyInfo, 18> KEYS_INFO = {{
+            { HidNpadButton_L, "L", "\uE0A4" }, { HidNpadButton_R, "R", "\uE0A5" },
+            { HidNpadButton_ZL, "ZL", "\uE0A6" }, { HidNpadButton_ZR, "ZR", "\uE0A7" },
+            { HidNpadButton_AnySL, "SL", "\uE0A8" }, { HidNpadButton_AnySR, "SR", "\uE0A9" },
+            { HidNpadButton_Left, "DLEFT", "\uE07B" }, { HidNpadButton_Up, "DUP", "\uE079" }, { HidNpadButton_Right, "DRIGHT", "\uE07C" }, { HidNpadButton_Down, "DDOWN", "\uE07A" },
+            { HidNpadButton_A, "A", "\uE0A0" }, { HidNpadButton_B, "B", "\uE0A1" }, { HidNpadButton_X, "X", "\uE0A2" }, { HidNpadButton_Y, "Y", "\uE0A3" },
+            { HidNpadButton_StickL, "LS", "\uE08A" }, { HidNpadButton_StickR, "RS", "\uE08B" },
+            { HidNpadButton_Minus, "MINUS", "\uE0B6" }, { HidNpadButton_Plus, "PLUS", "\uE0B5" }
+        }};
+
+    }
+
+    [[maybe_unused]] static void goBack();
+
+    [[maybe_unused]] static void setNextOverlay(const std::string& ovlPath, std::string args = "");
+
+    template<typename TOverlay, impl::LaunchFlags launchFlags = impl::LaunchFlags::CloseOnExit>
+    int loop(int argc, char** argv);
+
+    // Helpers
+
+    namespace hlp {
+
+        /**
+         * @brief Wrapper for service initialization
+         *
+         * @param f wrapped function
+         */
+        template<typename F>
+        static inline void doWithSmSession(F f) {
+            smInitialize();
+            f();
+            smExit();
+        }
+
+        /**
+         * @brief Wrapper for sd card access using stdio
+         * @note Consider using raw fs calls instead as they are faster and need less space
+         *
+         * @param f wrapped function
+         */
+        template<typename F>
+        static inline void doWithSDCardHandle(F f) {
+            fsdevMountSdmc();
+            f();
+            fsdevUnmountDevice("sdmc");
+        }
+
+        /**
+         * @brief Guard that will execute a passed function at the end of the current scope
+         *
+         * @param f wrapped function
+         */
+        template<typename F>
+        class ScopeGuard {
+            ScopeGuard(const ScopeGuard&) = delete;
+            ScopeGuard& operator=(const ScopeGuard&) = delete;
+            private:
+                F f;
+                bool canceled = false;
+            public:
+                ALWAYS_INLINE ScopeGuard(F f) : f(std::move(f)) { }
+                ALWAYS_INLINE ~ScopeGuard() { if (!canceled) { f(); } }
+                void dismiss() { canceled = true; }
+        };
+
+        /**
+         * @brief libnx hid:sys shim that gives or takes away frocus to or from the process with the given aruid
+         *
+         * @param enable Give focus or take focus
+         * @param aruid Aruid of the process to focus/unfocus
+         * @return Result Result
+         */
+        static Result hidsysEnableAppletToGetInput(bool enable, u64 aruid) {
+            const struct {
+                u8 permitInput;
+                u64 appletResourceUserId;
+            } in = { enable != 0, aruid };
+
+            return serviceDispatchIn(hidsysGetServiceSession(), 503, in);
+        }
+
+        static Result viAddToLayerStack(ViLayer *layer, ViLayerStack stack) {
+            const struct {
+                u32 stack;
+                u64 layerId;
+            } in = { stack, layer->layer_id };
+
+            return serviceDispatchIn(viGetSession_IManagerDisplayService(), 6000, in);
+        }
+
+        /**
+         * @brief Toggles focus between the Tesla overlay and the rest of the system
+         *
+         * @param enabled Focus Tesla?
+         */
+        static void requestForeground(bool enabled) {
+            u64 applicationAruid = 0, appletAruid = 0;
+
+            for (u64 programId = 0x0100000000001000UL; programId < 0x0100000000001020UL; programId++) {
+                pmdmntGetProcessId(&appletAruid, programId);
+
+                if (appletAruid != 0)
+                    hidsysEnableAppletToGetInput(!enabled, appletAruid);
+            }
+
+            pmdmntGetApplicationProcessId(&applicationAruid);
+            hidsysEnableAppletToGetInput(!enabled, applicationAruid);
+
+            hidsysEnableAppletToGetInput(true, 0);
+        }
+
+        /**
+         * @brief Splits a string at the given delimeters
+         *
+         * @param str String to split
+         * @param delim Delimeter
+         * @return Vector containing the split tokens
+         */
+        static std::vector<std::string> split(const std::string& str, char delim = ' ') {
+            std::vector<std::string> out;
+
+            std::size_t current, previous = 0;
+            current = str.find(delim);
+            while (current != std::string::npos) {
+                out.push_back(str.substr(previous, current - previous));
+                previous = current + 1;
+                current = str.find(delim, previous);
+            }
+            out.push_back(str.substr(previous, current - previous));
+
+            return out;
+        }
+
+        namespace ini {
+
+            /**
+             * @brief Ini file type
+             */
+            using IniData = std::map<std::string, std::map<std::string, std::string>>;
+
+            /**
+             * @brief Tesla config file
+             */
+            static const char* CONFIG_FILE = "/config/tesla/config.ini";
+
+            /**
+             * @brief Parses a ini string
+             *
+             * @param str String to parse
+             * @return Parsed data
+             */
+            static IniData parseIni(const std::string &str) {
+                IniData iniData;
+
+                auto lines = split(str, '\n');
+
+                std::string lastHeader = "";
+                for (auto& line : lines) {
+                    line.erase(std::remove_if(line.begin(), line.end(), ::isspace), line.end());
+
+                    if (line[0] == '[' && line[line.size() - 1] == ']') {
+                        lastHeader = line.substr(1, line.size() - 2);
+                        iniData.emplace(lastHeader, std::map<std::string, std::string>{});
+                    }
+                    else if (auto keyValuePair = split(line, '='); keyValuePair.size() == 2) {
+                        iniData[lastHeader].emplace(keyValuePair[0], keyValuePair[1]);
+                    }
+                }
+
+                return iniData;
+            }
+
+            /**
+             * @brief Unparses ini data into a string
+             *
+             * @param iniData Ini data
+             * @return Ini string
+             */
+            static std::string unparseIni(IniData const &iniData) {
+                std::string string;
+                bool addSectionGap = false;
+                for (auto &section : iniData) {
+                    if (addSectionGap)
+                        string += "\n";
+                    string += "["s + section.first + "]\n"s;
+                    for (auto &keyValue : section.second) {
+                        string += keyValue.first + "="s + keyValue.second + "\n"s;
+                    }
+                }
+                return string;
+            }
+
+            /**
+             * @brief Read Tesla settings file
+             *
+             * @return Settings data
+             */
+            static IniData readOverlaySettings() {
+                /* Open Sd card filesystem. */
+                FsFileSystem fsSdmc;
+                if (R_FAILED(fsOpenSdCardFileSystem(&fsSdmc)))
+                    return {};
+                hlp::ScopeGuard fsGuard([&] { fsFsClose(&fsSdmc); });
+
+                /* Open config file. */
+                FsFile fileConfig;
+                if (R_FAILED(fsFsOpenFile(&fsSdmc, CONFIG_FILE, FsOpenMode_Read, &fileConfig)))
+                    return {};
+                hlp::ScopeGuard fileGuard([&] { fsFileClose(&fileConfig); });
+
+                /* Get config file size. */
+                s64 configFileSize;
+                if (R_FAILED(fsFileGetSize(&fileConfig, &configFileSize)))
+                    return {};
+
+                /* Read and parse config file. */
+                std::string configFileData(configFileSize, '\0');
+                u64 readSize;
+                Result rc = fsFileRead(&fileConfig, 0, configFileData.data(), configFileSize, FsReadOption_None, &readSize);
+                if (R_FAILED(rc) || readSize != static_cast<u64>(configFileSize))
+                    return {};
+
+                return parseIni(configFileData);
+            }
+
+            /**
+             * @brief Replace Tesla settings file with new data
+             *
+             * @param iniData new data
+             */
+            static void writeOverlaySettings(IniData const &iniData) {
+                /* Open Sd card filesystem. */
+                FsFileSystem fsSdmc;
+                if (R_FAILED(fsOpenSdCardFileSystem(&fsSdmc)))
+                    return;
+                hlp::ScopeGuard fsGuard([&] { fsFsClose(&fsSdmc); });
+
+                /* Open config file. */
+                FsFile fileConfig;
+                if (R_FAILED(fsFsOpenFile(&fsSdmc, CONFIG_FILE, FsOpenMode_Write, &fileConfig)))
+                    return;
+                hlp::ScopeGuard fileGuard([&] { fsFileClose(&fileConfig); });
+
+                std::string iniString = unparseIni(iniData);
+
+                fsFileWrite(&fileConfig, 0, iniString.c_str(), iniString.length(), FsWriteOption_Flush);
+            }
+
+            /**
+             * @brief Merge and save changes into Tesla settings file
+             *
+             * @param changes setting values to add or update
+             */
+            static void updateOverlaySettings(IniData const &changes) {
+                hlp::ini::IniData iniData = hlp::ini::readOverlaySettings();
+                for (auto &section : changes) {
+                    for (auto &keyValue : section.second) {
+                        iniData[section.first][keyValue.first] = keyValue.second;
+                    }
+                }
+                writeOverlaySettings(iniData);
+            }
+
+        }
+
+        /**
+         * @brief Decodes a key string into it's key code
+         *
+         * @param value Key string
+         * @return Key code
+         */
+        static u64 stringToKeyCode(const std::string &value) {
+            for (auto &keyInfo : impl::KEYS_INFO) {
+                if (strcasecmp(value.c_str(), keyInfo.name) == 0)
+                    return keyInfo.key;
+            }
+            return 0;
+        }
+
+        /**
+         * @brief Decodes a combo string into key codes
+         *
+         * @param value Combo string
+         * @return Key codes
+         */
+        static u64 comboStringToKeys(const std::string &value) {
+            u64 keyCombo = 0x00;
+            for (std::string key : hlp::split(value, '+')) {
+                keyCombo |= hlp::stringToKeyCode(key);
+            }
+            return keyCombo;
+        }
+
+        /**
+         * @brief Encodes key codes into a combo string
+         *
+         * @param keys Key codes
+         * @return Combo string
+         */
+        static std::string keysToComboString(u64 keys) {
+            std::string str;
+            for (auto &keyInfo : impl::KEYS_INFO) {
+                if (keys & keyInfo.key) {
+                    if (!str.empty())
+                        str.append("+");
+                    str.append(keyInfo.name);
+                }
+            }
+            return str;
+        }
+
+    }
+
+    // Renderer
+
+    namespace gfx {
+
+        extern "C" u64 __nx_vi_layer_id;
+
+        struct ScissoringConfig {
+            s32 x, y, w, h;
+        };
+
+        /**
+         * @brief Manages the Tesla layer and draws raw data to the screen
+         */
+        class Renderer final {
+        public:
+            Renderer& operator=(Renderer&) = delete;
+
+            friend class tsl::Overlay;
+
+            /**
+             * @brief Handles opacity of drawn colors for fadeout. Pass all colors through this function in order to apply opacity properly
+             *
+             * @param c Original color
+             * @return Color with applied opacity
+             */
+            static Color a(const Color &c) {
+                return (c.rgba & 0x0FFF) | (static_cast<u8>(c.a * Renderer::s_opacity) << 12);
+            }
+
+            /**
+             * @brief Enables scissoring, discarding of any draw outside the given boundaries
+             *
+             * @param x x pos
+             * @param y y pos
+             * @param w Width
+             * @param h Height
+             */
+            inline void enableScissoring(s32 x, s32 y, s32 w, s32 h) {
+                this->m_scissoringStack.emplace(x, y, w, h);
+            }
+
+            /**
+             * @brief Disables scissoring
+             */
+            inline void disableScissoring() {
+                this->m_scissoringStack.pop();
+            }
+
+
+            // Drawing functions
+
+            /**
+             * @brief Draw a single pixel onto the screen
+             *
+             * @param x X pos
+             * @param y Y pos
+             * @param color Color
+             */
+            inline void setPixel(s32 x, s32 y, Color color) {
+                if (x < 0 || y < 0 || x >= cfg::FramebufferWidth || y >= cfg::FramebufferHeight)
+                    return;
+
+                u32 offset = this->getPixelOffset(x, y);
+
+                if (offset != UINT32_MAX)
+                    static_cast<Color*>(this->getCurrentFramebuffer())[offset] = color;
+            }
+
+            /**
+             * @brief Blends two colors
+             *
+             * @param src Source color
+             * @param dst Destination color
+             * @param alpha Opacity
+             * @return Blended color
+             */
+            inline u8 blendColor(u8 src, u8 dst, u8 alpha) {
+                u8 oneMinusAlpha = 0x0F - alpha;
+
+                return (dst * alpha + src * oneMinusAlpha) / double(0xF);
+            }
+
+            /**
+             * @brief Draws a single source blended pixel onto the screen
+             *
+             * @param x X pos
+             * @param y Y pos
+             * @param color Color
+             */
+            inline void setPixelBlendSrc(s32 x, s32 y, Color color) {
+                if (x < 0 || y < 0 || x >= cfg::FramebufferWidth || y >= cfg::FramebufferHeight)
+                    return;
+
+                u32 offset = this->getPixelOffset(x, y);
+
+                if (offset == UINT32_MAX)
+                    return;
+
+                Color src((static_cast<u16*>(this->getCurrentFramebuffer()))[offset]);
+                Color dst(color);
+                Color end(0);
+
+                end.r = this->blendColor(src.r, dst.r, dst.a);
+                end.g = this->blendColor(src.g, dst.g, dst.a);
+                end.b = this->blendColor(src.b, dst.b, dst.a);
+                end.a = src.a;
+
+                this->setPixel(x, y, end);
+            }
+
+            /**
+             * @brief Draws a single destination blended pixel onto the screen
+             *
+             * @param x X pos
+             * @param y Y pos
+             * @param color Color
+             */
+            inline void setPixelBlendDst(s32 x, s32 y, Color color) {
+                if (x < 0 || y < 0 || x >= cfg::FramebufferWidth || y >= cfg::FramebufferHeight)
+                    return;
+
+                u32 offset = this->getPixelOffset(x, y);
+
+                if (offset == UINT32_MAX)
+                    return;
+
+                Color src((static_cast<u16*>(this->getCurrentFramebuffer()))[offset]);
+                Color dst(color);
+                Color end(0);
+
+                end.r = this->blendColor(src.r, dst.r, dst.a);
+                end.g = this->blendColor(src.g, dst.g, dst.a);
+                end.b = this->blendColor(src.b, dst.b, dst.a);
+                end.a = std::min(dst.a + src.a, 0xF);
+
+                this->setPixel(x, y, end);
+            }
+
+            /**
+             * @brief Draws a rectangle of given sizes
+             *
+             * @param x X pos
+             * @param y Y pos
+             * @param w Width
+             * @param h Height
+             * @param color Color
+             */
+            inline void drawRect(s32 x, s32 y, s32 w, s32 h, Color color) {
+                for (s32 x1 = x; x1 < (x + w); x1++)
+                    for (s32 y1 = y; y1 < (y + h); y1++)
+                        this->setPixelBlendDst(x1, y1, color);
+            }
+
+            void drawCircle(s32 centerX, s32 centerY, u16 radius, bool filled, Color color) {
+                s32 x = radius;
+                s32 y = 0;
+                s32 radiusError = 0;
+                s32 xChange = 1 - (radius << 1);
+                s32 yChange = 0;
+
+                while (x >= y) {
+                    if(filled) {
+                        for (s32 i = centerX - x; i <= centerX + x; i++) {
+                            s32 y0 = centerY + y;
+                            s32 y1 = centerY - y;
+                            s32 x0 = i;
+
+                            this->setPixelBlendDst(x0, y0, color);
+                            this->setPixelBlendDst(x0, y1, color);
+                        }
+
+                        for (s32 i = centerX - y; i <= centerX + y; i++) {
+                            s32 y0 = centerY + x;
+                            s32 y1 = centerY - x;
+                            s32 x0 = i;
+
+                            this->setPixelBlendDst(x0, y0, color);
+                            this->setPixelBlendDst(x0, y1, color);
+                        }
+
+                        y++;
+                        radiusError += yChange;
+                        yChange += 2;
+                        if (((radiusError << 1) + xChange) > 0) {
+                            x--;
+                            radiusError += xChange;
+                            xChange += 2;
+                        }
+                    } else {
+                        this->setPixelBlendDst(centerX + x, centerY + y, color);
+                        this->setPixelBlendDst(centerX + y, centerY + x, color);
+                        this->setPixelBlendDst(centerX - y, centerY + x, color);
+                        this->setPixelBlendDst(centerX - x, centerY + y, color);
+                        this->setPixelBlendDst(centerX - x, centerY - y, color);
+                        this->setPixelBlendDst(centerX - y, centerY - x, color);
+                        this->setPixelBlendDst(centerX + y, centerY - x, color);
+                        this->setPixelBlendDst(centerX + x, centerY - y, color);
+
+                        if(radiusError <= 0) {
+                            y++;
+                            radiusError += 2 * y + 1;
+                        } else {
+                            x--;
+                            radiusError -= 2 * x + 1;
+                        }
+                    }
+                }
+            }
+
+            /**
+             * @brief Draws a RGBA8888 bitmap from memory
+             *
+             * @param x X start position
+             * @param y Y start position
+             * @param w Bitmap width
+             * @param h Bitmap height
+             * @param bmp Pointer to bitmap data
+             */
+            void drawBitmap(s32 x, s32 y, s32 w, s32 h, const u8 *bmp) {
+                for (s32 y1 = 0; y1 < h; y1++) {
+                    for (s32 x1 = 0; x1 < w; x1++) {
+                        const Color color = { static_cast<u8>(bmp[0] >> 4), static_cast<u8>(bmp[1] >> 4), static_cast<u8>(bmp[2] >> 4), static_cast<u8>(bmp[3] >> 4) };
+                        setPixelBlendSrc(x + x1, y + y1, a(color));
+                        bmp += 4;
+                    }
+                }
+            }
+
+            /**
+             * @brief Fills the entire layer with a given color
+             *
+             * @param color Color
+             */
+            inline void fillScreen(Color color) {
+                std::fill_n(static_cast<Color*>(this->getCurrentFramebuffer()), this->getFramebufferSize() / sizeof(Color), color);
+            }
+
+            /**
+             * @brief Clears the layer (With transparency)
+             *
+             */
+            inline void clearScreen() {
+                this->fillScreen({ 0x00, 0x00, 0x00, 0x00 });
+            }
+
+            /**
+             * @brief Draws a string
+             *
+             * @param string String to draw
+             * @param monospace Draw string in monospace font
+             * @param x X pos
+             * @param y Y pos
+             * @param fontSize Height of the text drawn in pixels
+             * @param color Text color. Use transparent color to skip drawing and only get the string's dimensions
+             * @return Dimensions of drawn string
+             */
+            std::pair<u32, u32> drawString(const char* string, bool monospace, s32 x, s32 y, double fontSize, Color color, ssize_t maxWidth = 0) {
+                s32 maxX = x;
+                s32 currX = x;
+                s32 currY = y;
+
+                struct Glyph {
+                    stbtt_fontinfo *currFont;
+                    double currFontSize;
+                    int bounds[4];
+                    int xAdvance;
+                    u8 *glyphBmp;
+                    int width, height;
+                };
+
+                static std::unordered_map<u64, Glyph> s_glyphCache;
+
+                do {
+                    if (maxWidth > 0 && maxWidth < (currX - x))
+                        break;
+
+                    u32 currCharacter;
+                    ssize_t codepointWidth = decode_utf8(&currCharacter, reinterpret_cast<const u8*>(string));
+
+                    if (codepointWidth <= 0)
+                        break;
+
+                    string += codepointWidth;
+
+                    if (currCharacter == '\n') {
+                        maxX = std::max(currX, maxX);
+
+                        currX = x;
+                        currY += fontSize;
+
+                        continue;
+                    }
+
+                    u64 key = (static_cast<u64>(currCharacter) << 32) | static_cast<u64>(monospace) << 31 | static_cast<u64>(std::bit_cast<u64>(fontSize));
+
+                    Glyph *glyph = nullptr;
+
+                    auto it = s_glyphCache.find(key);
+                    if (it == s_glyphCache.end()) {
+                        /* Cache glyph */
+                        glyph = &s_glyphCache.emplace(key, Glyph()).first->second;
+
+                        if (stbtt_FindGlyphIndex(&this->m_extFont, currCharacter))
+                            glyph->currFont = &this->m_extFont;
+                        else if(this->m_hasLocalFont && stbtt_FindGlyphIndex(&this->m_stdFont, currCharacter)==0)
+                            glyph->currFont = &this->m_localFont;
+                        else
+                            glyph->currFont = &this->m_stdFont;
+
+                        glyph->currFontSize = stbtt_ScaleForPixelHeight(glyph->currFont, fontSize);
+
+                        stbtt_GetCodepointBitmapBoxSubpixel(glyph->currFont, currCharacter, glyph->currFontSize, glyph->currFontSize,
+                                                            0, 0, &glyph->bounds[0], &glyph->bounds[1], &glyph->bounds[2], &glyph->bounds[3]);
+
+                        int yAdvance = 0;
+                        stbtt_GetCodepointHMetrics(glyph->currFont, monospace ? 'W' : currCharacter, &glyph->xAdvance, &yAdvance);
+
+                        glyph->glyphBmp = stbtt_GetCodepointBitmap(glyph->currFont, glyph->currFontSize, glyph->currFontSize, currCharacter, &glyph->width, &glyph->height, nullptr, nullptr);
+                    } else {
+                        /* Use cached glyph */
+                        glyph = &it->second;
+                    }
+
+                    if (glyph->glyphBmp != nullptr && !std::iswspace(currCharacter) && fontSize > 0 && color.a != 0x0) {
+
+                        auto x = currX + glyph->bounds[0];
+                        auto y = currY + glyph->bounds[1];
+                        for (s32 bmpY = 0; bmpY < glyph->height; bmpY++) {
+                            for (s32 bmpX = 0; bmpX < glyph->width; bmpX++) {
+                                auto bmpColor = glyph->glyphBmp[glyph->width * bmpY + bmpX] >> 4;
+                                if (bmpColor == 0xF) {
+                                    this->setPixel(x + bmpX, y + bmpY, color);
+                                } else if (bmpColor != 0x0) {
+                                    Color tmpColor = color;
+                                    tmpColor.a = bmpColor * (double(tmpColor.a) / 0xF);
+                                    this->setPixelBlendDst(x + bmpX, y + bmpY, tmpColor);
+                                }
+                            }
+                        }
+
+                    }
+
+                    currX += static_cast<s32>(glyph->xAdvance * glyph->currFontSize);
+
+                } while (*string != '\0');
+
+                maxX = std::max(currX, maxX);
+
+                return { maxX - x, currY - y };
+            }
+
+            /**
+             * @brief Limit a strings length and end it with "…"
+             *
+             * @param string String to truncate
+             * @param maxLength Maximum length of string
+             */
+            std::string limitStringLength(std::string string, bool monospace, double fontSize, s32 maxLength) {
+                if (string.size() < 2)
+                    return string;
+
+                s32 currX = 0;
+                ssize_t strPos = 0;
+                ssize_t codepointWidth;
+
+                do {
+                    u32 currCharacter;
+                    codepointWidth = decode_utf8(&currCharacter, reinterpret_cast<const u8*>(&string[strPos]));
+
+                    if (codepointWidth <= 0)
+                        break;
+
+                    strPos += codepointWidth;
+
+                    stbtt_fontinfo *currFont = nullptr;
+
+                    if (stbtt_FindGlyphIndex(&this->m_extFont, currCharacter))
+                        currFont = &this->m_extFont;
+                    else if(this->m_hasLocalFont && stbtt_FindGlyphIndex(&this->m_stdFont, currCharacter)==0)
+                        currFont = &this->m_localFont;
+                    else
+                        currFont = &this->m_stdFont;
+
+                    double currFontSize = stbtt_ScaleForPixelHeight(currFont, fontSize);
+
+                    int xAdvance = 0, yAdvance = 0;
+                    stbtt_GetCodepointHMetrics(currFont, monospace ? 'W' : currCharacter, &xAdvance, &yAdvance);
+
+                    currX += static_cast<s32>(xAdvance * currFontSize);
+
+                } while (string[strPos] != '\0' && string[strPos] != '\n' && currX < maxLength);
+
+                string = string.substr(0, strPos - codepointWidth) + "…";
+                string.shrink_to_fit();
+
+                return string;
+            }
+
+        private:
+            Renderer() {}
+
+            /**
+             * @brief Gets the renderer instance
+             *
+             * @return Renderer
+             */
+            static Renderer& get() {
+                static Renderer renderer;
+
+                return renderer;
+            }
+
+            /**
+             * @brief Sets the opacity of the layer
+             *
+             * @param opacity Opacity
+             */
+            static void setOpacity(double opacity) {
+                opacity = std::clamp(opacity, (double)0.0, (double)1.0);
+
+                Renderer::s_opacity = opacity;
+            }
+
+            bool m_initialized = false;
+            ViDisplay m_display;
+            ViLayer m_layer;
+            Event m_vsyncEvent;
+
+            NWindow m_window;
+            Framebuffer m_framebuffer;
+            void *m_currentFramebuffer = nullptr;
+
+            std::stack<ScissoringConfig> m_scissoringStack;
+
+            stbtt_fontinfo m_stdFont, m_localFont, m_extFont;
+            bool m_hasLocalFont = false;
+
+            static inline double s_opacity = 1.0F;
+
+            /**
+             * @brief Get the current framebuffer address
+             *
+             * @return Framebuffer address
+             */
+            inline void* getCurrentFramebuffer() {
+                return this->m_currentFramebuffer;
+            }
+
+            /**
+             * @brief Get the next framebuffer address
+             *
+             * @return Next framebuffer address
+             */
+            inline void* getNextFramebuffer() {
+                return static_cast<u8*>(this->m_framebuffer.buf) + this->getNextFramebufferSlot() * this->getFramebufferSize();
+            }
+
+            /**
+             * @brief Get the framebuffer size
+             *
+             * @return Framebuffer size
+             */
+            inline size_t getFramebufferSize() {
+                return this->m_framebuffer.fb_size;
+            }
+
+            /**
+             * @brief Get the number of framebuffers in use
+             *
+             * @return Number of framebuffers
+             */
+            inline size_t getFramebufferCount() {
+                return this->m_framebuffer.num_fbs;
+            }
+
+            /**
+             * @brief Get the currently used framebuffer's slot
+             *
+             * @return Slot
+             */
+            inline u8 getCurrentFramebufferSlot() {
+                return this->m_window.cur_slot;
+            }
+
+            /**
+             * @brief Get the next framebuffer's slot
+             *
+             * @return Next slot
+             */
+            inline u8 getNextFramebufferSlot() {
+                return (this->getCurrentFramebufferSlot() + 1) % this->getFramebufferCount();
+            }
+
+            /**
+             * @brief Waits for the vsync event
+             *
+             */
+            inline void waitForVSync() {
+                eventWait(&this->m_vsyncEvent, UINT64_MAX);
+            }
+
+            /**
+             * @brief Decodes a x and y coordinate into a offset into the swizzled framebuffer
+             *
+             * @param x X pos
+             * @param y Y Pos
+             * @return Offset
+             */
+            u32 getPixelOffset(s32 x, s32 y) {
+                if (!this->m_scissoringStack.empty()) {
+                    auto currScissorConfig = this->m_scissoringStack.top();
+                    if (x < currScissorConfig.x ||
+                        y < currScissorConfig.y ||
+                        x > currScissorConfig.x + currScissorConfig.w ||
+                        y > currScissorConfig.y + currScissorConfig.h)
+                            return UINT32_MAX;
+                }
+
+                u32 tmpPos = ((y & 127) / 16) + (x / 32 * 8) + ((y / 16 / 8) * (((cfg::FramebufferWidth / 2) / 16 * 8)));
+                tmpPos *= 16 * 16 * 4;
+
+                tmpPos += ((y % 16) / 8) * 512 + ((x % 32) / 16) * 256 + ((y % 8) / 2) * 64 + ((x % 16) / 8) * 32 + (y % 2) * 16 + (x % 8) * 2;
+
+                return tmpPos / 2;
+            }
+
+            /**
+             * @brief Initializes the renderer and layers
+             *
+             */
+            void init() {
+
+                cfg::LayerPosX = 0;
+                cfg::LayerPosY = 0;
+                cfg::FramebufferWidth  = 448;
+                cfg::FramebufferHeight = 720;
+                cfg::LayerWidth  = cfg::ScreenHeight * (double(cfg::FramebufferWidth) / double(cfg::FramebufferHeight));
+                cfg::LayerHeight = cfg::ScreenHeight;
+
+                if (this->m_initialized)
+                    return;
+
+                tsl::hlp::doWithSmSession([this]{
+                    ASSERT_FATAL(viInitialize(ViServiceType_Manager));
+                    ASSERT_FATAL(viOpenDefaultDisplay(&this->m_display));
+                    ASSERT_FATAL(viGetDisplayVsyncEvent(&this->m_display, &this->m_vsyncEvent));
+                    ASSERT_FATAL(viCreateManagedLayer(&this->m_display, static_cast<ViLayerFlags>(0), 0, &__nx_vi_layer_id));
+                    ASSERT_FATAL(viCreateLayer(&this->m_display, &this->m_layer));
+                    ASSERT_FATAL(viSetLayerScalingMode(&this->m_layer, ViScalingMode_FitToLayer));
+
+                    if (s32 layerZ = 0; R_SUCCEEDED(viGetZOrderCountMax(&this->m_display, &layerZ)) && layerZ > 0)
+                        ASSERT_FATAL(viSetLayerZ(&this->m_layer, layerZ));
+
+                    ASSERT_FATAL(tsl::hlp::viAddToLayerStack(&this->m_layer, ViLayerStack_Default));
+                    ASSERT_FATAL(tsl::hlp::viAddToLayerStack(&this->m_layer, ViLayerStack_Screenshot));
+                    ASSERT_FATAL(tsl::hlp::viAddToLayerStack(&this->m_layer, ViLayerStack_Recording));
+                    ASSERT_FATAL(tsl::hlp::viAddToLayerStack(&this->m_layer, ViLayerStack_Arbitrary));
+                    ASSERT_FATAL(tsl::hlp::viAddToLayerStack(&this->m_layer, ViLayerStack_LastFrame));
+                    ASSERT_FATAL(tsl::hlp::viAddToLayerStack(&this->m_layer, ViLayerStack_Null));
+                    ASSERT_FATAL(tsl::hlp::viAddToLayerStack(&this->m_layer, ViLayerStack_ApplicationForDebug));
+                    ASSERT_FATAL(tsl::hlp::viAddToLayerStack(&this->m_layer, ViLayerStack_Lcd));
+                    //ASSERT_FATAL(tsl::hlp::viAddToLayerStack(&this->m_layer, 8));
+
+                    ASSERT_FATAL(viSetLayerSize(&this->m_layer, cfg::LayerWidth, cfg::LayerHeight));
+                    ASSERT_FATAL(viSetLayerPosition(&this->m_layer, cfg::LayerPosX, cfg::LayerPosY));
+                    ASSERT_FATAL(nwindowCreateFromLayer(&this->m_window, &this->m_layer));
+                    ASSERT_FATAL(framebufferCreate(&this->m_framebuffer, &this->m_window, cfg::FramebufferWidth, cfg::FramebufferHeight, PIXEL_FORMAT_RGBA_4444, 2));
+                    ASSERT_FATAL(setInitialize());
+                    ASSERT_FATAL(this->initFonts());
+                    setExit();
+                });
+
+                this->m_initialized = true;
+            }
+
+            /**
+             * @brief Exits the renderer and layer
+             *
+             */
+            void exit() {
+                if (!this->m_initialized)
+                    return;
+
+                framebufferClose(&this->m_framebuffer);
+                nwindowClose(&this->m_window);
+                viDestroyManagedLayer(&this->m_layer);
+                viCloseDisplay(&this->m_display);
+                eventClose(&this->m_vsyncEvent);
+                viExit();
+            }
+
+            /**
+             * @brief Initializes Nintendo's shared fonts. Default and Extended
+             *
+             * @return Result
+             */
+            Result initFonts() {
+                static PlFontData stdFontData, localFontData, extFontData;
+
+                // Nintendo's default font
+                TSL_R_TRY(plGetSharedFontByType(&stdFontData, PlSharedFontType_Standard));
+
+                u8 *fontBuffer = reinterpret_cast<u8*>(stdFontData.address);
+                stbtt_InitFont(&this->m_stdFont, fontBuffer, stbtt_GetFontOffsetForIndex(fontBuffer, 0));
+
+                u64 languageCode;
+                if (R_SUCCEEDED(setGetSystemLanguage(&languageCode))) {
+                    // Check if need localization font
+                    SetLanguage setLanguage;
+                    TSL_R_TRY(setMakeLanguage(languageCode, &setLanguage));
+                    this->m_hasLocalFont = true;
+                    switch (setLanguage) {
+                    case SetLanguage_ZHCN:
+                    case SetLanguage_ZHHANS:
+                        TSL_R_TRY(plGetSharedFontByType(&localFontData, PlSharedFontType_ChineseSimplified));
+                        break;
+                    case SetLanguage_KO:
+                        TSL_R_TRY(plGetSharedFontByType(&localFontData, PlSharedFontType_KO));
+                        break;
+                    case SetLanguage_ZHTW:
+                    case SetLanguage_ZHHANT:
+                        TSL_R_TRY(plGetSharedFontByType(&localFontData, PlSharedFontType_ChineseTraditional));
+                        break;
+                    default:
+                        this->m_hasLocalFont = false;
+                        break;
+                    }
+
+                    if (this->m_hasLocalFont) {
+                        fontBuffer = reinterpret_cast<u8*>(localFontData.address);
+                        stbtt_InitFont(&this->m_localFont, fontBuffer, stbtt_GetFontOffsetForIndex(fontBuffer, 0));
+                    }
+                }
+
+                // Nintendo's extended font containing a bunch of icons
+                TSL_R_TRY(plGetSharedFontByType(&extFontData, PlSharedFontType_NintendoExt));
+
+                fontBuffer = reinterpret_cast<u8*>(extFontData.address);
+                stbtt_InitFont(&this->m_extFont, fontBuffer, stbtt_GetFontOffsetForIndex(fontBuffer, 0));
+
+                return 0;
+            }
+
+            /**
+             * @brief Start a new frame
+             * @warning Don't call this more than once before calling \ref endFrame
+             */
+            inline void startFrame() {
+                this->m_currentFramebuffer = framebufferBegin(&this->m_framebuffer, nullptr);
+            }
+
+            /**
+             * @brief End the current frame
+             * @warning Don't call this before calling \ref startFrame once
+             */
+            inline void endFrame() {
+                this->waitForVSync();
+                framebufferEnd(&this->m_framebuffer);
+
+                this->m_currentFramebuffer = nullptr;
+            }
+        };
+
+    }
+
+    // Elements
+
+    namespace elm {
+
+        enum class TouchEvent {
+            Touch,
+            Hold,
+            Scroll,
+            Release
+        };
+
+        /**
+         * @brief The top level Element of the libtesla UI library
+         * @note When creating your own elements, extend from this or one of it's sub classes
+         */
+        class Element {
+        public:
+            Element() {}
+            virtual ~Element() { }
+
+            /**
+             * @brief Handles focus requesting
+             * @note This function should return the element to focus.
+             *       When this element should be focused, return `this`.
+             *       When one of it's child should be focused, return `this->child->requestFocus(oldFocus, direction)`
+             *       When this element is not focusable, return `nullptr`
+             *
+             * @param oldFocus Previously focused element
+             * @param direction Direction in which focus moved. \ref FocusDirection::None is passed for the initial load
+             * @return Element to focus
+             */
+            virtual Element* requestFocus(Element *oldFocus, FocusDirection direction) {
+                return nullptr;
+            }
+
+            /**
+             * @brief Function called when a joycon button got pressed
+             *
+             * @param keys Keys pressed in the last frame
+             * @return true when button press has been consumed
+             * @return false when button press should be passed on to the parent
+             */
+            virtual bool onClick(u64 keys) {
+                return m_clickListener(keys);
+            }
+
+            /**
+             * @brief Called once per frame with the latest HID inputs
+             *
+             * @param keysDown Buttons pressed in the last frame
+             * @param keysHeld Buttons held down longer than one frame
+             * @param touchInput Last touch position
+             * @param leftJoyStick Left joystick position
+             * @param rightJoyStick Right joystick position
+             * @return Weather or not the input has been consumed
+             */
+            virtual bool handleInput(u64 keysDown, u64 keysHeld, const HidTouchState &touchPos, HidAnalogStickState joyStickPosLeft, HidAnalogStickState joyStickPosRight) {
+                return false;
+            }
+
+            /**
+             * @brief Function called when the element got touched
+             * @todo Not yet implemented
+             *
+             * @param x X pos
+             * @param y Y pos
+             * @return true when touch input has been consumed
+             * @return false when touch input should be passed on to the parent
+             */
+            virtual bool onTouch(TouchEvent event, s32 currX, s32 currY, s32 prevX, s32 prevY, s32 initialX, s32 initialY) {
+                return false;
+            }
+
+            /**
+             * @brief Called once per frame to draw the element
+             * @warning Do not call this yourself. Use \ref Element::frame(gfx::Renderer *renderer)
+             *
+             * @param renderer Renderer
+             */
+            virtual void draw(gfx::Renderer *renderer) = 0;
+
+            /**
+             * @brief Called when the underlying Gui gets created and after calling \ref Gui::invalidate() to calculate positions and boundaries of the element
+             * @warning Do not call this yourself. Use \ref Element::invalidate()
+             *
+             * @param parentX Parent X pos
+             * @param parentY Parent Y pos
+             * @param parentWidth Parent Width
+             * @param parentHeight Parent Height
+             */
+            virtual void layout(u16 parentX, u16 parentY, u16 parentWidth, u16 parentHeight) = 0;
+
+            /**
+             * @brief Draws highlighting and the element itself
+             * @note When drawing children of a element in \ref Element::draw(gfx::Renderer *renderer), use `this->child->frame(renderer)` instead of calling draw directly
+             *
+             * @param renderer
+             */
+            void frame(gfx::Renderer *renderer) {
+                renderer->enableScissoring(0, 0, tsl::cfg::FramebufferWidth, tsl::cfg::FramebufferHeight);
+
+                if (this->m_focused)
+                    this->drawFocusBackground(renderer);
+
+                renderer->disableScissoring();
+
+                this->draw(renderer);
+
+                renderer->enableScissoring(0, 0, tsl::cfg::FramebufferWidth, tsl::cfg::FramebufferHeight);
+
+                if (this->m_focused)
+                    this->drawHighlight(renderer);
+
+                renderer->disableScissoring();
+            }
+
+            /**
+             * @brief Forces a layout recreation of a element
+             *
+             */
+            void invalidate() {
+                const auto& parent = this->getParent();
+
+                if (parent == nullptr)
+                    this->layout(0, 0, cfg::FramebufferWidth, cfg::FramebufferHeight);
+                else
+                    this->layout(ELEMENT_BOUNDS(parent));
+            }
+
+            /**
+             * @brief Shake the highlight in the given direction to signal that the focus cannot move there
+             *
+             * @param direction Direction to shake highlight in
+             */
+            void shakeHighlight(FocusDirection direction) {
+                this->m_highlightShaking = true;
+                this->m_highlightShakingDirection = direction;
+                this->m_highlightShakingStartTime = std::chrono::system_clock::now();
+            }
+
+            /**
+             * @brief Triggers the blue click animation to signal a element has been clicked on
+             *
+             */
+            void triggerClickAnimation() {
+                this->m_clickAnimationProgress = tsl::style::ListItemHighlightLength;
+            }
+
+            /**
+             * @brief Resets the click animation progress, canceling the animation
+             */
+            void resetClickAnimation() {
+                this->m_clickAnimationProgress = 0;
+            }
+
+            /**
+             * @brief Draws the blue highlight animation when clicking on a button
+             * @note Override this if you have a element that e.g requires a non-rectangular animation or a different color
+             *
+             * @param renderer Renderer
+             */
+            virtual void drawClickAnimation(gfx::Renderer *renderer) {
+                Color animColor = tsl::style::color::ColorClickAnimation;
+                u8 saturation = tsl::style::ListItemHighlightSaturation * (double(this->m_clickAnimationProgress) / double(tsl::style::ListItemHighlightLength));
+
+                animColor.g = saturation;
+                animColor.b = saturation;
+
+                renderer->drawRect(ELEMENT_BOUNDS(this), a(animColor));
+            }
+
+            /**
+             * @brief Draws the back background when a element is highlighted
+             * @note Override this if you have a element that e.g requires a non-rectangular focus
+             *
+             * @param renderer Renderer
+             */
+            virtual void drawFocusBackground(gfx::Renderer *renderer) {
+                renderer->drawRect(ELEMENT_BOUNDS(this), a(0xF000));
+
+                if (this->m_clickAnimationProgress > 0) {
+                    this->drawClickAnimation(renderer);
+                    this->m_clickAnimationProgress--;
+                }
+            }
+
+            /**
+             * @brief Draws the blue boarder when a element is highlighted
+             * @note Override this if you have a element that e.g requires a non-rectangular focus
+             *
+             * @param renderer Renderer
+             */
+            virtual void drawHighlight(gfx::Renderer *renderer) {
+                static double counter = 0;
+                const double progress = (std::sin(counter) + 1) / 2;
+                Color highlightColor = {   static_cast<u8>((0x2 - 0x8) * progress + 0x8),
+                                                static_cast<u8>((0x8 - 0xF) * progress + 0xF),
+                                                static_cast<u8>((0xC - 0xF) * progress + 0xF),
+                                                0xF };
+
+                counter += 0.1F;
+
+                s32 x = 0, y = 0;
+
+                if (this->m_highlightShaking) {
+                    auto t = (std::chrono::system_clock::now() - this->m_highlightShakingStartTime);
+                    if (t >= 100ms)
+                        this->m_highlightShaking = false;
+                    else {
+                        s32 amplitude = std::rand() % 5 + 5;
+
+                        switch (this->m_highlightShakingDirection) {
+                            case FocusDirection::Up:
+                                y -= shakeAnimation(t, amplitude);
+                                break;
+                            case FocusDirection::Down:
+                                y += shakeAnimation(t, amplitude);
+                                break;
+                            case FocusDirection::Left:
+                                x -= shakeAnimation(t, amplitude);
+                                break;
+                            case FocusDirection::Right:
+                                x += shakeAnimation(t, amplitude);
+                                break;
+                            default:
+                                break;
+                        }
+
+                        x = std::clamp(x, -amplitude, amplitude);
+                        y = std::clamp(y, -amplitude, amplitude);
+                    }
+                }
+
+                renderer->drawRect(this->getX() + x - 4, this->getY() + y - 4, this->getWidth() + 8, 4, a(highlightColor));
+                renderer->drawRect(this->getX() + x - 4, this->getY() + y + this->getHeight(), this->getWidth() + 8, 4, a(highlightColor));
+                renderer->drawRect(this->getX() + x - 4, this->getY() + y, 4, this->getHeight(), a(highlightColor));
+                renderer->drawRect(this->getX() + x + this->getWidth(), this->getY() + y, 4, this->getHeight(), a(highlightColor));
+
+            }
+
+            /**
+             * @brief Sets the boundaries of this view
+             *
+             * @param x Start X pos
+             * @param y Start Y pos
+             * @param width Width
+             * @param height Height
+             */
+            void setBoundaries(s32 x, s32 y, s32 width, s32 height) {
+                this->m_x = x;
+                this->m_y = y;
+                this->m_width = width;
+                this->m_height = height;
+            }
+
+            /**
+             * @brief Adds a click listener to the element
+             *
+             * @param clickListener Click listener called with keys that were pressed last frame. Callback should return true if keys got consumed
+             */
+            virtual void setClickListener(std::function<bool(u64 keys)> clickListener) {
+                this->m_clickListener = clickListener;
+            }
+
+            /**
+             * @brief Gets the element's X position
+             *
+             * @return X position
+             */
+            inline s32 getX() { return this->m_x; }
+            /**
+             * @brief Gets the element's Y position
+             *
+             * @return Y position
+             */
+            inline s32 getY() { return this->m_y; }
+            /**
+             * @brief Gets the element's Width
+             *
+             * @return Width
+             */
+            inline s32 getWidth() { return this->m_width;  }
+            /**
+             * @brief Gets the element's Height
+             *
+             * @return Height
+             */
+            inline s32 getHeight() { return this->m_height; }
+
+            inline s32 getTopBound() { return this->getY(); }
+            inline s32 getLeftBound() { return this->getX(); }
+            inline s32 getRightBound() { return this->getX() + this->getWidth(); }
+            inline s32 getBottomBound() { return this->getY() + this->getHeight(); }
+
+            /**
+             * @brief Check if the coordinates are in the elements bounds
+             *
+             * @return true if coordinates are in bounds, false otherwise
+             */
+            bool inBounds(s32 touchX, s32 touchY) {
+                return touchX >= this->getLeftBound() && touchX <= this->getRightBound() && touchY >= this->getTopBound() && touchY <= this->getBottomBound();
+            }
+
+            /**
+             * @brief Sets the element's parent
+             * @note This is required to handle focus and button downpassing properly
+             *
+             * @param parent Parent
+             */
+            inline void setParent(Element *parent) { this->m_parent = parent; }
+
+            /**
+             * @brief Get the element's parent
+             *
+             * @return Parent
+             */
+            inline Element* getParent() { return this->m_parent; }
+
+            /**
+             * @brief Marks this element as focused or unfocused to draw the highlight
+             *
+             * @param focused Focused
+             */
+            virtual inline void setFocused(bool focused) {
+                this->m_focused = focused;
+                this->m_clickAnimationProgress = 0;
+            }
+
+
+            static InputMode getInputMode() { return Element::s_inputMode; }
+
+            static void setInputMode(InputMode mode) { Element::s_inputMode = mode; }
+
+        protected:
+            constexpr static inline auto a = &gfx::Renderer::a;
+            bool m_focused = false;
+            u8 m_clickAnimationProgress = 0;
+
+            // Highlight shake animation
+            bool m_highlightShaking = false;
+            std::chrono::system_clock::time_point m_highlightShakingStartTime;
+            FocusDirection m_highlightShakingDirection;
+
+            static inline InputMode s_inputMode;
+
+            /**
+             * @brief Shake animation callculation based on a damped sine wave
+             *
+             * @param t Passed time
+             * @param a Amplitude
+             * @return Damped sine wave output
+             */
+            int shakeAnimation(std::chrono::system_clock::duration t, double a) {
+                double w = 0.2F;
+                double tau = 0.05F;
+
+                int t_ = t.count() / 1'000'000;
+
+                return roundf(a * exp(-(tau * t_) * sin(w * t_)));
+            }
+
+        private:
+            friend class Gui;
+
+            s32 m_x = 0, m_y = 0, m_width = 0, m_height = 0;
+            Element *m_parent = nullptr;
+
+            std::function<bool(u64 keys)> m_clickListener = [](u64) { return false; };
+
+        };
+
+        /**
+         * @brief A Element that exposes the renderer directly to draw custom views easily
+         */
+        class CustomDrawer : public Element {
+        public:
+            /**
+             * @brief Constructor
+             * @note This element should only be used to draw static things the user cannot interact with e.g info text, images, etc.
+             *
+             * @param renderFunc Callback that will be called once every frame to draw this view
+             */
+            CustomDrawer(std::function<void(gfx::Renderer* r, s32 x, s32 y, s32 w, s32 h)> renderFunc) : Element(), m_renderFunc(renderFunc) {}
+            virtual ~CustomDrawer() {}
+
+            virtual void draw(gfx::Renderer* renderer) override {
+                renderer->enableScissoring(ELEMENT_BOUNDS(this));
+                this->m_renderFunc(renderer, ELEMENT_BOUNDS(this));
+                renderer->disableScissoring();
+            }
+
+            virtual void layout(u16 parentX, u16 parentY, u16 parentWidth, u16 parentHeight) override {
+
+            }
+
+        private:
+            std::function<void(gfx::Renderer*, s32 x, s32 y, s32 w, s32 h)> m_renderFunc;
+        };
+
+
+        /**
+         * @brief The base frame which can contain another view
+         *
+         */
+        class OverlayFrame : public Element {
+        public:
+            /**
+             * @brief Constructor
+             *
+             * @param title Name of the Overlay drawn bolt at the top
+             * @param subtitle Subtitle drawn bellow the title e.g version number
+             */
+            OverlayFrame(const std::string& title, const std::string& subtitle) : Element(), m_title(title), m_subtitle(subtitle) {}
+            virtual ~OverlayFrame() {
+                if (this->m_contentElement != nullptr)
+                    delete this->m_contentElement;
+            }
+
+            virtual void draw(gfx::Renderer *renderer) override {
+                renderer->fillScreen(a(tsl::style::color::ColorFrameBackground));
+                renderer->drawRect(tsl::cfg::FramebufferWidth - 1, 0, 1, tsl::cfg::FramebufferHeight, a(0xF222));
+
+                renderer->drawString(this->m_title.c_str(), false, 20, 50, 30, a(tsl::style::color::ColorText));
+                renderer->drawString(this->m_subtitle.c_str(), false, 20, 70, 15, a(tsl::style::color::ColorDescription));
+
+                renderer->drawRect(15, tsl::cfg::FramebufferHeight - 73, tsl::cfg::FramebufferWidth - 30, 1, a(tsl::style::color::ColorText));
+
+                renderer->drawString("\uE0E1  Back     \uE0E0  OK", false, 30, 693, 23, a(tsl::style::color::ColorText));
+
+                if (this->m_contentElement != nullptr)
+                    this->m_contentElement->frame(renderer);
+            }
+
+            virtual void layout(u16 parentX, u16 parentY, u16 parentWidth, u16 parentHeight) override {
+                this->setBoundaries(parentX, parentY, parentWidth, parentHeight);
+
+                if (this->m_contentElement != nullptr) {
+                    this->m_contentElement->setBoundaries(parentX + 35, parentY + 125, parentWidth - 85, parentHeight - 73 - 125);
+                    this->m_contentElement->invalidate();
+                }
+            }
+
+            virtual Element* requestFocus(Element *oldFocus, FocusDirection direction) override {
+                if (this->m_contentElement != nullptr)
+                    return this->m_contentElement->requestFocus(oldFocus, direction);
+                else
+                    return nullptr;
+            }
+
+            virtual bool onTouch(TouchEvent event, s32 currX, s32 currY, s32 prevX, s32 prevY, s32 initialX, s32 initialY) {
+                // Discard touches outside bounds
+                if (!this->m_contentElement->inBounds(currX, currY))
+                    return false;
+
+                if (this->m_contentElement != nullptr)
+                    return this->m_contentElement->onTouch(event, currX, currY, prevX, prevY, initialX, initialY);
+                else return false;
+            }
+
+            /**
+             * @brief Sets the content of the frame
+             *
+             * @param content Element
+             */
+            void setContent(Element *content) {
+                if (this->m_contentElement != nullptr)
+                    delete this->m_contentElement;
+
+                this->m_contentElement = content;
+
+                if (content != nullptr) {
+                    this->m_contentElement->setParent(this);
+                    this->invalidate();
+                }
+            }
+
+            /**
+             * @brief Changes the title of the menu
+             *
+             * @param title Title to change to
+             */
+            void setTitle(const std::string &title) {
+                this->m_title = title;
+            }
+
+            /**
+             * @brief Changes the subtitle of the menu
+             *
+             * @param title Subtitle to change to
+             */
+            void setSubtitle(const std::string &subtitle) {
+                this->m_subtitle = subtitle;
+            }
+
+        protected:
+            Element *m_contentElement = nullptr;
+
+            std::string m_title, m_subtitle;
+        };
+
+        /**
+         * @brief The base frame which can contain another view with a customizable header
+         *
+         */
+        class HeaderOverlayFrame : public Element {
+        public:
+            HeaderOverlayFrame(u16 headerHeight = 175) : Element(), m_headerHeight(headerHeight) {}
+            virtual ~HeaderOverlayFrame() {
+                if (this->m_contentElement != nullptr)
+                    delete this->m_contentElement;
+
+                if (this->m_header != nullptr)
+                    delete this->m_header;
+            }
+
+            virtual void draw(gfx::Renderer *renderer) override {
+                renderer->fillScreen(a(tsl::style::color::ColorFrameBackground));
+                renderer->drawRect(tsl::cfg::FramebufferWidth - 1, 0, 1, tsl::cfg::FramebufferHeight, a(0xF222));
+
+                renderer->drawRect(15, tsl::cfg::FramebufferHeight - 73, tsl::cfg::FramebufferWidth - 30, 1, a(tsl::style::color::ColorText));
+
+                renderer->drawString("\uE0E1  Back     \uE0E0  OK", false, 30, 693, 23, a(tsl::style::color::ColorText));
+
+                if (this->m_header != nullptr)
+                    this->m_header->frame(renderer);
+
+                if (this->m_contentElement != nullptr)
+                    this->m_contentElement->frame(renderer);
+            }
+
+            virtual void layout(u16 parentX, u16 parentY, u16 parentWidth, u16 parentHeight) override {
+                this->setBoundaries(parentX, parentY, parentWidth, parentHeight);
+
+                if (this->m_contentElement != nullptr) {
+                    this->m_contentElement->setBoundaries(parentX + 35, parentY + this->m_headerHeight, parentWidth - 85, parentHeight - 73 - this->m_headerHeight);
+                    this->m_contentElement->invalidate();
+                }
+
+                if (this->m_header != nullptr) {
+                    this->m_header->setBoundaries(parentX, parentY, parentWidth, this->m_headerHeight);
+                    this->m_header->invalidate();
+                }
+            }
+
+            virtual bool onTouch(TouchEvent event, s32 currX, s32 currY, s32 prevX, s32 prevY, s32 initialX, s32 initialY) {
+                // Discard touches outside bounds
+                if (!this->m_contentElement->inBounds(currX, currY))
+                    return false;
+
+                if (this->m_contentElement != nullptr)
+                    return this->m_contentElement->onTouch(event, currX, currY, prevX, prevY, initialX, initialY);
+                else return false;
+            }
+
+            virtual Element* requestFocus(Element *oldFocus, FocusDirection direction) override {
+                if (this->m_contentElement != nullptr)
+                    return this->m_contentElement->requestFocus(oldFocus, direction);
+                else
+                    return nullptr;
+            }
+
+            /**
+             * @brief Sets the content of the frame
+             *
+             * @param content Element
+             */
+            void setContent(Element *content) {
+                if (this->m_contentElement != nullptr)
+                    delete this->m_contentElement;
+
+                this->m_contentElement = content;
+
+                if (content != nullptr) {
+                    this->m_contentElement->setParent(this);
+                    this->invalidate();
+                }
+            }
+
+            /**
+             * @brief Sets the header of the frame
+             *
+             * @param header Header custom drawer
+             */
+            void setHeader(CustomDrawer *header) {
+                if (this->m_header != nullptr)
+                    delete this->m_header;
+
+                this->m_header = header;
+
+                if (header != nullptr) {
+                    this->m_header->setParent(this);
+                    this->invalidate();
+                }
+            }
+
+        protected:
+            Element *m_contentElement = nullptr;
+            CustomDrawer *m_header = nullptr;
+
+            u16 m_headerHeight;
+        };
+
+        /**
+         * @brief Single color rectangle element mainly used for debugging to visualize boundaries
+         *
+         */
+        class DebugRectangle : public Element {
+        public:
+            /**
+             * @brief Constructor
+             *
+             * @param color Color of the rectangle
+             */
+            DebugRectangle(Color color) : Element(), m_color(color) {}
+            virtual ~DebugRectangle() {}
+
+            virtual void draw(gfx::Renderer *renderer) override {
+                renderer->drawRect(ELEMENT_BOUNDS(this), a(this->m_color));
+            }
+
+            virtual void layout(u16 parentX, u16 parentY, u16 parentWidth, u16 parentHeight) override {}
+
+        private:
+            Color m_color;
+        };
+
+
+        /**
+         * @brief A List containing list items
+         *
+         */
+        class List : public Element {
+        public:
+            /**
+             * @brief Constructor
+             *
+             */
+            List() : Element() {}
+            virtual ~List() {
+                for (auto& item : this->m_items)
+                    delete item;
+            }
+
+            virtual void draw(gfx::Renderer *renderer) override {
+                if (this->m_clearList) {
+                    for (auto& item : this->m_items)
+                        delete item;
+
+                    this->m_items.clear();
+                    this->m_offset = 0;
+                    this->m_focusedIndex = 0;
+                    this->invalidate();
+                    this->m_clearList = false;
+                }
+
+                for (auto [index, element] : this->m_itemsToAdd) {
+                    element->invalidate();
+                    if (index >= 0 && (this->m_items.size() > static_cast<size_t>(index))) {
+                        const auto& it = this->m_items.cbegin() + static_cast<size_t>(index);
+                        this->m_items.insert(it, element);
+                    } else {
+                        this->m_items.push_back(element);
+                    }
+                    this->invalidate();
+                    this->updateScrollOffset();
+                }
+                this->m_itemsToAdd.clear();
+
+                for (auto element : this->m_itemsToRemove) {
+                    for (auto it = m_items.cbegin(); it != m_items.cend(); ++it) {
+                        if (*it == element) {
+                            this->m_items.erase(it);
+                            if (this->m_focusedIndex >= (it - this->m_items.cbegin())) {
+                                this->m_focusedIndex--;
+                            }
+                            this->invalidate();
+                            this->updateScrollOffset();
+                            delete element;
+                            break;
+                        }
+                    }
+                }
+                this->m_itemsToRemove.clear();
+
+                renderer->enableScissoring(this->getLeftBound(), this->getTopBound() - 5, this->getWidth(), this->getHeight() + 4);
+
+                for (auto &entry : this->m_items) {
+                    if (entry->getBottomBound() > this->getTopBound() && entry->getTopBound() < this->getBottomBound()) {
+                        entry->frame(renderer);
+                    }
+                }
+
+                renderer->disableScissoring();
+
+                if (this->m_listHeight > this->getHeight()) {
+                    double scrollbarHeight = static_cast<double>(this->getHeight() * this->getHeight()) / this->m_listHeight;
+                    double scrollbarOffset = (static_cast<double>(this->m_offset)) / static_cast<double>(this->m_listHeight - this->getHeight()) * (this->getHeight() - std::ceil(scrollbarHeight));
+
+                    renderer->drawRect(this->getRightBound() + 10, this->getY() + scrollbarOffset, 5, scrollbarHeight - 50, a(tsl::style::color::ColorHandle));
+                    renderer->drawCircle(this->getRightBound() + 12, this->getY() + scrollbarOffset, 2, true, a(tsl::style::color::ColorHandle));
+                    renderer->drawCircle(this->getRightBound() + 12, this->getY() + scrollbarOffset + scrollbarHeight - 50, 2, true, a(tsl::style::color::ColorHandle));
+
+                    double prevOffset = this->m_offset;
+
+                    if (Element::getInputMode() == InputMode::Controller)
+                        this->m_offset += ((this->m_nextOffset) - this->m_offset) * 0.1F;
+                    else if (Element::getInputMode() == InputMode::TouchScroll)
+                        this->m_offset += ((this->m_nextOffset) - this->m_offset);
+
+                    if (static_cast<u32>(prevOffset) != static_cast<u32>(this->m_offset))
+                        this->invalidate();
+                }
+
+            }
+
+            virtual void layout(u16 parentX, u16 parentY, u16 parentWidth, u16 parentHeight) override {
+                s32 y = this->getY() - this->m_offset;
+
+                this->m_listHeight = 0;
+                for (auto &entry : this->m_items)
+                    this->m_listHeight += entry->getHeight();
+
+                for (auto &entry : this->m_items) {
+                    entry->setBoundaries(this->getX(), y, this->getWidth(), entry->getHeight());
+                    entry->invalidate();
+                    y += entry->getHeight();
+                }
+            }
+
+            virtual bool onTouch(TouchEvent event, s32 currX, s32 currY, s32 prevX, s32 prevY, s32 initialX, s32 initialY) {
+                bool handled = false;
+
+                // Discard touches out of bounds
+                if (!this->inBounds(currX, currY))
+                    return false;
+
+                // Direct touches to all children
+                for (auto &item : this->m_items)
+                    handled |= item->onTouch(event, currX, currY, prevX, prevY, initialX, initialY);
+
+                if (handled)
+                    return true;
+
+                // Handle scrolling
+                if (event != TouchEvent::Release && Element::getInputMode() == InputMode::TouchScroll) {
+                    if (prevX != 0 && prevY != 0)
+                        this->m_nextOffset += (prevY - currY);
+
+                    if (this->m_nextOffset < 0)
+                        this->m_nextOffset = 0;
+
+                    if (this->m_nextOffset > (this->m_listHeight - this->getHeight()) + 50)
+                        this->m_nextOffset = (this->m_listHeight - this->getHeight() + 50);
+
+                    return true;
+                }
+
+                return false;
+            }
+
+            /**
+             * @brief Adds a new item to the list before the next frame starts
+             *
+             * @param element Element to add
+             * @param index Index in the list where the item should be inserted. -1 or greater list size will insert it at the end
+             * @param height Height of the element. Don't set this parameter for libtesla to try and figure out the size based on the type
+             */
+            void addItem(Element *element, u16 height = 0, ssize_t index = -1) {
+                if (element != nullptr) {
+                    if (height != 0)
+                        element->setBoundaries(this->getX(), this->getY(), this->getWidth(), height);
+
+                    element->setParent(this);
+                    element->invalidate();
+
+                    this->m_itemsToAdd.emplace_back(index, element);
+                }
+            }
+
+            /**
+             * @brief Removes an item form the list and deletes it
+             * @note Item will only be deleted if it was found in the list
+             *
+             * @param element Element to remove from list. Call \ref Gui::removeFocus before.
+             */
+            virtual void removeItem(Element *element) {
+                if (element != nullptr)
+                    this->m_itemsToRemove.emplace_back(element);
+            }
+
+            /**
+             * @brief Try to remove an item from the list
+             *
+             * @param index Index of element in list. Call \ref Gui::removeFocus before.
+             */
+            virtual void removeIndex(size_t index) {
+                if (index < this->m_items.size())
+                    removeItem(this->m_items[index]);
+            }
+
+            /**
+             * @brief Removes all children from the list later on
+             * @warning When clearing a list, make sure none of the its children are focused. Call \ref Gui::removeFocus before.
+             */
+            void clear() {
+                this->m_clearList = true;
+            }
+
+            virtual Element* requestFocus(Element *oldFocus, FocusDirection direction) override {
+                Element *newFocus = nullptr;
+
+                if (this->m_clearList || this->m_itemsToAdd.size() > 0)
+                    return nullptr;
+
+                if (direction == FocusDirection::None) {
+                    u16 i = 0;
+
+                    if (oldFocus == nullptr) {
+                        s32 elementHeight = 0;
+                        while (elementHeight < this->m_offset && i < this->m_items.size() - 1) {
+                            i++;
+                            elementHeight += this->m_items[i]->getHeight();
+                        }
+                    }
+
+                    for (; i < this->m_items.size(); i++) {
+                        newFocus = this->m_items[i]->requestFocus(oldFocus, direction);
+
+                        if (newFocus != nullptr) {
+                            this->m_focusedIndex = i;
+
+                            this->updateScrollOffset();
+                            return newFocus;
+                        }
+                    }
+                } else {
+                    if (direction == FocusDirection::Down) {
+
+                        for (u16 i = this->m_focusedIndex + 1; i < this->m_items.size(); i++) {
+                            newFocus = this->m_items[i]->requestFocus(oldFocus, direction);
+
+                            if (newFocus != nullptr && newFocus != oldFocus) {
+                                this->m_focusedIndex = i;
+
+                                this->updateScrollOffset();
+                                return newFocus;
+                            }
+                        }
+
+                        return oldFocus;
+                    } else if (direction == FocusDirection::Up) {
+                        if (this->m_focusedIndex > 0) {
+
+                            for (u16 i = this->m_focusedIndex - 1; i >= 0; i--) {
+                                if (i > this->m_items.size() || this->m_items[i] == nullptr)
+                                    return oldFocus;
+                                else
+                                    newFocus = this->m_items[i]->requestFocus(oldFocus, direction);
+
+                                if (newFocus != nullptr && newFocus != oldFocus) {
+                                    this->m_focusedIndex = i;
+
+                                    this->updateScrollOffset();
+                                    return newFocus;
+                                }
+                            }
+                        }
+
+                        return oldFocus;
+                    }
+                }
+
+                return oldFocus;
+            }
+
+            /**
+             * @brief Gets the item at the index in the list
+             *
+             * @param index Index position in list
+             * @return Element from list. nullptr for if the index is out of bounds
+             */
+            virtual Element* getItemAtIndex(u32 index) {
+                if (this->m_items.size() <= index)
+                    return nullptr;
+
+                return this->m_items[index];
+            }
+
+            /**
+             * @brief Gets the index in the list of the element passed in
+             *
+             * @param element Element to check
+             * @return Index in list. -1 for if the element isn't a member of the list
+             */
+            virtual s32 getIndexInList(Element *element) {
+                auto it = std::find(this->m_items.begin(), this->m_items.end(), element);
+
+                if (it == this->m_items.end())
+                    return -1;
+
+                return it - this->m_items.begin();
+            }
+
+            virtual void setFocusedIndex(u32 index) {
+                if (this->m_items.size() > index) {
+                    m_focusedIndex = index;
+                    this->updateScrollOffset();
+                }
+            }
+
+        protected:
+            std::vector<Element*> m_items;
+            u16 m_focusedIndex = 0;
+
+            double m_offset = 0, m_nextOffset = 0;
+            s32 m_listHeight = 0;
+
+            bool m_clearList = false;
+            std::vector<Element *> m_itemsToRemove;
+            std::vector<std::pair<ssize_t, Element *>> m_itemsToAdd;
+
+        private:
+
+            virtual void updateScrollOffset() {
+                if (this->getInputMode() != InputMode::Controller)
+                    return;
+
+                if (this->m_listHeight <= this->getHeight()) {
+                    this->m_nextOffset = 0;
+                    this->m_offset = 0;
+
+                    return;
+                }
+
+                this->m_nextOffset = 0;
+                for (u16 i = 0; i < this->m_focusedIndex; i++)
+                    this->m_nextOffset += this->m_items[i]->getHeight();
+
+                this->m_nextOffset -= this->getHeight() / 3;
+
+                if (this->m_nextOffset < 0)
+                    this->m_nextOffset = 0;
+
+                if (this->m_nextOffset > (this->m_listHeight - this->getHeight()) + 50)
+                    this->m_nextOffset = (this->m_listHeight - this->getHeight() + 50);
+            }
+        };
+
+        /**
+         * @brief A item that goes into a list
+         *
+         */
+        class ListItem : public Element {
+        public:
+            /**
+             * @brief Constructor
+             *
+             * @param text Initial description text
+             */
+            ListItem(const std::string& text, const std::string& value = "")
+                : Element(), m_text(text), m_value(value) {
+            }
+            virtual ~ListItem() {}
+
+            virtual void draw(gfx::Renderer *renderer) override {
+                if (this->m_touched && Element::getInputMode() == InputMode::Touch) {
+                    renderer->drawRect(ELEMENT_BOUNDS(this), a(tsl::style::color::ColorClickAnimation));
+                }
+
+                if (this->m_maxWidth == 0) {
+                    if (this->m_value.length() > 0) {
+                        auto [valueWidth, valueHeight] = renderer->drawString(this->m_value.c_str(), false, 0, 0, 20, tsl::style::color::ColorTransparent);
+                        this->m_maxWidth = this->getWidth() - valueWidth - 70;
+                    } else {
+                        this->m_maxWidth = this->getWidth() - 40;
+                    }
+
+                    auto [width, height] = renderer->drawString(this->m_text.c_str(), false, 0, 0, 23, tsl::style::color::ColorTransparent);
+                    this->m_trunctuated = width > this->m_maxWidth;
+
+                    if (this->m_trunctuated) {
+                        this->m_scrollText = this->m_text + "        ";
+                        auto [width, height] = renderer->drawString(this->m_scrollText.c_str(), false, 0, 0, 23, tsl::style::color::ColorTransparent);
+                        this->m_scrollText += this->m_text;
+                        this->m_textWidth = width;
+                        this->m_ellipsisText = renderer->limitStringLength(this->m_text, false, 22, this->m_maxWidth);
+                    } else {
+                        this->m_textWidth = width;
+                    }
+                }
+
+                renderer->drawRect(this->getX(), this->getY(), this->getWidth(), 1, a(tsl::style::color::ColorFrame));
+                renderer->drawRect(this->getX(), this->getTopBound(), this->getWidth(), 1, a(tsl::style::color::ColorFrame));
+
+                if (this->m_trunctuated) {
+                    if (this->m_focused) {
+                        renderer->enableScissoring(this->getX(), this->getY(), this->m_maxWidth + 40, this->getHeight());
+                        renderer->drawString(this->m_scrollText.c_str(), false, this->getX() + 20 - this->m_scrollOffset, this->getY() + 45, 23, tsl::style::color::ColorText);
+                        renderer->disableScissoring();
+                        if (this->m_scrollAnimationCounter == 90) {
+                            if (this->m_scrollOffset == this->m_textWidth) {
+                                this->m_scrollOffset = 0;
+                                this->m_scrollAnimationCounter = 0;
+                            } else {
+                                this->m_scrollOffset++;
+                            }
+                        } else {
+                            this->m_scrollAnimationCounter++;
+                        }
+                    } else {
+                        renderer->drawString(this->m_ellipsisText.c_str(), false, this->getX() + 20, this->getY() + 45, 23, a(tsl::style::color::ColorText));
+                    }
+                } else {
+                    renderer->drawString(this->m_text.c_str(), false, this->getX() + 20, this->getY() + 45, 23, a(tsl::style::color::ColorText));
+                }
+
+                renderer->drawString(this->m_value.c_str(), false, this->getX() + this->m_maxWidth + 45, this->getY() + 45, 20, this->m_faint ? a(tsl::style::color::ColorDescription) : a(tsl::style::color::ColorHighlight));
+            }
+
+            virtual void layout(u16 parentX, u16 parentY, u16 parentWidth, u16 parentHeight) override {
+                this->setBoundaries(this->getX(), this->getY(), this->getWidth(), tsl::style::ListItemDefaultHeight);
+            }
+
+            virtual bool onClick(u64 keys) override {
+                if (keys & HidNpadButton_A)
+                    this->triggerClickAnimation();
+                else if (keys & (HidNpadButton_AnyUp | HidNpadButton_AnyDown | HidNpadButton_AnyLeft | HidNpadButton_AnyRight))
+                    this->m_clickAnimationProgress = 0;
+
+                return Element::onClick(keys);
+            }
+
+
+            virtual bool onTouch(TouchEvent event, s32 currX, s32 currY, s32 prevX, s32 prevY, s32 initialX, s32 initialY) override {
+                if (event == TouchEvent::Touch)
+                    this->m_touched = this->inBounds(currX, currY);
+
+                if (event == TouchEvent::Release && this->m_touched) {
+                    this->m_touched = false;
+
+                    if (Element::getInputMode() == InputMode::Touch) {
+                        bool handled = this->onClick(HidNpadButton_A);
+
+                        this->m_clickAnimationProgress = 0;
+                        return handled;
+                    }
+                }
+
+
+                return false;
+            }
+
+
+            virtual void setFocused(bool state) override {
+                this->m_scroll = false;
+                this->m_scrollOffset = 0;
+                this->m_scrollAnimationCounter = 0;
+                Element::setFocused(state);
+            }
+
+            virtual Element* requestFocus(Element *oldFocus, FocusDirection direction) override {
+                return this;
+            }
+
+            /**
+             * @brief Sets the left hand description text of the list item
+             *
+             * @param text Text
+             */
+            inline void setText(const std::string& text) {
+                this->m_text = text;
+                this->m_scrollText = "";
+                this->m_ellipsisText = "";
+                this->m_maxWidth = 0;
+            }
+
+            /**
+             * @brief Sets the right hand value text of the list item
+             *
+             * @param value Text
+             * @param faint Should the text be drawn in a glowing green or a faint gray
+             */
+            inline void setValue(const std::string& value, bool faint = false) {
+                this->m_value = value;
+                this->m_faint = faint;
+                this->m_maxWidth = 0;
+            }
+
+            /**
+             * @brief Gets the left hand description text of the list item
+             *
+             * @return Text
+             */
+            inline const std::string& getText() const {
+                return this->m_text;
+            }
+
+            /**
+             * @brief Gets the right hand value text of the list item
+             *
+             * @return Value
+             */
+            inline const std::string& getValue() {
+                return this->m_value;
+            }
+
+        protected:
+            std::string m_text;
+            std::string m_value = "";
+            std::string m_scrollText = "";
+            std::string m_ellipsisText = "";
+
+            bool m_scroll = false;
+            bool m_trunctuated = false;
+            bool m_faint = false;
+
+            bool m_touched = false;
+
+            u16 m_maxScroll = 0;
+            u16 m_scrollOffset = 0;
+            u32 m_maxWidth = 0;
+            u32 m_textWidth = 0;
+            u16 m_scrollAnimationCounter = 0;
+        };
+
+        /**
+         * @brief A toggleable list item that changes the state from On to Off when the A button gets pressed
+         *
+         */
+        class ToggleListItem : public ListItem {
+        public:
+            /**
+             * @brief Constructor
+             *
+             * @param text Initial description text
+             * @param initialState Is the toggle set to On or Off initially
+             * @param onValue Value drawn if the toggle is on
+             * @param offValue Value drawn if the toggle is off
+             */
+            ToggleListItem(const std::string& text, bool initialState, const std::string& onValue = "On", const std::string& offValue = "Off")
+                : ListItem(text), m_state(initialState), m_onValue(onValue), m_offValue(offValue) {
+
+                this->setState(this->m_state);
+            }
+
+            virtual ~ToggleListItem() {}
+
+            virtual bool onClick(u64 keys) override {
+                if (keys & HidNpadButton_A) {
+                    this->m_state = !this->m_state;
+
+                    this->setState(this->m_state);
+                    this->m_stateChangedListener(this->m_state);
+
+                    return ListItem::onClick(keys);
+                }
+
+                return false;
+            }
+
+            /**
+             * @brief Gets the current state of the toggle
+             *
+             * @return State
+             */
+            virtual inline bool getState() {
+                return this->m_state;
+            }
+
+            /**
+             * @brief Sets the current state of the toggle. Updates the Value
+             *
+             * @param state State
+             */
+            virtual void setState(bool state) {
+                this->m_state = state;
+
+                if (state)
+                    this->setValue(this->m_onValue, false);
+                else
+                    this->setValue(this->m_offValue, true);
+            }
+
+            /**
+             * @brief Adds a listener that gets called whenever the state of the toggle changes
+             *
+             * @param stateChangedListener Listener with the current state passed in as parameter
+             */
+            void setStateChangedListener(std::function<void(bool)> stateChangedListener) {
+                this->m_stateChangedListener = stateChangedListener;
+            }
+
+        protected:
+            bool m_state = true;
+            std::string m_onValue, m_offValue;
+
+            std::function<void(bool)> m_stateChangedListener = [](bool){};
+        };
+
+        class CategoryHeader : public Element {
+        public:
+            CategoryHeader(const std::string &title, bool hasSeparator = false) : m_text(title), m_hasSeparator(hasSeparator) {}
+            virtual ~CategoryHeader() {}
+
+            virtual void draw(gfx::Renderer *renderer) override {
+                renderer->drawRect(this->getX() - 2, this->getBottomBound() - 30, 5, 23, a(tsl::style::color::ColorHeaderBar));
+                renderer->drawString(this->m_text.c_str(), false, this->getX() + 13, this->getBottomBound() - 12, 15, a(tsl::style::color::ColorText));
+
+                if (this->m_hasSeparator)
+                    renderer->drawRect(this->getX(), this->getBottomBound(), this->getWidth(), 1, a(tsl::style::color::ColorFrame));
+            }
+
+            virtual void layout(u16 parentX, u16 parentY, u16 parentWidth, u16 parentHeight) override {
+                // Check if the CategoryHeader is part of a list and if it's the first entry in it, half it's height
+                if (List *list = dynamic_cast<List*>(this->getParent()); list != nullptr) {
+                    if (list->getIndexInList(this) == 0) {
+                        this->setBoundaries(this->getX(), this->getY(), this->getWidth(), tsl::style::ListItemDefaultHeight / 2);
+                        return;
+                    }
+                }
+
+                this->setBoundaries(this->getX(), this->getY(), this->getWidth(), tsl::style::ListItemDefaultHeight);
+            }
+
+            virtual bool onClick(u64 keys) {
+                return false;
+            }
+
+            virtual Element* requestFocus(Element *oldFocus, FocusDirection direction) override {
+                return nullptr;
+            }
+
+            inline void setText(const std::string &text) {
+                this->m_text = text;
+            }
+
+            inline const std::string& getText() const {
+                return this->m_text;
+            }
+
+        private:
+            std::string m_text;
+            bool m_hasSeparator;
+        };
+
+        /**
+         * @brief A customizable analog trackbar going from 0% to 100% (like the brightness slider)
+         *
+         */
+        class TrackBar : public Element {
+        public:
+            /**
+             * @brief Constructor
+             *
+             * @param icon Icon shown next to the track bar
+             */
+            TrackBar(const char icon[3]) : m_icon(icon) { }
+
+            virtual ~TrackBar() {}
+
+            virtual Element* requestFocus(Element *oldFocus, FocusDirection direction) {
+                return this;
+            }
+
+            virtual bool handleInput(u64 keysDown, u64 keysHeld, const HidTouchState &touchPos, HidAnalogStickState leftJoyStick, HidAnalogStickState rightJoyStick) override {
+                if (keysHeld & HidNpadButton_AnyLeft && keysHeld & HidNpadButton_AnyRight)
+                    return true;
+
+                if (keysHeld & HidNpadButton_AnyLeft) {
+                    if (this->m_value > 0) {
+                        this->m_value--;
+                        this->m_valueChangedListener(this->m_value);
+                        return true;
+                    }
+                }
+
+                if (keysHeld & HidNpadButton_AnyRight) {
+                    if (this->m_value < 100) {
+                        this->m_value++;
+                        this->m_valueChangedListener(this->m_value);
+                        return true;
+                    }
+                }
+
+                return false;
+            }
+
+            virtual bool onTouch(TouchEvent event, s32 currX, s32 currY, s32 prevX, s32 prevY, s32 initialX, s32 initialY) override {
+                if (event == TouchEvent::Release) {
+                    this->m_interactionLocked = false;
+                    return false;
+                }
+
+
+                if (!this->m_interactionLocked && this->inBounds(initialX, initialY)) {
+                    if (currX > this->getLeftBound() + 50 && currX < this->getRightBound() && currY > this->getTopBound() && currY < this->getBottomBound()) {
+                        s16 newValue = (static_cast<double>(currX - (this->getX() + 60)) / static_cast<double>(this->getWidth() - 95)) * 100;
+
+                        if (newValue < 0) {
+                            newValue = 0;
+                        } else if (newValue > 100) {
+                            newValue = 100;
+                        }
+
+                        if (newValue != this->m_value) {
+                            this->m_value = newValue;
+                            this->m_valueChangedListener(this->getProgress());
+                        }
+
+                        return true;
+                    }
+                }
+                else
+                    this->m_interactionLocked = true;
+
+                return false;
+            }
+
+            virtual void draw(gfx::Renderer *renderer) override {
+                renderer->drawRect(this->getX(), this->getY(), this->getWidth(), 1, a(tsl::style::color::ColorFrame));
+                renderer->drawRect(this->getX(), this->getBottomBound(), this->getWidth(), 1, a(tsl::style::color::ColorFrame));
+
+                renderer->drawString(this->m_icon, false, this->getX() + 15, this->getY() + 50, 23, a(tsl::style::color::ColorText));
+
+                u16 handlePos = (this->getWidth() - 95) * static_cast<double>(this->m_value) / 100;
+                renderer->drawCircle(this->getX() + 60, this->getY() + 42, 2, true, a(tsl::style::color::ColorHighlight));
+                renderer->drawCircle(this->getX() + 60 + this->getWidth() - 95, this->getY() + 42, 2, true, a(tsl::style::color::ColorFrame));
+                renderer->drawRect(this->getX() + 60 + handlePos, this->getY() + 40, this->getWidth() - 95 - handlePos, 5, a(tsl::style::color::ColorFrame));
+                renderer->drawRect(this->getX() + 60, this->getY() + 40, handlePos, 5, a(tsl::style::color::ColorHighlight));
+
+                renderer->drawCircle(this->getX() + 62 + handlePos, this->getY() + 42, 18, true, a(tsl::style::color::ColorHandle));
+                renderer->drawCircle(this->getX() + 62 + handlePos, this->getY() + 42, 18, false, a(tsl::style::color::ColorFrame));
+            }
+
+            virtual void layout(u16 parentX, u16 parentY, u16 parentWidth, u16 parentHeight) override {
+                this->setBoundaries(this->getX(), this->getY(), this->getWidth(), tsl::style::TrackBarDefaultHeight);
+            }
+
+            virtual void drawFocusBackground(gfx::Renderer *renderer) {
+                // No background drawn here in HOS
+            }
+
+            virtual void drawHighlight(gfx::Renderer *renderer) override {
+                static double counter = 0;
+                const double progress = (std::sin(counter) + 1) / 2;
+                Color highlightColor = {   static_cast<u8>((0x2 - 0x8) * progress + 0x8),
+                                                static_cast<u8>((0x8 - 0xF) * progress + 0xF),
+                                                static_cast<u8>((0xC - 0xF) * progress + 0xF),
+                                                static_cast<u8>((0x6 - 0xD) * progress + 0xD) };
+
+                counter += 0.1F;
+
+                u16 handlePos = (this->getWidth() - 95) * static_cast<double>(this->m_value) / 100;
+
+                s32 x = 0;
+                s32 y = 0;
+
+                if (Element::m_highlightShaking) {
+                    auto t = (std::chrono::system_clock::now() - Element::m_highlightShakingStartTime);
+                    if (t >= 100ms)
+                        Element::m_highlightShaking = false;
+                    else {
+                        s32 amplitude = std::rand() % 5 + 5;
+
+                        switch (Element::m_highlightShakingDirection) {
+                            case FocusDirection::Up:
+                                y -= shakeAnimation(t, amplitude);
+                                break;
+                            case FocusDirection::Down:
+                                y += shakeAnimation(t, amplitude);
+                                break;
+                            case FocusDirection::Left:
+                                x -= shakeAnimation(t, amplitude);
+                                break;
+                            case FocusDirection::Right:
+                                x += shakeAnimation(t, amplitude);
+                                break;
+                            default:
+                                break;
+                        }
+
+                        x = std::clamp(x, -amplitude, amplitude);
+                        y = std::clamp(y, -amplitude, amplitude);
+                    }
+                }
+
+                for (u8 i = 16; i <= 19; i++) {
+                    renderer->drawCircle(this->getX() + 62 + x + handlePos, this->getY() + 42 + y, i, false, a(highlightColor));
+                }
+            }
+
+            /**
+             * @brief Gets the current value of the trackbar
+             *
+             * @return State
+             */
+            virtual inline u8 getProgress() {
+                return this->m_value;
+            }
+
+            /**
+             * @brief Sets the current state of the toggle. Updates the Value
+             *
+             * @param state State
+             */
+            virtual void setProgress(u8 value) {
+                this->m_value = value;
+            }
+
+            /**
+             * @brief Adds a listener that gets called whenever the state of the toggle changes
+             *
+             * @param stateChangedListener Listener with the current state passed in as parameter
+             */
+            void setValueChangedListener(std::function<void(u8)> valueChangedListener) {
+                this->m_valueChangedListener = valueChangedListener;
+            }
+
+        protected:
+            const char *m_icon = nullptr;
+            s16 m_value = 0;
+            bool m_interactionLocked = false;
+
+            std::function<void(u8)> m_valueChangedListener = [](u8){};
+        };
+
+
+        /**
+         * @brief A customizable analog trackbar going from 0% to 100% but using discrete steps (Like the volume slider)
+         *
+         */
+        class StepTrackBar : public TrackBar {
+        public:
+            /**
+             * @brief Constructor
+             *
+             * @param icon Icon shown next to the track bar
+             * @param numSteps Number of steps the track bar has
+             */
+            StepTrackBar(const char icon[3], size_t numSteps)
+                : TrackBar(icon), m_numSteps(numSteps) { }
+
+            virtual ~StepTrackBar() {}
+
+            virtual bool handleInput(u64 keysDown, u64 keysHeld, const HidTouchState &touchPos, HidAnalogStickState leftJoyStick, HidAnalogStickState rightJoyStick) override {
+                static u32 tick = 0;
+
+                if (keysHeld & HidNpadButton_AnyLeft && keysHeld & HidNpadButton_AnyRight) {
+                    tick = 0;
+                    return true;
+                }
+
+                if (keysHeld & (HidNpadButton_AnyLeft | HidNpadButton_AnyRight)) {
+                    if ((tick == 0 || tick > 20) && (tick % 3) == 0) {
+                        if (keysHeld & HidNpadButton_AnyLeft && this->m_value > 0) {
+                            this->m_value = std::max(this->m_value - (100 / (this->m_numSteps - 1)), 0);
+                        } else if (keysHeld & HidNpadButton_AnyRight && this->m_value < 100) {
+                            this->m_value = std::min(this->m_value + (100 / (this->m_numSteps - 1)), 100);
+                        } else {
+                            return false;
+                        }
+                        this->m_valueChangedListener(this->getProgress());
+                    }
+                    tick++;
+                    return true;
+                } else {
+                    tick = 0;
+                }
+
+                return false;
+            }
+
+            virtual bool onTouch(TouchEvent event, s32 currX, s32 currY, s32 prevX, s32 prevY, s32 initialX, s32 initialY) override {
+                if (this->inBounds(initialX, initialY)) {
+                    if (currY > this->getTopBound() && currY < this->getBottomBound()) {
+                        s16 newValue = (static_cast<double>(currX - (this->getX() + 60)) / static_cast<double>(this->getWidth() - 95)) * 100;
+
+                        if (newValue < 0) {
+                            newValue = 0;
+                        } else if (newValue > 100) {
+                            newValue = 100;
+                        } else {
+                            newValue = std::round(newValue / (100.0F / (this->m_numSteps - 1))) * (100.0F / (this->m_numSteps - 1));
+                        }
+
+                        if (newValue != this->m_value) {
+                            this->m_value = newValue;
+                            this->m_valueChangedListener(this->getProgress());
+                        }
+
+                        return true;
+                    }
+                }
+
+                return false;
+            }
+
+            /**
+             * @brief Gets the current value of the trackbar
+             *
+             * @return State
+             */
+            virtual inline u8 getProgress() override {
+                return this->m_value / (100 / (this->m_numSteps - 1));
+            }
+
+            /**
+             * @brief Sets the current state of the toggle. Updates the Value
+             *
+             * @param state State
+             */
+            virtual void setProgress(u8 value) override {
+                value = std::min(value, u8(this->m_numSteps - 1));
+                this->m_value = value * (100 / (this->m_numSteps - 1));
+            }
+
+        protected:
+            u8 m_numSteps = 1;
+        };
+
+
+        /**
+         * @brief A customizable trackbar with multiple discrete steps with specific names. Name gets displayed above the bar
+         *
+         */
+        class NamedStepTrackBar : public StepTrackBar {
+        public:
+            /**
+             * @brief Constructor
+             *
+             * @param icon Icon shown next to the track bar
+             * @param stepDescriptions Step names displayed above the track bar
+             */
+            NamedStepTrackBar(const char icon[3], std::initializer_list<std::string> stepDescriptions)
+                : StepTrackBar(icon, stepDescriptions.size()), m_stepDescriptions(stepDescriptions.begin(), stepDescriptions.end()) { }
+
+            virtual ~NamedStepTrackBar() {}
+
+            virtual void draw(gfx::Renderer *renderer) override {
+
+                u16 trackBarWidth = this->getWidth() - 95;
+                u16 stepWidth = trackBarWidth / (this->m_numSteps - 1);
+
+                for (u8 i = 0; i < this->m_numSteps; i++) {
+                    renderer->drawRect(this->getX() + 60 + stepWidth * i, this->getY() + 50, 1, 10, a(tsl::style::color::ColorFrame));
+                }
+
+                u8 currentDescIndex = std::clamp(this->m_value / (100 / (this->m_numSteps - 1)), 0, this->m_numSteps - 1);
+
+                auto [descWidth, descHeight] = renderer->drawString(this->m_stepDescriptions[currentDescIndex].c_str(), false, 0, 0, 15, tsl::style::color::ColorTransparent);
+                renderer->drawString(this->m_stepDescriptions[currentDescIndex].c_str(), false, ((this->getX() + 60) + (this->getWidth() - 95) / 2) - (descWidth / 2), this->getY() + 20, 15, a(tsl::style::color::ColorDescription));
+
+                StepTrackBar::draw(renderer);
+            }
+
+        protected:
+            std::vector<std::string> m_stepDescriptions;
+        };
+
+    }
+
+    // GUI
+
+    /**
+     * @brief The top level Gui class
+     * @note The main menu and every sub menu are a separate Gui. Create your own Gui class that extends from this one to create your own menus
+     *
+     */
+    class Gui {
+    public:
+        Gui() { }
+
+        virtual ~Gui() {
+            if (this->m_topElement != nullptr)
+                delete this->m_topElement;
+        }
+
+        /**
+         * @brief Creates all elements present in this Gui
+         * @note Implement this function and let it return a heap allocated element used as the top level element. This is usually some kind of frame e.g \ref OverlayFrame
+         *
+         * @return Top level element
+         */
+        virtual elm::Element* createUI() = 0;
+
+        /**
+         * @brief Called once per frame to update values
+         *
+         */
+        virtual void update() {}
+
+        /**
+         * @brief Called once per frame with the latest HID inputs
+         *
+         * @param keysDown Buttons pressed in the last frame
+         * @param keysHeld Buttons held down longer than one frame
+         * @param touchInput Last touch position
+         * @param leftJoyStick Left joystick position
+         * @param rightJoyStick Right joystick position
+         * @return Weather or not the input has been consumed
+         */
+        virtual bool handleInput(u64 keysDown, u64 keysHeld, const HidTouchState &touchPos, HidAnalogStickState leftJoyStick, HidAnalogStickState rightJoyStick) {
+            return false;
+        }
+
+        /**
+         * @brief Gets the top level element
+         *
+         * @return Top level element
+         */
+        elm::Element* getTopElement() {
+            return this->m_topElement;
+        }
+
+        /**
+         * @brief Get the currently focused element
+         *
+         * @return Focused element
+         */
+        elm::Element* getFocusedElement() {
+            return this->m_focusedElement;
+        }
+
+        /**
+         * @brief Requests focus to a element
+         * @note Use this function when focusing a element outside of a element's requestFocus function
+         *
+         * @param element Element to focus
+         * @param direction Focus direction
+         */
+        void requestFocus(elm::Element *element, FocusDirection direction, bool shake = true) {
+            elm::Element *oldFocus = this->m_focusedElement;
+
+            if (element != nullptr) {
+                this->m_focusedElement = element->requestFocus(oldFocus, direction);
+
+                if (oldFocus != nullptr)
+                    oldFocus->setFocused(false);
+
+                if (this->m_focusedElement != nullptr) {
+                    this->m_focusedElement->setFocused(true);
+                }
+            }
+
+            if (shake && oldFocus == this->m_focusedElement && this->m_focusedElement != nullptr)
+                this->m_focusedElement->shakeHighlight(direction);
+        }
+
+        /**
+         * @brief Removes focus from a element
+         *
+         * @param element Element to remove focus from. Pass nullptr to remove the focus unconditionally
+         */
+        void removeFocus(elm::Element* element = nullptr) {
+            if (element == nullptr || element == this->m_focusedElement) {
+                if (this->m_focusedElement != nullptr) {
+                    this->m_focusedElement->setFocused(false);
+                    this->m_focusedElement = nullptr;
+                }
+            }
+        }
+
+        void restoreFocus() {
+            this->m_initialFocusSet = false;
+        }
+
+    protected:
+        constexpr static inline auto a = &gfx::Renderer::a;
+
+    private:
+        elm::Element *m_focusedElement = nullptr;
+        elm::Element *m_topElement = nullptr;
+
+        bool m_initialFocusSet = false;
+
+        friend class Overlay;
+        friend class gfx::Renderer;
+
+        /**
+         * @brief Draws the Gui
+         *
+         * @param renderer
+         */
+        void draw(gfx::Renderer *renderer) {
+            if (this->m_topElement != nullptr)
+                this->m_topElement->draw(renderer);
+        }
+
+        bool initialFocusSet() {
+            return this->m_initialFocusSet;
+        }
+
+        void markInitialFocusSet() {
+            this->m_initialFocusSet = true;
+        }
+
+    };
+
+
+    // Overlay
+
+    /**
+     * @brief The top level Overlay class
+     * @note Every Tesla overlay should have exactly one Overlay class initializing services and loading the default Gui
+     */
+    class Overlay {
+    protected:
+        /**
+         * @brief Constructor
+         * @note Called once when the Overlay gets loaded
+         */
+        Overlay() {}
+    public:
+        /**
+         * @brief Deconstructor
+         * @note Called once when the Overlay exits
+         *
+         */
+        virtual ~Overlay() {}
+
+        /**
+         * @brief Initializes services
+         * @note Called once at the start to initializes services. You have a sm session available during this call, no need to initialize sm yourself
+         */
+        virtual void initServices() {}
+
+        /**
+         * @brief Exits services
+         * @note Make sure to exit all services you initialized in \ref Overlay::initServices() here to prevent leaking handles
+         */
+        virtual void exitServices() {}
+
+        /**
+         * @brief Called before overlay changes from invisible to visible state
+         *
+         */
+        virtual void onShow() {}
+
+        /**
+         * @brief Called before overlay changes from visible to invisible state
+         *
+         */
+        virtual void onHide() {}
+
+        /**
+         * @brief Loads the default Gui
+         * @note This function should return the initial Gui to load using the \ref Gui::initially<T>(Args.. args) function
+         *       e.g `return initially<GuiMain>();`
+         *
+         * @return Default Gui
+         */
+        virtual std::unique_ptr<tsl::Gui> loadInitialGui() = 0;
+
+        /**
+         * @brief Gets a reference to the current Gui on top of the Gui stack
+         *
+         * @return Current Gui reference
+         */
+        std::unique_ptr<tsl::Gui>& getCurrentGui() {
+            return this->m_guiStack.top();
+        }
+
+        /**
+         * @brief Shows the Gui
+         *
+         */
+        void show() {
+            if (this->m_disableNextAnimation) {
+                this->m_animationCounter = 5;
+                this->m_disableNextAnimation = false;
+            }
+            else {
+                this->m_fadeInAnimationPlaying = true;
+                this->m_animationCounter = 0;
+            }
+
+            this->onShow();
+
+            if (auto& currGui = this->getCurrentGui(); currGui != nullptr)
+                currGui->restoreFocus();
+        }
+
+        /**
+         * @brief Hides the Gui
+         *
+         */
+        void hide() {
+            if (this->m_disableNextAnimation) {
+                this->m_animationCounter = 0;
+                this->m_disableNextAnimation = false;
+            }
+            else {
+                this->m_fadeOutAnimationPlaying = true;
+                this->m_animationCounter = 5;
+            }
+
+            this->onHide();
+        }
+
+        /**
+         * @brief Returns whether fade animation is playing
+         *
+         * @return whether fade animation is playing
+         */
+        bool fadeAnimationPlaying() {
+            return this->m_fadeInAnimationPlaying || this->m_fadeOutAnimationPlaying;
+        }
+
+        /**
+         * @brief Closes the Gui
+         * @note This makes the Tesla overlay exit and return back to the Tesla-Menu
+         *
+         */
+        void close() {
+            this->m_shouldClose = true;
+        }
+
+        /**
+         * @brief Gets the Overlay instance
+         *
+         * @return Overlay instance
+         */
+        static inline Overlay* const get() {
+            return Overlay::s_overlayInstance;
+        }
+
+        /**
+         * @brief Creates the initial Gui of an Overlay and moves the object to the Gui stack
+         *
+         * @tparam T
+         * @tparam Args
+         * @param args
+         * @return constexpr std::unique_ptr<T>
+         */
+        template<typename T, typename ... Args>
+        constexpr inline std::unique_ptr<T> initially(Args&&... args) {
+            return std::make_unique<T>(args...);
+        }
+
+    private:
+        using GuiPtr = std::unique_ptr<tsl::Gui>;
+        std::stack<GuiPtr, std::list<GuiPtr>> m_guiStack;
+        static inline Overlay *s_overlayInstance = nullptr;
+
+        bool m_fadeInAnimationPlaying = true, m_fadeOutAnimationPlaying = false;
+        u8 m_animationCounter = 0;
+
+        bool m_shouldHide = false;
+        bool m_shouldClose = false;
+
+        bool m_disableNextAnimation = false;
+
+        bool m_closeOnExit;
+
+        /**
+         * @brief Initializes the Renderer
+         *
+         */
+        void initScreen() {
+            gfx::Renderer::get().init();
+        }
+
+        /**
+         * @brief Exits the Renderer
+         *
+         */
+        void exitScreen() {
+            gfx::Renderer::get().exit();
+        }
+
+        /**
+         * @brief Weather or not the Gui should get hidden
+         *
+         * @return should hide
+         */
+        bool shouldHide() {
+            return this->m_shouldHide;
+        }
+
+        /**
+         * @brief Weather or not hte Gui should get closed
+         *
+         * @return should close
+         */
+        bool shouldClose() {
+            return this->m_shouldClose;
+        }
+
+        /**
+         * @brief Handles fade in and fade out animations of the Overlay
+         *
+         */
+        void animationLoop() {
+            if (this->m_fadeInAnimationPlaying) {
+                this->m_animationCounter++;
+
+                if (this->m_animationCounter >= 5)
+                    this->m_fadeInAnimationPlaying = false;
+            }
+
+            if (this->m_fadeOutAnimationPlaying) {
+                this->m_animationCounter--;
+
+                if (this->m_animationCounter == 0) {
+                    this->m_fadeOutAnimationPlaying = false;
+                    this->m_shouldHide = true;
+                }
+            }
+
+            gfx::Renderer::setOpacity(0.2 * this->m_animationCounter);
+        }
+
+        /**
+         * @brief Main loop
+         *
+         */
+        void loop() {
+            auto& renderer = gfx::Renderer::get();
+
+            renderer.startFrame();
+
+            this->animationLoop();
+            this->getCurrentGui()->update();
+            this->getCurrentGui()->draw(&renderer);
+
+            renderer.endFrame();
+        }
+
+        /**
+         * @brief Called once per frame with the latest HID inputs
+         *
+         * @param keysDown Buttons pressed in the last frame
+         * @param keysHeld Buttons held down longer than one frame
+         * @param touchInput Last touch position
+         * @param leftJoyStick Left joystick position
+         * @param rightJoyStick Right joystick position
+         * @return Weather or not the input has been consumed
+         */
+        void handleInput(u64 keysDown, u64 keysHeld, bool touchDetected, const HidTouchState &touchPos, HidAnalogStickState joyStickPosLeft, HidAnalogStickState joyStickPosRight) {
+            static HidTouchState initialTouchPos = { 0 };
+            static HidTouchState oldTouchPos = { 0 };
+            static bool oldTouchDetected = false;
+            static elm::TouchEvent touchEvent;
+            static u32 repeatTick = 0;
+
+            auto& currentGui = this->getCurrentGui();
+
+            if (currentGui == nullptr)
+                return;
+
+            auto currentFocus = currentGui->getFocusedElement();
+            auto topElement = currentGui->getTopElement();
+
+            if (currentFocus == nullptr) {
+                if (keysDown & HidNpadButton_B) {
+                    if (!currentGui->handleInput(HidNpadButton_B, 0,{},{},{}))
+                        this->goBack();
+                    return;
+                }
+
+                if (topElement == nullptr)
+                    return;
+                else if (currentGui != nullptr) {
+                    if (!currentGui->initialFocusSet() || keysDown & (HidNpadButton_AnyUp | HidNpadButton_AnyDown | HidNpadButton_AnyLeft | HidNpadButton_AnyRight)) {
+                        currentGui->requestFocus(topElement, FocusDirection::None);
+                        currentGui->markInitialFocusSet();
+                        repeatTick = 1;
+                    }
+                }
+            }
+
+            bool handled = false;
+            elm::Element *parentElement = currentFocus;
+
+            while (!handled && parentElement != nullptr) {
+                handled = parentElement->onClick(keysDown);
+                parentElement = parentElement->getParent();
+            }
+
+            parentElement = currentFocus;
+            while (!handled && parentElement != nullptr) {
+                handled = parentElement->handleInput(keysDown, keysHeld, touchPos, joyStickPosLeft, joyStickPosRight);
+                parentElement = parentElement->getParent();
+            }
+
+            if (currentGui != this->getCurrentGui())
+                return;
+
+            handled = handled | currentGui->handleInput(keysDown, keysHeld, touchPos, joyStickPosLeft, joyStickPosRight);
+
+            if (!handled && currentFocus != nullptr) {
+                static bool shouldShake = true;
+
+                if ((((keysHeld & HidNpadButton_AnyUp) != 0) + ((keysHeld & HidNpadButton_AnyDown) != 0) + ((keysHeld & HidNpadButton_AnyLeft) != 0) + ((keysHeld & HidNpadButton_AnyRight) != 0)) == 1) {
+                    if ((repeatTick == 0 || repeatTick > 20) && (repeatTick % 4) == 0) {
+                        if (keysHeld & HidNpadButton_AnyUp)
+                            currentGui->requestFocus(currentFocus->getParent(), FocusDirection::Up, shouldShake);
+                        else if (keysHeld & HidNpadButton_AnyDown)
+                            currentGui->requestFocus(currentFocus->getParent(), FocusDirection::Down, shouldShake);
+                        else if (keysHeld & HidNpadButton_AnyLeft)
+                            currentGui->requestFocus(currentFocus->getParent(), FocusDirection::Left, shouldShake);
+                        else if (keysHeld & HidNpadButton_AnyRight)
+                            currentGui->requestFocus(currentFocus->getParent(), FocusDirection::Right, shouldShake);
+
+                        shouldShake = currentGui->getFocusedElement() != currentFocus;
+                    }
+                    repeatTick++;
+                } else {
+                    if (keysDown & HidNpadButton_B)
+                        this->goBack();
+                    repeatTick = 0;
+                    shouldShake = true;
+                }
+            }
+
+            if (!touchDetected && oldTouchDetected) {
+                if (currentGui != nullptr && topElement != nullptr)
+                    topElement->onTouch(elm::TouchEvent::Release, oldTouchPos.x, oldTouchPos.y, oldTouchPos.x, oldTouchPos.y, initialTouchPos.x, initialTouchPos.y);
+            }
+
+            if (touchDetected) {
+
+                u32 xDistance = std::abs(static_cast<s32>(initialTouchPos.x) - static_cast<s32>(touchPos.x));
+                u32 yDistance = std::abs(static_cast<s32>(initialTouchPos.y) - static_cast<s32>(touchPos.y));
+
+                xDistance *= xDistance;
+                yDistance *= yDistance;
+
+                if ((xDistance + yDistance) > 1000) {
+                    elm::Element::setInputMode(InputMode::TouchScroll);
+                    touchEvent = elm::TouchEvent::Scroll;
+                } else {
+                    if (touchEvent != elm::TouchEvent::Scroll)
+                        touchEvent = elm::TouchEvent::Hold;
+                }
+
+                if (!oldTouchDetected) {
+                    initialTouchPos = touchPos;
+                    elm::Element::setInputMode(InputMode::Touch);
+                    currentGui->removeFocus();
+                    touchEvent = elm::TouchEvent::Touch;
+                }
+
+
+                if (currentGui != nullptr && topElement != nullptr)
+                    topElement->onTouch(touchEvent, touchPos.x, touchPos.y, oldTouchPos.x, oldTouchPos.y, initialTouchPos.x, initialTouchPos.y);
+
+                oldTouchPos = touchPos;
+
+                // Hide overlay when touching out of bounds
+                if (touchPos.x >= cfg::FramebufferWidth) {
+                    if (tsl::elm::Element::getInputMode() == tsl::InputMode::Touch) {
+                        oldTouchPos = { 0 };
+                        initialTouchPos = { 0 };
+
+                        this->hide();
+                    }
+                }
+            } else {
+                if (oldTouchPos.x < 150U && oldTouchPos.y > cfg::FramebufferHeight - 73U)
+                    if (initialTouchPos.x < 150U && initialTouchPos.y > cfg::FramebufferHeight - 73U)
+                        if (!currentGui->handleInput(HidNpadButton_B, 0,{},{},{}))
+                            this->goBack();
+
+                elm::Element::setInputMode(InputMode::Controller);
+
+                oldTouchPos = { 0 };
+                initialTouchPos = { 0 };
+            }
+
+            oldTouchDetected = touchDetected;
+        }
+
+        /**
+         * @brief Clears the screen
+         *
+         */
+        void clearScreen() {
+            auto& renderer = gfx::Renderer::get();
+
+            renderer.startFrame();
+            renderer.clearScreen();
+            renderer.endFrame();
+        }
+
+        /**
+         * @brief Reset hide and close flags that were previously set by \ref Overlay::close() or \ref Overlay::hide()
+         *
+         */
+        void resetFlags() {
+            this->m_shouldHide = false;
+            this->m_shouldClose = false;
+        }
+
+        /**
+         * @brief Disables the next animation that would play
+         *
+         */
+        void disableNextAnimation() {
+            this->m_disableNextAnimation = true;
+        }
+
+        /**
+         * @brief Changes to a different Gui
+         *
+         * @param gui Gui to change to
+         * @return Reference to the Gui
+         */
+        std::unique_ptr<tsl::Gui>& changeTo(std::unique_ptr<tsl::Gui>&& gui) {
+            if (this->m_guiStack.top() != nullptr && this->m_guiStack.top()->m_focusedElement != nullptr)
+                this->m_guiStack.top()->m_focusedElement->resetClickAnimation();
+
+            gui->m_topElement = gui->createUI();
+
+            this->m_guiStack.push(std::move(gui));
+
+            return this->m_guiStack.top();
+        }
+
+        /**
+         * @brief Creates a new Gui and changes to it
+         *
+         * @tparam G Gui to create
+         * @tparam Args Arguments to pass to the Gui
+         * @param args Arguments to pass to the Gui
+         * @return Reference to the newly created Gui
+         */
+        template<typename G, typename ...Args>
+        std::unique_ptr<tsl::Gui>& changeTo(Args&&... args) {
+            return this->changeTo(std::make_unique<G>(std::forward<Args>(args)...));
+        }
+
+        /**
+         * @brief Pops the top Gui from the stack and goes back to the last one
+         * @note The Overlay gets closes once there are no more Guis on the stack
+         */
+        void goBack() {
+            if (!this->m_closeOnExit && this->m_guiStack.size() == 1) {
+                this->hide();
+                return;
+            }
+
+            if (!this->m_guiStack.empty())
+                this->m_guiStack.pop();
+
+            if (this->m_guiStack.empty())
+                this->close();
+        }
+
+        template<typename G, typename ...Args>
+        friend std::unique_ptr<tsl::Gui>& changeTo(Args&&... args);
+
+        friend void goBack();
+
+        template<typename, tsl::impl::LaunchFlags>
+        friend int loop(int argc, char** argv);
+
+        friend class tsl::Gui;
+    };
+
+
+    namespace impl {
+
+        /**
+         * @brief Data shared between the different threads
+         *
+         */
+        struct SharedThreadData {
+            bool running = false;
+
+            Event comboEvent = { 0 };
+
+            bool overlayOpen = false;
+
+            std::mutex dataMutex;
+            u64 keysDown = 0;
+            u64 keysDownPending = 0;
+            u64 keysHeld = 0;
+            HidTouchScreenState touchState = { 0 };
+            HidAnalogStickState joyStickPosLeft = { 0 }, joyStickPosRight = { 0 };
+        };
+
+
+        /**
+         * @brief Extract values from Tesla settings file
+         *
+         */
+        static void parseOverlaySettings() {
+            hlp::ini::IniData parsedConfig = hlp::ini::readOverlaySettings();
+
+            u64 decodedKeys = hlp::comboStringToKeys(parsedConfig["tesla"]["key_combo"]);
+            if (decodedKeys)
+                tsl::cfg::launchCombo = decodedKeys;
+        }
+
+        /**
+         * @brief Update and save launch combo keys
+         *
+         * @param keys the new combo keys
+         */
+        [[maybe_unused]] static void updateCombo(u64 keys) {
+            tsl::cfg::launchCombo = keys;
+            hlp::ini::updateOverlaySettings({
+                { "tesla", {
+                    { "key_combo", tsl::hlp::keysToComboString(keys) }
+                }}
+            });
+        }
+
+        /**
+         * @brief Background event polling loop thread
+         *
+         * @param args Used to pass in a pointer to a \ref SharedThreadData struct
+         */
+        static void backgroundEventPoller(void *args) {
+            SharedThreadData *shData = static_cast<SharedThreadData*>(args);
+
+            // To prevent focus glitchout, close the overlay immediately when the home button gets pressed
+            Event homeButtonPressEvent = {};
+            hidsysAcquireHomeButtonEventHandle(&homeButtonPressEvent, false);
+            eventClear(&homeButtonPressEvent);
+            hlp::ScopeGuard homeButtonEventGuard([&] { eventClose(&homeButtonPressEvent); });
+
+            // To prevent focus glitchout, close the overlay immediately when the power button gets pressed
+            Event powerButtonPressEvent = {};
+            hidsysAcquireSleepButtonEventHandle(&powerButtonPressEvent, false);
+            eventClear(&powerButtonPressEvent);
+            hlp::ScopeGuard powerButtonEventGuard([&] { eventClose(&powerButtonPressEvent); });
+
+            // Parse Tesla settings
+            impl::parseOverlaySettings();
+
+            // Configure input to take all controllers and up to 8
+            padConfigureInput(8, HidNpadStyleSet_NpadStandard | HidNpadStyleTag_NpadSystemExt);
+
+            // Initialize pad
+            PadState pad;
+            padInitializeAny(&pad);
+
+            // Initialize touch screen
+            hidInitializeTouchScreen();
+
+            // Drop all inputs from the previous overlay
+            padUpdate(&pad);
+
+            enum WaiterObject {
+                WaiterObject_HomeButton,
+                WaiterObject_PowerButton,
+
+                WaiterObject_Count
+            };
+
+            // Construct waiter
+            Waiter objects[2] = {
+                [WaiterObject_HomeButton] = waiterForEvent(&homeButtonPressEvent),
+                [WaiterObject_PowerButton] = waiterForEvent(&powerButtonPressEvent),
+            };
+
+            while (shData->running) {
+                // Scan for input changes
+                padUpdate(&pad);
+
+                // Read in HID values
+                {
+                    std::scoped_lock lock(shData->dataMutex);
+
+                    shData->keysDown = padGetButtonsDown(&pad);
+                    shData->keysHeld = padGetButtons(&pad);
+                    shData->joyStickPosLeft  = padGetStickPos(&pad, 0);
+                    shData->joyStickPosRight = padGetStickPos(&pad, 1);
+
+                    // Read in touch positions
+                    if (hidGetTouchScreenStates(&shData->touchState, 1) == 0)
+                        shData->touchState = { 0 };
+
+                    if (((shData->keysHeld & tsl::cfg::launchCombo) == tsl::cfg::launchCombo) && shData->keysDown & tsl::cfg::launchCombo) {
+                        if (shData->overlayOpen) {
+                            tsl::Overlay::get()->hide();
+                            shData->overlayOpen = false;
+                        }
+                        else
+                            eventFire(&shData->comboEvent);
+                    }
+
+                    shData->keysDownPending |= shData->keysDown;
+                }
+
+                //20 ms
+                s32 idx = 0;
+                Result rc = waitObjects(&idx, objects, WaiterObject_Count, 20'000'000ul);
+                if (R_SUCCEEDED(rc)) {
+                    if (shData->overlayOpen) {
+                        tsl::Overlay::get()->hide();
+                        shData->overlayOpen = false;
+                    }
+
+                    switch (idx) {
+                        case WaiterObject_HomeButton:
+                            eventClear(&homeButtonPressEvent);
+                            break;
+                        case WaiterObject_PowerButton:
+                            eventClear(&powerButtonPressEvent);
+                            break;
+                    }
+                } else if (rc != KERNELRESULT(TimedOut)) {
+                    ASSERT_FATAL(rc);
+                }
+            }
+        }
+
+    }
+
+    /**
+     * @brief Creates a new Gui and changes to it
+     *
+     * @tparam G Gui to create
+     * @tparam Args Arguments to pass to the Gui
+     * @param args Arguments to pass to the Gui
+     * @return Reference to the newly created Gui
+     */
+    template<typename G, typename ...Args>
+    std::unique_ptr<tsl::Gui>& changeTo(Args&&... args) {
+        return Overlay::get()->changeTo<G, Args...>(std::forward<Args>(args)...);
+    }
+
+    /**
+     * @brief Pops the top Gui from the stack and goes back to the last one
+     * @note The Overlay gets closed once there are no more Guis on the stack
+     */
+    static void goBack() {
+        Overlay::get()->goBack();
+    }
+
+    static void setNextOverlay(const std::string& ovlPath, std::string origArgs) {
+
+        std::string args = std::filesystem::path(ovlPath).filename();
+        args += " " + origArgs + " --skipCombo";
+
+        envSetNextLoad(ovlPath.c_str(), args.c_str());
+    }
+
+
+
+    /**
+     * @brief libtesla's main function
+     * @note Call it directly from main passing in argc and argv and returning it e.g `return tsl::loop<OverlayTest>(argc, argv);`
+     *
+     * @tparam TOverlay Your overlay class
+     * @tparam launchFlags \ref LaunchFlags
+     * @param argc argc
+     * @param argv argv
+     * @return int result
+     */
+    template<typename TOverlay, impl::LaunchFlags launchFlags>
+    static inline int loop(int argc, char** argv) {
+        static_assert(std::is_base_of_v<tsl::Overlay, TOverlay>, "tsl::loop expects a type derived from tsl::Overlay");
+
+        impl::SharedThreadData shData;
+
+        shData.running = true;
+
+        Thread backgroundThread;
+        threadCreate(&backgroundThread, impl::backgroundEventPoller, &shData, nullptr, 0x1000, 0x2c, -2);
+        threadStart(&backgroundThread);
+
+        eventCreate(&shData.comboEvent, false);
+
+        auto& overlay = tsl::Overlay::s_overlayInstance;
+        overlay = new TOverlay();
+        overlay->m_closeOnExit = (u8(launchFlags) & u8(impl::LaunchFlags::CloseOnExit)) == u8(impl::LaunchFlags::CloseOnExit);
+
+        tsl::hlp::doWithSmSession([&overlay]{ overlay->initServices(); });
+        overlay->initScreen();
+        overlay->changeTo(overlay->loadInitialGui());
+
+
+        // Argument parsing
+        for (u8 arg = 0; arg < argc; arg++) {
+            if (strcasecmp(argv[arg], "--skipCombo") == 0) {
+                eventFire(&shData.comboEvent);
+                overlay->disableNextAnimation();
+            }
+        }
+
+
+        while (shData.running) {
+
+            eventWait(&shData.comboEvent, UINT64_MAX);
+            eventClear(&shData.comboEvent);
+            shData.overlayOpen = true;
+
+
+            hlp::requestForeground(true);
+
+            overlay->show();
+            overlay->clearScreen();
+
+
+            while (shData.running) {
+                overlay->loop();
+
+                {
+                    std::scoped_lock lock(shData.dataMutex);
+                    if (!overlay->fadeAnimationPlaying()) {
+                        overlay->handleInput(shData.keysDownPending, shData.keysHeld, shData.touchState.count, shData.touchState.touches[0], shData.joyStickPosLeft, shData.joyStickPosRight);
+                    }
+                    shData.keysDownPending = 0;
+                }
+
+                if (overlay->shouldHide())
+                    break;
+
+                if (overlay->shouldClose())
+                    shData.running = false;
+            }
+
+            overlay->clearScreen();
+            overlay->resetFlags();
+
+            hlp::requestForeground(false);
+
+            shData.overlayOpen = false;
+            eventClear(&shData.comboEvent);
+        }
+
+        eventClose(&shData.comboEvent);
+
+        threadWaitForExit(&backgroundThread);
+        threadClose(&backgroundThread);
+
+        overlay->exitScreen();
+        overlay->exitServices();
+
+        delete overlay;
+
+        return 0;
+    }
+
+}
+
+
+#ifdef TESLA_INIT_IMPL
+
+namespace tsl::cfg {
+
+    u16 LayerWidth  = 0;
+    u16 LayerHeight = 0;
+    u16 LayerPosX   = 0;
+    u16 LayerPosY   = 0;
+    u16 FramebufferWidth  = 0;
+    u16 FramebufferHeight = 0;
+    u64 launchCombo = HidNpadButton_L | HidNpadButton_Down | HidNpadButton_StickR;
+}
+
+extern "C" {
+
+    u32 __nx_applet_type = AppletType_None;
+    u32 __nx_fs_num_sessions = 1;
+    u32  __nx_nv_transfermem_size = 0x40000;
+    ViLayerFlags __nx_vi_stray_layer_flags = (ViLayerFlags)0;
+
+    /**
+     * @brief libtesla service initializing function to override libnx's
+     *
+     */
+    void __appInit(void) {
+        tsl::hlp::doWithSmSession([]{
+            ASSERT_FATAL(fsInitialize());
+            ASSERT_FATAL(hidInitialize());                          // Controller inputs and Touch
+            if (hosversionAtLeast(16,0,0)) {
+                ASSERT_FATAL(plInitialize(PlServiceType_User));     // Font data. Use pl:u for 16.0.0+
+            } else {
+                ASSERT_FATAL(plInitialize(PlServiceType_System));   // Use pl:s for 15.0.1 and below to prevent qlaunch/overlaydisp session exhaustion
+            }
+            ASSERT_FATAL(pmdmntInitialize());                       // PID querying
+            ASSERT_FATAL(hidsysInitialize());                       // Focus control
+            ASSERT_FATAL(setsysInitialize());                       // Settings querying
+        });
+    }
+
+    /**
+     * @brief libtesla service exiting function to override libnx's
+     *
+     */
+    void __appExit(void) {
+        fsExit();
+        hidExit();
+        plExit();
+        pmdmntExit();
+        hidsysExit();
+        setsysExit();
+    }
+
+}
+
+#endif
diff --git a/overlay/source/elm_overlayframe.hpp b/overlay/source/elm_overlayframe.hpp
index 58a40e2..bf8f4a9 100644
--- a/overlay/source/elm_overlayframe.hpp
+++ b/overlay/source/elm_overlayframe.hpp
@@ -1,121 +1,121 @@
-#pragma once
-
-#include </mnt/c/Users/carel/Downloads/sys-tune-master-128bitsound/sys-tune-master/libtesla/include/tesla.hpp>
-#include <memory>
-#include <optional>
-
-/**
- * @brief The base frame which can contain another view
- *
- */
-class SysTuneOverlayFrame final : public tsl::elm::Element {
-public:
-    /**
-     * @brief Constructor
-     *
-     * @param title Name of the Overlay drawn bolt at the top
-     * @param subtitle Subtitle drawn bellow the title e.g version number
-     */
-    SysTuneOverlayFrame() : tsl::elm::Element() {}
-
-    void draw(tsl::gfx::Renderer *renderer) override {
-        renderer->fillScreen(a(tsl::style::color::ColorFrameBackground));
-        renderer->drawRect(tsl::cfg::FramebufferWidth - 1, 0, 1, tsl::cfg::FramebufferHeight, a(0xF222));
-
-        renderer->drawString("ovl-tune \u266B", false, 20, 50, 30, a(tsl::style::color::ColorText));
-        renderer->drawString(VERSION, false, 20, 70, 15, a(tsl::style::color::ColorDescription));
-
-        renderer->drawRect(15, tsl::cfg::FramebufferHeight - 73, tsl::cfg::FramebufferWidth - 30, 1, a(tsl::style::color::ColorText));
-
-        renderer->drawString(m_description, false, 30, 693, 23, a(tsl::style::color::ColorText));
-
-        if (m_contentElement != nullptr)
-            m_contentElement->frame(renderer);
-
-        if (m_toast) {
-            s32 width = tsl::cfg::FramebufferWidth - 20;
-            s32 height = 110;
-            s32 startX = 10;
-            s32 startY = tsl::cfg::FramebufferHeight - height;
-
-            u32 fadeDuration = 10;
-            if (m_toast->Current < fadeDuration) {
-                s32 offset = height - (height / fadeDuration) * m_toast->Current;
-                startY += offset;
-            }
-            if (m_toast->Duration - m_toast->Current < fadeDuration) {
-                s32 offset = height - (height / fadeDuration) * (m_toast->Duration - m_toast->Current);
-                startY += offset;
-            }
-
-            renderer->drawRect(startX, startY, width, height, a(tsl::style::color::ColorText));
-            renderer->drawRect(startX + 3, startY + 3, width - 6, height - 6, a(tsl::style::color::ColorFrameBackground));
-            renderer->drawString(m_toast->Header.c_str(), false, startX + 10, startY + 40, 26, a(tsl::style::color::ColorText));
-            renderer->drawString(m_toast->Content.c_str(), false, startX + 10, startY + 80, 23, a(tsl::style::color::ColorText), width - 20);
-
-            ++m_toast->Current;
-            if (m_toast->Duration <= m_toast->Current) m_toast = std::nullopt;
-        }
-    }
-
-    void layout(u16 parentX, u16 parentY, u16 parentWidth, u16 parentHeight) override {
-        setBoundaries(parentX, parentY, parentWidth, parentHeight);
-
-        if (m_contentElement != nullptr) {
-            m_contentElement->setBoundaries(parentX + 35, parentY + 125, parentWidth - 85, parentHeight - 73 - 125);
-            m_contentElement->invalidate();
-        }
-    }
-
-    Element* requestFocus(tsl::elm::Element *oldFocus, tsl::FocusDirection direction) override {
-        if (m_contentElement != nullptr)
-            return m_contentElement->requestFocus(oldFocus, direction);
-        else
-            return nullptr;
-    }
-
-    bool onTouch(tsl::elm::TouchEvent event, s32 currX, s32 currY, s32 prevX, s32 prevY, s32 initialX, s32 initialY) override {
-        // Discard touches outside bounds
-        if (!m_contentElement->inBounds(currX, currY))
-            return false;
-
-        if (m_contentElement != nullptr)
-            return m_contentElement->onTouch(event, currX, currY, prevX, prevY, initialX, initialY);
-        else return false;
-    }
-
-    /**
-     * @brief Sets the content of the frame
-     *
-     * @param content Element
-     */
-    void setContent(tsl::elm::Element *content) {
-        m_contentElement = std::unique_ptr<tsl::elm::Element>(content);
-
-        if (content != nullptr) {
-            m_contentElement->setParent(this);
-            this->invalidate();
-        }
-    }
-
-    void setDescription(const char *description) {
-        m_description = description;
-    }
-
-    struct Toast {
-        std::string Header;
-        std::string Content;
-        u32 Duration;
-        u32 Current;
-    };
-
-    void setToast(std::string header, std::string content) {
-        m_toast = Toast { header, content, 150, 0 };
-    }
-
-private:
-    std::unique_ptr<tsl::elm::Element> m_contentElement;
-    const char *m_description = "\uE0E1  Back     \uE0E0  OK";
-
-    std::optional<Toast> m_toast;
-};
+#pragma once
+
+#include </mnt/c/Users/carel/Downloads/sys-tune-master-128bitsound/sys-tune-master/libtesla/include/tesla.hpp>
+#include <memory>
+#include <optional>
+
+/**
+ * @brief The base frame which can contain another view
+ *
+ */
+class SysTuneOverlayFrame final : public tsl::elm::Element {
+public:
+    /**
+     * @brief Constructor
+     *
+     * @param title Name of the Overlay drawn bolt at the top
+     * @param subtitle Subtitle drawn bellow the title e.g version number
+     */
+    SysTuneOverlayFrame() : tsl::elm::Element() {}
+
+    void draw(tsl::gfx::Renderer *renderer) override {
+        renderer->fillScreen(a(tsl::style::color::ColorFrameBackground));
+        renderer->drawRect(tsl::cfg::FramebufferWidth - 1, 0, 1, tsl::cfg::FramebufferHeight, a(0xF222));
+
+        renderer->drawString("ovl-tune \u266B", false, 20, 50, 30, a(tsl::style::color::ColorText));
+        renderer->drawString(VERSION, false, 20, 70, 15, a(tsl::style::color::ColorDescription));
+
+        renderer->drawRect(15, tsl::cfg::FramebufferHeight - 73, tsl::cfg::FramebufferWidth - 30, 1, a(tsl::style::color::ColorText));
+
+        renderer->drawString(m_description, false, 30, 693, 23, a(tsl::style::color::ColorText));
+
+        if (m_contentElement != nullptr)
+            m_contentElement->frame(renderer);
+
+        if (m_toast) {
+            s32 width = tsl::cfg::FramebufferWidth - 20;
+            s32 height = 110;
+            s32 startX = 10;
+            s32 startY = tsl::cfg::FramebufferHeight - height;
+
+            u32 fadeDuration = 10;
+            if (m_toast->Current < fadeDuration) {
+                s32 offset = height - (height / fadeDuration) * m_toast->Current;
+                startY += offset;
+            }
+            if (m_toast->Duration - m_toast->Current < fadeDuration) {
+                s32 offset = height - (height / fadeDuration) * (m_toast->Duration - m_toast->Current);
+                startY += offset;
+            }
+
+            renderer->drawRect(startX, startY, width, height, a(tsl::style::color::ColorText));
+            renderer->drawRect(startX + 3, startY + 3, width - 6, height - 6, a(tsl::style::color::ColorFrameBackground));
+            renderer->drawString(m_toast->Header.c_str(), false, startX + 10, startY + 40, 26, a(tsl::style::color::ColorText));
+            renderer->drawString(m_toast->Content.c_str(), false, startX + 10, startY + 80, 23, a(tsl::style::color::ColorText), width - 20);
+
+            ++m_toast->Current;
+            if (m_toast->Duration <= m_toast->Current) m_toast = std::nullopt;
+        }
+    }
+
+    void layout(u16 parentX, u16 parentY, u16 parentWidth, u16 parentHeight) override {
+        setBoundaries(parentX, parentY, parentWidth, parentHeight);
+
+        if (m_contentElement != nullptr) {
+            m_contentElement->setBoundaries(parentX + 35, parentY + 125, parentWidth - 85, parentHeight - 73 - 125);
+            m_contentElement->invalidate();
+        }
+    }
+
+    Element* requestFocus(tsl::elm::Element *oldFocus, tsl::FocusDirection direction) override {
+        if (m_contentElement != nullptr)
+            return m_contentElement->requestFocus(oldFocus, direction);
+        else
+            return nullptr;
+    }
+
+    bool onTouch(tsl::elm::TouchEvent event, s32 currX, s32 currY, s32 prevX, s32 prevY, s32 initialX, s32 initialY) override {
+        // Discard touches outside bounds
+        if (!m_contentElement->inBounds(currX, currY))
+            return false;
+
+        if (m_contentElement != nullptr)
+            return m_contentElement->onTouch(event, currX, currY, prevX, prevY, initialX, initialY);
+        else return false;
+    }
+
+    /**
+     * @brief Sets the content of the frame
+     *
+     * @param content Element
+     */
+    void setContent(tsl::elm::Element *content) {
+        m_contentElement = std::unique_ptr<tsl::elm::Element>(content);
+
+        if (content != nullptr) {
+            m_contentElement->setParent(this);
+            this->invalidate();
+        }
+    }
+
+    void setDescription(const char *description) {
+        m_description = description;
+    }
+
+    struct Toast {
+        std::string Header;
+        std::string Content;
+        u32 Duration;
+        u32 Current;
+    };
+
+    void setToast(std::string header, std::string content) {
+        m_toast = Toast { header, content, 150, 0 };
+    }
+
+private:
+    std::unique_ptr<tsl::elm::Element> m_contentElement;
+    const char *m_description = "\uE0E1  Back     \uE0E0  OK";
+
+    std::optional<Toast> m_toast;
+};
diff --git a/overlay/source/elm_status_bar.cpp b/overlay/source/elm_status_bar.cpp
index 412cb52..7bf7910 100644
--- a/overlay/source/elm_status_bar.cpp
+++ b/overlay/source/elm_status_bar.cpp
@@ -1,284 +1,284 @@
-#include "elm_status_bar.hpp"
-
-#include "symbol.hpp"
-#include "config/config.hpp"
-
-namespace {
-
-    char path_buffer[FS_MAX_PATH] = "";
-    char current_buffer[0x20] = "";
-    char total_buffer[0x20] = "";
-
-    void NullLastDot(char *str) {
-        char *end = str + strlen(str) - 1;
-        while (str != end) {
-            if (*end == '.') {
-                *end = '\0';
-                return;
-            }
-            end--;
-        }
-    }
-
-}
-
-StatusBar::StatusBar() {
-    if (R_FAILED(tuneGetRepeatMode(&this->m_repeat)))
-        this->m_repeat = TuneRepeatMode_Off;
-    if (R_FAILED(tuneGetShuffleMode(&this->m_shuffle)))
-        this->m_shuffle = TuneShuffleMode_Off;
-    if (R_FAILED(tuneGetCurrentQueueItem(path_buffer, FS_MAX_PATH, &this->m_stats))) {
-        path_buffer[0] = '\0';
-        this->m_stats = {};
-    }
-}
-
-tsl::elm::Element *StatusBar::requestFocus(tsl::elm::Element *oldFocus, tsl::FocusDirection direction) {
-    return this;
-}
-
-void StatusBar::draw(tsl::gfx::Renderer *renderer) {
-    if (this->m_touched && Element::getInputMode() == tsl::InputMode::Touch) {
-        renderer->drawRect(ELEMENT_BOUNDS(this), a(tsl::style::color::ColorClickAnimation));
-    }
-
-    if (this->m_text_width == 0) {
-        /* Get base width. */
-        auto [width, height] = renderer->drawString(this->m_current_track.data(), false, 0, 0, 26, tsl::style::color::ColorTransparent);
-        this->m_truncated = static_cast<s32>(width) > (this->getWidth() - 30);
-        if (this->m_truncated) {
-            /* Get width with spacing. */
-            this->m_scroll_text = std::string(m_current_track).append("       ");
-            auto [ex_width, ex_height] = renderer->drawString(this->m_scroll_text.c_str(), false, 0, 0, 26, tsl::style::color::ColorTransparent);
-            this->m_scroll_text.append(m_current_track);
-            this->m_text_width = ex_width;
-        } else {
-            this->m_text_width = width;
-        }
-    }
-
-    /* Current track. */
-    if (this->m_truncated) {
-        renderer->drawString(this->m_scroll_text.c_str(), false, this->getX() + 15 - this->m_scroll_offset, this->getY() + 40, 26, tsl::style::color::ColorText);
-        if (this->m_counter == 120) {
-            if (this->m_scroll_offset == this->m_text_width) {
-                this->m_scroll_offset = 0;
-                this->m_counter = 0;
-            } else {
-                this->m_scroll_offset++;
-            }
-        } else {
-            this->m_counter++;
-        }
-    } else {
-        renderer->drawString(this->m_current_track.data(), false, this->getX() + 15, this->getY() + 40, 26, tsl::style::color::ColorText);
-    }
-
-    /* Seek bar. */
-    u32 bar_length = this->getWidth() - 30;
-    renderer->drawRect(this->getX() + 15, this->getY() + tsl::style::ListItemDefaultHeight, bar_length, 3, 0xffff);
-
-    if (this->m_percentage > 0) {
-        renderer->drawCircle(this->getX() + 15, this->getY() + tsl::style::ListItemDefaultHeight + 1, 3, true, 0xf00f);
-        renderer->drawRect(this->getX() + 15, this->getY() + tsl::style::ListItemDefaultHeight - 2, bar_length * this->m_percentage, 7, 0xf00f);
-        renderer->drawCircle(this->getX() + 15 + bar_length * this->m_percentage, this->getY() + tsl::style::ListItemDefaultHeight + 1, 3, true, 0xf00f);
-    }
-
-    /* Progress */
-    renderer->drawString(current_buffer, false, this->getX() + 15, this->getY() + CenterOfLine(1) + 8, 20, 0xffff);
-
-    /* Repeat indicator */
-    auto repeat_color = this->m_repeat ? tsl::style::color::ColorHighlight : tsl::style::color::ColorHeaderBar;
-    if (this->m_repeat == TuneRepeatMode_One) {
-        symbol::repeat::one::symbol.draw(GetRepeatX(), GetRepeatY(), renderer, repeat_color);
-    } else {
-        symbol::repeat::all::symbol.draw(GetRepeatX(), GetRepeatY(), renderer, repeat_color);
-    }
-
-    /* Shuffle indicator */
-    auto shuffle_color = this->m_shuffle ? tsl::style::color::ColorHighlight : tsl::style::color::ColorHeaderBar;
-    symbol::shuffle::symbol.draw(GetShuffleX(), GetShuffleY(), renderer, shuffle_color);
-
-    /* Song length */
-    renderer->drawString(total_buffer, false, this->getX() + this->getWidth() - 75, this->getY() + CenterOfLine(1) + 8, 20, 0xffff);
-
-    /* Backward button */
-    symbol::backward::symbol.draw(GetBackwardX(), GetBackwardY(), renderer, tsl::style::color::ColorText);
-
-    /* Prev button */
-    symbol::prev::symbol.draw(GetPrevX(), GetPrevY(), renderer, tsl::style::color::ColorText);
-
-    /* Current playback glyph */
-    this->GetPlaybackSymbol().draw(GetPlayStateX(), GetPlayStateY(), renderer, tsl::style::color::ColorText);
-
-    /* Next button */
-    symbol::next::symbol.draw(GetNextX(), GetNextY(), renderer, tsl::style::color::ColorText);
-
-    /* Forward button */
-    symbol::forward::symbol.draw(GetForwardX(), GetForwardY(), renderer, tsl::style::color::ColorText);
-}
-
-void StatusBar::layout(u16 parentX, u16 parentY, u16 parentWidth, u16 parentHeight) {
-    this->setBoundaries(this->getX(), this->getY(), this->getWidth(), tsl::style::ListItemDefaultHeight * 3);
-}
-
-bool StatusBar::onClick(u64 keys) {
-    u8 handled = 0;
-    if (keys & HidNpadButton_A) {
-        this->CyclePlay();
-        handled++;
-    }
-    if (keys & HidNpadButton_X) {
-        this->CycleRepeat();
-        handled++;
-    }
-    if (keys & HidNpadButton_Y) {
-        this->CycleShuffle();
-        handled++;
-    }
-    if (keys & HidNpadButton_Right) {
-        this->Next();
-        handled++;
-    }
-    if (keys & HidNpadButton_Left) {
-        this->Prev();
-        handled++;
-    }
-    if (keys & HidNpadButton_ZL) {
-        this->Backward();
-        handled++;
-    }
-    if (keys & HidNpadButton_ZR) {
-        this->Forward();
-        handled++;
-    }
-    return handled;
-}
-
-#define TOUCHED(button) (currX > (Get##button##X() - 30) && currX < (Get##button##X() + 30) && prevY > (Get##button##Y() - 30) && prevY < (Get##button##Y() + 30))
-
-bool StatusBar::onTouch(tsl::elm::TouchEvent event, s32 currX, s32 currY, s32 prevX, s32 prevY, s32 initialX, s32 initialY) {
-    if (event == tsl::elm::TouchEvent::Touch)
-        this->m_touched = this->inBounds(currX, currY);
-
-    if (event == tsl::elm::TouchEvent::Release && this->m_touched) {
-        this->m_touched = false;
-
-        if (Element::getInputMode() == tsl::InputMode::Touch) {
-            u16 handled = 0;
-            if (TOUCHED(Repeat)) {
-                this->CycleRepeat();
-                handled++;
-            }
-            if (TOUCHED(Shuffle)) {
-                this->CycleShuffle();
-                handled++;
-            }
-            if (TOUCHED(PlayState)) {
-                this->CyclePlay();
-                handled++;
-            }
-            if (TOUCHED(Prev)) {
-                this->Prev();
-                handled++;
-            }
-            if (TOUCHED(Next)) {
-                this->Next();
-                handled++;
-            }
-            if (TOUCHED(Forward)) {
-                this->Forward();
-                handled++;
-            }
-            if (TOUCHED(Backward)) {
-                this->Backward();
-                handled++;
-            }
-
-            if (handled > 0) {
-                this->m_clickAnimationProgress = 0;
-                return true;
-            }
-        }
-    }
-
-    return false;
-}
-
-void StatusBar::update() {
-    if (R_FAILED(tuneGetStatus(&this->m_playing)))
-        this->m_playing = false;
-
-    if (R_SUCCEEDED(tuneGetCurrentQueueItem(path_buffer, FS_MAX_PATH, &this->m_stats))) {
-        /* Only show file name. Ignore path to file and extension. */
-        size_t length = std::strlen(path_buffer);
-        NullLastDot(path_buffer);
-        for (size_t i = length; i >= 0; i--) {
-            if (path_buffer[i] == '/') {
-                if (this->m_current_track != path_buffer + i + 1) {
-                    this->m_current_track = path_buffer + i + 1;
-                    this->m_text_width = 0;
-                    this->m_scroll_offset = 0;
-                    this->m_counter = 0;
-                }
-                break;
-            }
-        }
-    } else {
-        this->m_current_track = "Stopped!";
-        this->m_stats = {};
-        /* Reset scrolling text */
-        this->m_text_width = 0;
-        this->m_scroll_offset = 0;
-        this->m_counter = 0;
-    }
-    /* Progress text and bar */
-    u32 current = this->m_stats.current_frame / this->m_stats.sample_rate;
-    u32 total = this->m_stats.total_frames / this->m_stats.sample_rate;
-    this->m_percentage = std::clamp(double(this->m_stats.current_frame) / double(this->m_stats.total_frames), (double)0.0, (double)0.0);
-
-    std::snprintf(current_buffer, sizeof(current_buffer), "%d:%02d", current / 60, current % 60);
-    std::snprintf(total_buffer, sizeof(total_buffer), "%d:%02d", total / 60, total % 60);
-}
-
-void StatusBar::CycleRepeat() {
-    this->m_repeat = static_cast<TuneRepeatMode>((this->m_repeat + 1) % TuneRepeatMode_Count);
-    config::set_repeat(this->m_repeat);
-    tuneSetRepeatMode(this->m_repeat);
-}
-
-void StatusBar::CycleShuffle() {
-    this->m_shuffle = static_cast<TuneShuffleMode>((this->m_shuffle + 1) % TuneShuffleMode_Count);
-    config::set_shuffle(this->m_shuffle);
-    tuneSetShuffleMode(this->m_shuffle);
-}
-
-void StatusBar::CyclePlay() {
-    if (this->m_playing) {
-        tunePause();
-    } else {
-        tunePlay();
-    }
-}
-
-void StatusBar::Prev() {
-    tunePrev();
-}
-
-void StatusBar::Next() {
-    tuneNext();
-}
-
-void StatusBar::Forward() {
-    u32 next = std::min(this->m_stats.current_frame + (this->m_stats.total_frames / 10), this->m_stats.total_frames);
-    tuneSeek(next);
-}
-
-void StatusBar::Backward() {
-    u32 next = std::max(s64(this->m_stats.current_frame) - s64(this->m_stats.total_frames / 10), s64(0));
-    tuneSeek(next);
-}
-
-const AlphaSymbol &StatusBar::GetPlaybackSymbol() {
-    return this->m_playing ? symbol::pause::symbol : symbol::play::symbol;
-}
+#include "elm_status_bar.hpp"
+
+#include "symbol.hpp"
+#include "config/config.hpp"
+
+namespace {
+
+    char path_buffer[FS_MAX_PATH] = "";
+    char current_buffer[0x20] = "";
+    char total_buffer[0x20] = "";
+
+    void NullLastDot(char *str) {
+        char *end = str + strlen(str) - 1;
+        while (str != end) {
+            if (*end == '.') {
+                *end = '\0';
+                return;
+            }
+            end--;
+        }
+    }
+
+}
+
+StatusBar::StatusBar() {
+    if (R_FAILED(tuneGetRepeatMode(&this->m_repeat)))
+        this->m_repeat = TuneRepeatMode_Off;
+    if (R_FAILED(tuneGetShuffleMode(&this->m_shuffle)))
+        this->m_shuffle = TuneShuffleMode_Off;
+    if (R_FAILED(tuneGetCurrentQueueItem(path_buffer, FS_MAX_PATH, &this->m_stats))) {
+        path_buffer[0] = '\0';
+        this->m_stats = {};
+    }
+}
+
+tsl::elm::Element *StatusBar::requestFocus(tsl::elm::Element *oldFocus, tsl::FocusDirection direction) {
+    return this;
+}
+
+void StatusBar::draw(tsl::gfx::Renderer *renderer) {
+    if (this->m_touched && Element::getInputMode() == tsl::InputMode::Touch) {
+        renderer->drawRect(ELEMENT_BOUNDS(this), a(tsl::style::color::ColorClickAnimation));
+    }
+
+    if (this->m_text_width == 0) {
+        /* Get base width. */
+        auto [width, height] = renderer->drawString(this->m_current_track.data(), false, 0, 0, 26, tsl::style::color::ColorTransparent);
+        this->m_truncated = static_cast<s32>(width) > (this->getWidth() - 30);
+        if (this->m_truncated) {
+            /* Get width with spacing. */
+            this->m_scroll_text = std::string(m_current_track).append("       ");
+            auto [ex_width, ex_height] = renderer->drawString(this->m_scroll_text.c_str(), false, 0, 0, 26, tsl::style::color::ColorTransparent);
+            this->m_scroll_text.append(m_current_track);
+            this->m_text_width = ex_width;
+        } else {
+            this->m_text_width = width;
+        }
+    }
+
+    /* Current track. */
+    if (this->m_truncated) {
+        renderer->drawString(this->m_scroll_text.c_str(), false, this->getX() + 15 - this->m_scroll_offset, this->getY() + 40, 26, tsl::style::color::ColorText);
+        if (this->m_counter == 120) {
+            if (this->m_scroll_offset == this->m_text_width) {
+                this->m_scroll_offset = 0;
+                this->m_counter = 0;
+            } else {
+                this->m_scroll_offset++;
+            }
+        } else {
+            this->m_counter++;
+        }
+    } else {
+        renderer->drawString(this->m_current_track.data(), false, this->getX() + 15, this->getY() + 40, 26, tsl::style::color::ColorText);
+    }
+
+    /* Seek bar. */
+    u32 bar_length = this->getWidth() - 30;
+    renderer->drawRect(this->getX() + 15, this->getY() + tsl::style::ListItemDefaultHeight, bar_length, 3, 0xffff);
+
+    if (this->m_percentage > 0) {
+        renderer->drawCircle(this->getX() + 15, this->getY() + tsl::style::ListItemDefaultHeight + 1, 3, true, 0xf00f);
+        renderer->drawRect(this->getX() + 15, this->getY() + tsl::style::ListItemDefaultHeight - 2, bar_length * this->m_percentage, 7, 0xf00f);
+        renderer->drawCircle(this->getX() + 15 + bar_length * this->m_percentage, this->getY() + tsl::style::ListItemDefaultHeight + 1, 3, true, 0xf00f);
+    }
+
+    /* Progress */
+    renderer->drawString(current_buffer, false, this->getX() + 15, this->getY() + CenterOfLine(1) + 8, 20, 0xffff);
+
+    /* Repeat indicator */
+    auto repeat_color = this->m_repeat ? tsl::style::color::ColorHighlight : tsl::style::color::ColorHeaderBar;
+    if (this->m_repeat == TuneRepeatMode_One) {
+        symbol::repeat::one::symbol.draw(GetRepeatX(), GetRepeatY(), renderer, repeat_color);
+    } else {
+        symbol::repeat::all::symbol.draw(GetRepeatX(), GetRepeatY(), renderer, repeat_color);
+    }
+
+    /* Shuffle indicator */
+    auto shuffle_color = this->m_shuffle ? tsl::style::color::ColorHighlight : tsl::style::color::ColorHeaderBar;
+    symbol::shuffle::symbol.draw(GetShuffleX(), GetShuffleY(), renderer, shuffle_color);
+
+    /* Song length */
+    renderer->drawString(total_buffer, false, this->getX() + this->getWidth() - 75, this->getY() + CenterOfLine(1) + 8, 20, 0xffff);
+
+    /* Backward button */
+    symbol::backward::symbol.draw(GetBackwardX(), GetBackwardY(), renderer, tsl::style::color::ColorText);
+
+    /* Prev button */
+    symbol::prev::symbol.draw(GetPrevX(), GetPrevY(), renderer, tsl::style::color::ColorText);
+
+    /* Current playback glyph */
+    this->GetPlaybackSymbol().draw(GetPlayStateX(), GetPlayStateY(), renderer, tsl::style::color::ColorText);
+
+    /* Next button */
+    symbol::next::symbol.draw(GetNextX(), GetNextY(), renderer, tsl::style::color::ColorText);
+
+    /* Forward button */
+    symbol::forward::symbol.draw(GetForwardX(), GetForwardY(), renderer, tsl::style::color::ColorText);
+}
+
+void StatusBar::layout(u16 parentX, u16 parentY, u16 parentWidth, u16 parentHeight) {
+    this->setBoundaries(this->getX(), this->getY(), this->getWidth(), tsl::style::ListItemDefaultHeight * 3);
+}
+
+bool StatusBar::onClick(u64 keys) {
+    u8 handled = 0;
+    if (keys & HidNpadButton_A) {
+        this->CyclePlay();
+        handled++;
+    }
+    if (keys & HidNpadButton_X) {
+        this->CycleRepeat();
+        handled++;
+    }
+    if (keys & HidNpadButton_Y) {
+        this->CycleShuffle();
+        handled++;
+    }
+    if (keys & HidNpadButton_Right) {
+        this->Next();
+        handled++;
+    }
+    if (keys & HidNpadButton_Left) {
+        this->Prev();
+        handled++;
+    }
+    if (keys & HidNpadButton_ZL) {
+        this->Backward();
+        handled++;
+    }
+    if (keys & HidNpadButton_ZR) {
+        this->Forward();
+        handled++;
+    }
+    return handled;
+}
+
+#define TOUCHED(button) (currX > (Get##button##X() - 30) && currX < (Get##button##X() + 30) && prevY > (Get##button##Y() - 30) && prevY < (Get##button##Y() + 30))
+
+bool StatusBar::onTouch(tsl::elm::TouchEvent event, s32 currX, s32 currY, s32 prevX, s32 prevY, s32 initialX, s32 initialY) {
+    if (event == tsl::elm::TouchEvent::Touch)
+        this->m_touched = this->inBounds(currX, currY);
+
+    if (event == tsl::elm::TouchEvent::Release && this->m_touched) {
+        this->m_touched = false;
+
+        if (Element::getInputMode() == tsl::InputMode::Touch) {
+            u16 handled = 0;
+            if (TOUCHED(Repeat)) {
+                this->CycleRepeat();
+                handled++;
+            }
+            if (TOUCHED(Shuffle)) {
+                this->CycleShuffle();
+                handled++;
+            }
+            if (TOUCHED(PlayState)) {
+                this->CyclePlay();
+                handled++;
+            }
+            if (TOUCHED(Prev)) {
+                this->Prev();
+                handled++;
+            }
+            if (TOUCHED(Next)) {
+                this->Next();
+                handled++;
+            }
+            if (TOUCHED(Forward)) {
+                this->Forward();
+                handled++;
+            }
+            if (TOUCHED(Backward)) {
+                this->Backward();
+                handled++;
+            }
+
+            if (handled > 0) {
+                this->m_clickAnimationProgress = 0;
+                return true;
+            }
+        }
+    }
+
+    return false;
+}
+
+void StatusBar::update() {
+    if (R_FAILED(tuneGetStatus(&this->m_playing)))
+        this->m_playing = false;
+
+    if (R_SUCCEEDED(tuneGetCurrentQueueItem(path_buffer, FS_MAX_PATH, &this->m_stats))) {
+        /* Only show file name. Ignore path to file and extension. */
+        size_t length = std::strlen(path_buffer);
+        NullLastDot(path_buffer);
+        for (size_t i = length; i >= 0; i--) {
+            if (path_buffer[i] == '/') {
+                if (this->m_current_track != path_buffer + i + 1) {
+                    this->m_current_track = path_buffer + i + 1;
+                    this->m_text_width = 0;
+                    this->m_scroll_offset = 0;
+                    this->m_counter = 0;
+                }
+                break;
+            }
+        }
+    } else {
+        this->m_current_track = "Stopped!";
+        this->m_stats = {};
+        /* Reset scrolling text */
+        this->m_text_width = 0;
+        this->m_scroll_offset = 0;
+        this->m_counter = 0;
+    }
+    /* Progress text and bar */
+    u32 current = this->m_stats.current_frame / this->m_stats.sample_rate;
+    u32 total = this->m_stats.total_frames / this->m_stats.sample_rate;
+    this->m_percentage = std::clamp(double(this->m_stats.current_frame) / double(this->m_stats.total_frames), (double)0.0, (double)0.0);
+
+    std::snprintf(current_buffer, sizeof(current_buffer), "%d:%02d", current / 60, current % 60);
+    std::snprintf(total_buffer, sizeof(total_buffer), "%d:%02d", total / 60, total % 60);
+}
+
+void StatusBar::CycleRepeat() {
+    this->m_repeat = static_cast<TuneRepeatMode>((this->m_repeat + 1) % TuneRepeatMode_Count);
+    config::set_repeat(this->m_repeat);
+    tuneSetRepeatMode(this->m_repeat);
+}
+
+void StatusBar::CycleShuffle() {
+    this->m_shuffle = static_cast<TuneShuffleMode>((this->m_shuffle + 1) % TuneShuffleMode_Count);
+    config::set_shuffle(this->m_shuffle);
+    tuneSetShuffleMode(this->m_shuffle);
+}
+
+void StatusBar::CyclePlay() {
+    if (this->m_playing) {
+        tunePause();
+    } else {
+        tunePlay();
+    }
+}
+
+void StatusBar::Prev() {
+    tunePrev();
+}
+
+void StatusBar::Next() {
+    tuneNext();
+}
+
+void StatusBar::Forward() {
+    u32 next = std::min(this->m_stats.current_frame + (this->m_stats.total_frames / 10), this->m_stats.total_frames);
+    tuneSeek(next);
+}
+
+void StatusBar::Backward() {
+    u32 next = std::max(s64(this->m_stats.current_frame) - s64(this->m_stats.total_frames / 10), s64(0));
+    tuneSeek(next);
+}
+
+const AlphaSymbol &StatusBar::GetPlaybackSymbol() {
+    return this->m_playing ? symbol::pause::symbol : symbol::play::symbol;
+}
diff --git a/overlay/source/elm_status_bar.hpp b/overlay/source/elm_status_bar.hpp
index ed87a93..522c1d9 100644
--- a/overlay/source/elm_status_bar.hpp
+++ b/overlay/source/elm_status_bar.hpp
@@ -1,92 +1,92 @@
-#pragma once
-
-#include "../../ipc/tune.h"
-#include "symbol.hpp"
-
-#include </mnt/c/Users/carel/Downloads/sys-tune-master-128bitsound/sys-tune-master/libtesla/include/tesla.hpp>
-
-class StatusBar final : public tsl::elm::Element {
-  private:
-    bool m_playing;
-    TuneRepeatMode m_repeat;
-    TuneShuffleMode m_shuffle;
-    TuneCurrentStats m_stats;
-
-    double m_percentage;
-
-    std::string_view m_current_track;
-    std::string m_scroll_text;
-    u32 m_text_width;
-    u32 m_scroll_offset;
-    bool m_truncated;
-    u8 m_counter;
-
-    bool m_touched = false;
-
-  public:
-    StatusBar();
-
-    tsl::elm::Element *requestFocus(tsl::elm::Element *oldFocus, tsl::FocusDirection direction) override;
-    void draw(tsl::gfx::Renderer *renderer) override;
-    void layout(u16 parentX, u16 parentY, u16 parentWidth, u16 parentHeight) override;
-    bool onClick(u64 keys) override;
-    bool onTouch(tsl::elm::TouchEvent event, s32 currX, s32 currY, s32 prevX, s32 prevY, s32 initialX, s32 initialY) override;
-
-    void update();
-
-    void CycleRepeat();
-    void CyclePlay();
-    void CycleShuffle();
-    void Prev();
-    void Next();
-    void Backward();
-    void Forward();
-
-  private:
-    ALWAYS_INLINE constexpr s32 CenterOfLine(u8 line) {
-        return (tsl::style::ListItemDefaultHeight * line) + (tsl::style::ListItemDefaultHeight / 2);
-    }
-    ALWAYS_INLINE s32 GetRepeatX() {
-        return this->getX() + (this->getWidth() / 2) - 50;
-    }
-    ALWAYS_INLINE s32 GetRepeatY() {
-        return this->getY() + CenterOfLine(1);
-    }
-    ALWAYS_INLINE s32 GetShuffleX() {
-        return this->getX() + (this->getWidth() / 2) + 50;
-    }
-    ALWAYS_INLINE s32 GetShuffleY() {
-        return this->getY() + CenterOfLine(1);
-    }
-    ALWAYS_INLINE s32 GetPlayStateX() {
-        return this->getX() + (this->getWidth() / 2);
-    }
-    ALWAYS_INLINE s32 GetPlayStateY() {
-        return this->getY() + CenterOfLine(2);
-    }
-    ALWAYS_INLINE s32 GetBackwardX() {
-        return this->getX() + 30;
-    }
-    ALWAYS_INLINE s32 GetBackwardY() {
-        return this->getY() + CenterOfLine(2);
-    }
-    ALWAYS_INLINE s32 GetPrevX() {
-        return this->getX() + ((this->getWidth() / 4) * 1);
-    }
-    ALWAYS_INLINE s32 GetPrevY() {
-        return this->getY() + CenterOfLine(2);
-    }
-    ALWAYS_INLINE s32 GetNextX() {
-        return this->getX() + ((this->getWidth() / 4) * 3);
-    }
-    ALWAYS_INLINE s32 GetNextY() {
-        return this->getY() + CenterOfLine(2);
-    }
-    ALWAYS_INLINE s32 GetForwardX() {
-        return this->getX() + this->getWidth() - 30;
-    }
-    ALWAYS_INLINE s32 GetForwardY() {
-        return this->getY() + CenterOfLine(2);
-    }
-    const AlphaSymbol &GetPlaybackSymbol();
-};
+#pragma once
+
+#include "../../ipc/tune.h"
+#include "symbol.hpp"
+
+#include </mnt/c/Users/carel/Downloads/sys-tune-master-128bitsound/sys-tune-master/libtesla/include/tesla.hpp>
+
+class StatusBar final : public tsl::elm::Element {
+  private:
+    bool m_playing;
+    TuneRepeatMode m_repeat;
+    TuneShuffleMode m_shuffle;
+    TuneCurrentStats m_stats;
+
+    double m_percentage;
+
+    std::string_view m_current_track;
+    std::string m_scroll_text;
+    u32 m_text_width;
+    u32 m_scroll_offset;
+    bool m_truncated;
+    u8 m_counter;
+
+    bool m_touched = false;
+
+  public:
+    StatusBar();
+
+    tsl::elm::Element *requestFocus(tsl::elm::Element *oldFocus, tsl::FocusDirection direction) override;
+    void draw(tsl::gfx::Renderer *renderer) override;
+    void layout(u16 parentX, u16 parentY, u16 parentWidth, u16 parentHeight) override;
+    bool onClick(u64 keys) override;
+    bool onTouch(tsl::elm::TouchEvent event, s32 currX, s32 currY, s32 prevX, s32 prevY, s32 initialX, s32 initialY) override;
+
+    void update();
+
+    void CycleRepeat();
+    void CyclePlay();
+    void CycleShuffle();
+    void Prev();
+    void Next();
+    void Backward();
+    void Forward();
+
+  private:
+    ALWAYS_INLINE constexpr s32 CenterOfLine(u8 line) {
+        return (tsl::style::ListItemDefaultHeight * line) + (tsl::style::ListItemDefaultHeight / 2);
+    }
+    ALWAYS_INLINE s32 GetRepeatX() {
+        return this->getX() + (this->getWidth() / 2) - 50;
+    }
+    ALWAYS_INLINE s32 GetRepeatY() {
+        return this->getY() + CenterOfLine(1);
+    }
+    ALWAYS_INLINE s32 GetShuffleX() {
+        return this->getX() + (this->getWidth() / 2) + 50;
+    }
+    ALWAYS_INLINE s32 GetShuffleY() {
+        return this->getY() + CenterOfLine(1);
+    }
+    ALWAYS_INLINE s32 GetPlayStateX() {
+        return this->getX() + (this->getWidth() / 2);
+    }
+    ALWAYS_INLINE s32 GetPlayStateY() {
+        return this->getY() + CenterOfLine(2);
+    }
+    ALWAYS_INLINE s32 GetBackwardX() {
+        return this->getX() + 30;
+    }
+    ALWAYS_INLINE s32 GetBackwardY() {
+        return this->getY() + CenterOfLine(2);
+    }
+    ALWAYS_INLINE s32 GetPrevX() {
+        return this->getX() + ((this->getWidth() / 4) * 1);
+    }
+    ALWAYS_INLINE s32 GetPrevY() {
+        return this->getY() + CenterOfLine(2);
+    }
+    ALWAYS_INLINE s32 GetNextX() {
+        return this->getX() + ((this->getWidth() / 4) * 3);
+    }
+    ALWAYS_INLINE s32 GetNextY() {
+        return this->getY() + CenterOfLine(2);
+    }
+    ALWAYS_INLINE s32 GetForwardX() {
+        return this->getX() + this->getWidth() - 30;
+    }
+    ALWAYS_INLINE s32 GetForwardY() {
+        return this->getY() + CenterOfLine(2);
+    }
+    const AlphaSymbol &GetPlaybackSymbol();
+};
diff --git a/overlay/source/elm_volume.hpp b/overlay/source/elm_volume.hpp
index 2361ddc..8406b3b 100644
--- a/overlay/source/elm_volume.hpp
+++ b/overlay/source/elm_volume.hpp
@@ -1,29 +1,29 @@
-#pragma once
-
-#include </mnt/c/Users/carel/Downloads/sys-tune-master-128bitsound/sys-tune-master/libtesla/include/tesla.hpp>
-
-
-class ElmVolume final : public tsl::elm::StepTrackBar {
-public:
-    ElmVolume(const char icon[3], const std::string& name, size_t numSteps)
-        : StepTrackBar{icon, numSteps}, m_name{name} { }
-
-    virtual ~ElmVolume() {}
-
-    virtual void draw(tsl::gfx::Renderer *renderer) override {
-        const u16 trackBarWidth = this->getWidth() - 95;
-        const u16 stepWidth = trackBarWidth / (this->m_numSteps - 1);
-
-        for (u8 i = 0; i < this->m_numSteps; i++) {
-            renderer->drawRect(this->getX() + 60 + stepWidth * i, this->getY() + 50, 1, 10, a(tsl::style::color::ColorFrame));
-        }
-
-        const auto [descWidth, descHeight] = renderer->drawString(this->m_name.c_str(), false, 0, 0, 15, tsl::style::color::ColorTransparent);
-        renderer->drawString(this->m_name.c_str(), false, ((this->getX() + 60) + (this->getWidth() - 95) / 2) - (descWidth / 2), this->getY() + 20, 15, a(tsl::style::color::ColorDescription));
-
-        StepTrackBar::draw(renderer);
-    }
-
-private:
-    const std::string m_name;
-};
+#pragma once
+
+#include </mnt/c/Users/carel/Downloads/sys-tune-master-128bitsound/sys-tune-master/libtesla/include/tesla.hpp>
+
+
+class ElmVolume final : public tsl::elm::StepTrackBar {
+public:
+    ElmVolume(const char icon[3], const std::string& name, size_t numSteps)
+        : StepTrackBar{icon, numSteps}, m_name{name} { }
+
+    virtual ~ElmVolume() {}
+
+    virtual void draw(tsl::gfx::Renderer *renderer) override {
+        const u16 trackBarWidth = this->getWidth() - 95;
+        const u16 stepWidth = trackBarWidth / (this->m_numSteps - 1);
+
+        for (u8 i = 0; i < this->m_numSteps; i++) {
+            renderer->drawRect(this->getX() + 60 + stepWidth * i, this->getY() + 50, 1, 10, a(tsl::style::color::ColorFrame));
+        }
+
+        const auto [descWidth, descHeight] = renderer->drawString(this->m_name.c_str(), false, 0, 0, 15, tsl::style::color::ColorTransparent);
+        renderer->drawString(this->m_name.c_str(), false, ((this->getX() + 60) + (this->getWidth() - 95) / 2) - (descWidth / 2), this->getY() + 20, 15, a(tsl::style::color::ColorDescription));
+
+        StepTrackBar::draw(renderer);
+    }
+
+private:
+    const std::string m_name;
+};
diff --git a/overlay/source/gui_browser.cpp b/overlay/source/gui_browser.cpp
index 5e99a95..98f7d32 100644
--- a/overlay/source/gui_browser.cpp
+++ b/overlay/source/gui_browser.cpp
@@ -1,217 +1,217 @@
-#include "gui_browser.hpp"
-
-#include "tune.h"
-
-namespace {
-
-    bool ListItemTextCompare(const tsl::elm::ListItem *_lhs, const tsl::elm::ListItem *_rhs) {
-        return strcasecmp(_lhs->getText().c_str(), _rhs->getText().c_str()) < 0;
-    };
-
-    bool StringTextCompare(std::string _lhs, std::string _rhs) {
-        return strcasecmp(_lhs.c_str(), _rhs.c_str()) < 0;
-    };
-
-    ALWAYS_INLINE bool EndsWith(const char *name, const char *ext) {
-        return strcasecmp(name + std::strlen(name) - std::strlen(ext), ext) == 0;
-    }
-
-    constexpr const std::array SupportedTypes = {
-#ifdef WANT_MP3
-        ".mp3",
-#endif
-#ifdef WANT_FLAC
-        ".flac",
-#endif
-#ifdef WANT_WAV
-        ".wav",
-        ".wave",
-#endif
-    };
-
-    bool SupportsType(const char *name) {
-        for (auto &ext : SupportedTypes)
-            if (EndsWith(name, ext))
-                return true;
-        return false;
-    }
-
-    constexpr const char *const base_path = "/music/";
-
-    char path_buffer[FS_MAX_PATH];
-
-}
-
-
-BrowserGui::BrowserGui()
-    : m_fs(), has_music(), cwd("/") {
-    this->m_list = new tsl::elm::List();
-
-    /* Open sd card filesystem. */
-    Result rc = fsOpenSdCardFileSystem(&this->m_fs);
-    if (R_SUCCEEDED(rc)) {
-        /* Check if base path /music/ exists. */
-        FsDir dir;
-        std::strcpy(this->cwd, base_path);
-        if (R_SUCCEEDED(fsFsOpenDirectory(&this->m_fs, this->cwd, FsDirOpenMode_ReadFiles, &dir))) {
-            this->has_music = true;
-            fsDirClose(&dir);
-        } else {
-            this->cwd[1] = '\0';
-        }
-        this->scanCwd();
-    } else {
-        this->m_list->addItem(new tsl::elm::CategoryHeader("Couldn't open SdCard"));
-    }
-}
-
-BrowserGui::~BrowserGui() {
-    fsFsClose(&this->m_fs);
-}
-
-tsl::elm::Element *BrowserGui::createUI() {
-    m_frame = new SysTuneOverlayFrame();
-
-    m_frame->setDescription("\uE0E1  Back     \uE0E0  Add    \uE0E2  Add All");
-    m_frame->setContent(this->m_list);
-
-    return m_frame;
-}
-
-bool BrowserGui::handleInput(u64 keysDown, u64, const HidTouchState&, HidAnalogStickState, HidAnalogStickState) {
-    if (keysDown & HidNpadButton_B) {
-        if (this->has_music && this->cwd[7] == '\0') {
-            return false;
-        } else if (this->cwd[1] != '\0') {
-            this->upCwd();
-            return true;
-        }
-    } else if (keysDown & HidNpadButton_X) {
-        this->addAllToPlaylist();
-        return true;
-    }
-    return false;
-}
-
-void BrowserGui::scanCwd() {
-    tsl::Gui::removeFocus();
-    this->m_list->clear();
-
-    /* Show absolute folder path. */
-    this->m_list->addItem(new tsl::elm::CategoryHeader(this->cwd, true));
-
-    /* Open directory. */
-    FsDir dir;
-    Result rc = fsFsOpenDirectory(&this->m_fs, this->cwd, FsDirOpenMode_ReadDirs | FsDirOpenMode_ReadFiles, &dir);
-    if (R_FAILED(rc)) {
-        char result_buffer[0x10];
-        std::snprintf(result_buffer, 0x10, "2%03X-%04X", R_MODULE(rc), R_DESCRIPTION(rc));
-        this->m_list->addItem(new tsl::elm::ListItem("something went wrong :/"));
-        this->m_list->addItem(new tsl::elm::ListItem(result_buffer));
-        return;
-    }
-    tsl::hlp::ScopeGuard dirGuard([&] { fsDirClose(&dir); });
-
-    std::vector<tsl::elm::ListItem *> folders, files;
-
-    /* Iternate over directory. */
-    s64 count = 0;
-    FsDirectoryEntry entry;
-    while (R_SUCCEEDED(fsDirRead(&dir, &count, 1, &entry)) && count) {
-        if (entry.type == FsDirEntryType_Dir) {
-            /* Add directory entries. */
-            auto item = new tsl::elm::ListItem(entry.name);
-            item->setClickListener([this, item](u64 down) -> bool {
-                if (down & HidNpadButton_A) {
-                    std::strncat(this->cwd, item->getText().c_str(), sizeof(this->cwd) - 1);
-                    std::strncat(this->cwd, "/", sizeof(this->cwd) - 1);
-                    this->scanCwd();
-                    return true;
-                }
-                return false;
-            });
-            folders.push_back(item);
-        } else if (SupportsType(entry.name)) {
-            /* Add file entry. */
-            auto item = new tsl::elm::ListItem(entry.name);
-            item->setClickListener([this, item](u64 down) -> bool {
-                if (down & HidNpadButton_A) {
-                    std::snprintf(path_buffer, sizeof(path_buffer), "%s%s", this->cwd, item->getText().c_str());
-                    Result rc = tuneEnqueue(path_buffer, TuneEnqueueType_Back);
-                    if (R_SUCCEEDED(rc)) {
-                        m_frame->setToast("Playlist updated", "Added 1 song to Playlist.");
-                    } else {
-                        m_frame->setToast("Failed to add Track.", "Does the name contain umlauts?");
-                    }
-                    return true;
-                }
-                return false;
-            });
-            files.push_back(item);
-        }
-    }
-    if (folders.size() == 0 && files.size() == 0) {
-        this->m_list->addItem(new tsl::elm::CategoryHeader("Empty..."));
-        return;
-    }
-
-    if (folders.size() > 0) {
-        std::sort(folders.begin(), folders.end(), ListItemTextCompare);
-        for (auto element : folders)
-            this->m_list->addItem(element);
-    }
-    if (files.size() > 0) {
-        this->m_list->addItem(new tsl::elm::CategoryHeader("Files"));
-        std::sort(files.begin(), files.end(), ListItemTextCompare);
-        for (auto element : files)
-            this->m_list->addItem(element);
-    }
-}
-
-void BrowserGui::upCwd() {
-    size_t length = std::strlen(this->cwd);
-    if (length <= 1)
-        return;
-
-    for (size_t i = length - 2; i >= 0; i--) {
-        if (this->cwd[i] == '/') {
-            this->cwd[i + 1] = '\0';
-            this->scanCwd();
-            return;
-        }
-    }
-}
-
-void BrowserGui::addAllToPlaylist() {
-    FsDir dir;
-    Result rc = fsFsOpenDirectory(&this->m_fs, this->cwd, FsDirOpenMode_ReadFiles, &dir);
-    if (R_FAILED(rc)) {
-        char result_buffer[0x10];
-        std::snprintf(result_buffer, 0x10, "2%03X-%04X", R_MODULE(rc), R_DESCRIPTION(rc));
-        this->m_list->addItem(new tsl::elm::ListItem("something went wrong :/"));
-        this->m_list->addItem(new tsl::elm::ListItem(result_buffer));
-        return;
-    }
-    tsl::hlp::ScopeGuard dirGuard([&] { fsDirClose(&dir); });
-
-    std::vector<std::string> file_list;
-    s64 songs_added = 0;
-    s64 count = 0;
-    FsDirectoryEntry entry;
-    while (R_SUCCEEDED(fsDirRead(&dir, &count, 1, &entry)) && count){
-        if (entry.type == FsDirEntryType_File && SupportsType(entry.name)){
-            file_list.push_back(std::string(entry.name));
-            count++;
-        }
-    }
-
-    std::sort(file_list.begin(), file_list.end(), StringTextCompare);
-    for (auto const & file : file_list) {
-        std::snprintf(path_buffer, sizeof(path_buffer), "%s%s", this->cwd, file.c_str());
-        rc = tuneEnqueue(path_buffer, TuneEnqueueType_Back);
-        if (R_SUCCEEDED(rc)) songs_added++;
-    }
-
-    std::snprintf(path_buffer, sizeof(path_buffer), "Added %ld songs to Playlist.", songs_added);
-    m_frame->setToast("Playlist updated", path_buffer);
-}
+#include "gui_browser.hpp"
+
+#include "tune.h"
+
+namespace {
+
+    bool ListItemTextCompare(const tsl::elm::ListItem *_lhs, const tsl::elm::ListItem *_rhs) {
+        return strcasecmp(_lhs->getText().c_str(), _rhs->getText().c_str()) < 0;
+    };
+
+    bool StringTextCompare(std::string _lhs, std::string _rhs) {
+        return strcasecmp(_lhs.c_str(), _rhs.c_str()) < 0;
+    };
+
+    ALWAYS_INLINE bool EndsWith(const char *name, const char *ext) {
+        return strcasecmp(name + std::strlen(name) - std::strlen(ext), ext) == 0;
+    }
+
+    constexpr const std::array SupportedTypes = {
+#ifdef WANT_MP3
+        ".mp3",
+#endif
+#ifdef WANT_FLAC
+        ".flac",
+#endif
+#ifdef WANT_WAV
+        ".wav",
+        ".wave",
+#endif
+    };
+
+    bool SupportsType(const char *name) {
+        for (auto &ext : SupportedTypes)
+            if (EndsWith(name, ext))
+                return true;
+        return false;
+    }
+
+    constexpr const char *const base_path = "/music/";
+
+    char path_buffer[FS_MAX_PATH];
+
+}
+
+
+BrowserGui::BrowserGui()
+    : m_fs(), has_music(), cwd("/") {
+    this->m_list = new tsl::elm::List();
+
+    /* Open sd card filesystem. */
+    Result rc = fsOpenSdCardFileSystem(&this->m_fs);
+    if (R_SUCCEEDED(rc)) {
+        /* Check if base path /music/ exists. */
+        FsDir dir;
+        std::strcpy(this->cwd, base_path);
+        if (R_SUCCEEDED(fsFsOpenDirectory(&this->m_fs, this->cwd, FsDirOpenMode_ReadFiles, &dir))) {
+            this->has_music = true;
+            fsDirClose(&dir);
+        } else {
+            this->cwd[1] = '\0';
+        }
+        this->scanCwd();
+    } else {
+        this->m_list->addItem(new tsl::elm::CategoryHeader("Couldn't open SdCard"));
+    }
+}
+
+BrowserGui::~BrowserGui() {
+    fsFsClose(&this->m_fs);
+}
+
+tsl::elm::Element *BrowserGui::createUI() {
+    m_frame = new SysTuneOverlayFrame();
+
+    m_frame->setDescription("\uE0E1  Back     \uE0E0  Add    \uE0E2  Add All");
+    m_frame->setContent(this->m_list);
+
+    return m_frame;
+}
+
+bool BrowserGui::handleInput(u64 keysDown, u64, const HidTouchState&, HidAnalogStickState, HidAnalogStickState) {
+    if (keysDown & HidNpadButton_B) {
+        if (this->has_music && this->cwd[7] == '\0') {
+            return false;
+        } else if (this->cwd[1] != '\0') {
+            this->upCwd();
+            return true;
+        }
+    } else if (keysDown & HidNpadButton_X) {
+        this->addAllToPlaylist();
+        return true;
+    }
+    return false;
+}
+
+void BrowserGui::scanCwd() {
+    tsl::Gui::removeFocus();
+    this->m_list->clear();
+
+    /* Show absolute folder path. */
+    this->m_list->addItem(new tsl::elm::CategoryHeader(this->cwd, true));
+
+    /* Open directory. */
+    FsDir dir;
+    Result rc = fsFsOpenDirectory(&this->m_fs, this->cwd, FsDirOpenMode_ReadDirs | FsDirOpenMode_ReadFiles, &dir);
+    if (R_FAILED(rc)) {
+        char result_buffer[0x10];
+        std::snprintf(result_buffer, 0x10, "2%03X-%04X", R_MODULE(rc), R_DESCRIPTION(rc));
+        this->m_list->addItem(new tsl::elm::ListItem("something went wrong :/"));
+        this->m_list->addItem(new tsl::elm::ListItem(result_buffer));
+        return;
+    }
+    tsl::hlp::ScopeGuard dirGuard([&] { fsDirClose(&dir); });
+
+    std::vector<tsl::elm::ListItem *> folders, files;
+
+    /* Iternate over directory. */
+    s64 count = 0;
+    FsDirectoryEntry entry;
+    while (R_SUCCEEDED(fsDirRead(&dir, &count, 1, &entry)) && count) {
+        if (entry.type == FsDirEntryType_Dir) {
+            /* Add directory entries. */
+            auto item = new tsl::elm::ListItem(entry.name);
+            item->setClickListener([this, item](u64 down) -> bool {
+                if (down & HidNpadButton_A) {
+                    std::strncat(this->cwd, item->getText().c_str(), sizeof(this->cwd) - 1);
+                    std::strncat(this->cwd, "/", sizeof(this->cwd) - 1);
+                    this->scanCwd();
+                    return true;
+                }
+                return false;
+            });
+            folders.push_back(item);
+        } else if (SupportsType(entry.name)) {
+            /* Add file entry. */
+            auto item = new tsl::elm::ListItem(entry.name);
+            item->setClickListener([this, item](u64 down) -> bool {
+                if (down & HidNpadButton_A) {
+                    std::snprintf(path_buffer, sizeof(path_buffer), "%s%s", this->cwd, item->getText().c_str());
+                    Result rc = tuneEnqueue(path_buffer, TuneEnqueueType_Back);
+                    if (R_SUCCEEDED(rc)) {
+                        m_frame->setToast("Playlist updated", "Added 1 song to Playlist.");
+                    } else {
+                        m_frame->setToast("Failed to add Track.", "Does the name contain umlauts?");
+                    }
+                    return true;
+                }
+                return false;
+            });
+            files.push_back(item);
+        }
+    }
+    if (folders.size() == 0 && files.size() == 0) {
+        this->m_list->addItem(new tsl::elm::CategoryHeader("Empty..."));
+        return;
+    }
+
+    if (folders.size() > 0) {
+        std::sort(folders.begin(), folders.end(), ListItemTextCompare);
+        for (auto element : folders)
+            this->m_list->addItem(element);
+    }
+    if (files.size() > 0) {
+        this->m_list->addItem(new tsl::elm::CategoryHeader("Files"));
+        std::sort(files.begin(), files.end(), ListItemTextCompare);
+        for (auto element : files)
+            this->m_list->addItem(element);
+    }
+}
+
+void BrowserGui::upCwd() {
+    size_t length = std::strlen(this->cwd);
+    if (length <= 1)
+        return;
+
+    for (size_t i = length - 2; i >= 0; i--) {
+        if (this->cwd[i] == '/') {
+            this->cwd[i + 1] = '\0';
+            this->scanCwd();
+            return;
+        }
+    }
+}
+
+void BrowserGui::addAllToPlaylist() {
+    FsDir dir;
+    Result rc = fsFsOpenDirectory(&this->m_fs, this->cwd, FsDirOpenMode_ReadFiles, &dir);
+    if (R_FAILED(rc)) {
+        char result_buffer[0x10];
+        std::snprintf(result_buffer, 0x10, "2%03X-%04X", R_MODULE(rc), R_DESCRIPTION(rc));
+        this->m_list->addItem(new tsl::elm::ListItem("something went wrong :/"));
+        this->m_list->addItem(new tsl::elm::ListItem(result_buffer));
+        return;
+    }
+    tsl::hlp::ScopeGuard dirGuard([&] { fsDirClose(&dir); });
+
+    std::vector<std::string> file_list;
+    s64 songs_added = 0;
+    s64 count = 0;
+    FsDirectoryEntry entry;
+    while (R_SUCCEEDED(fsDirRead(&dir, &count, 1, &entry)) && count){
+        if (entry.type == FsDirEntryType_File && SupportsType(entry.name)){
+            file_list.push_back(std::string(entry.name));
+            count++;
+        }
+    }
+
+    std::sort(file_list.begin(), file_list.end(), StringTextCompare);
+    for (auto const & file : file_list) {
+        std::snprintf(path_buffer, sizeof(path_buffer), "%s%s", this->cwd, file.c_str());
+        rc = tuneEnqueue(path_buffer, TuneEnqueueType_Back);
+        if (R_SUCCEEDED(rc)) songs_added++;
+    }
+
+    std::snprintf(path_buffer, sizeof(path_buffer), "Added %ld songs to Playlist.", songs_added);
+    m_frame->setToast("Playlist updated", path_buffer);
+}
diff --git a/overlay/source/gui_browser.hpp b/overlay/source/gui_browser.hpp
index af4f253..e029528 100644
--- a/overlay/source/gui_browser.hpp
+++ b/overlay/source/gui_browser.hpp
@@ -1,27 +1,27 @@
-#pragma once
-
-#include </mnt/c/Users/carel/Downloads/sys-tune-master-128bitsound/sys-tune-master/libtesla/include/tesla.hpp>
-
-#include "elm_overlayframe.hpp"
-
-class BrowserGui final : public tsl::Gui {
-  private:
-    SysTuneOverlayFrame* m_frame;
-    tsl::elm::List *m_list;
-    FsFileSystem m_fs;
-    bool has_music;
-    char cwd[FS_MAX_PATH];
-
-  public:
-    BrowserGui();
-    ~BrowserGui();
-
-    tsl::elm::Element *createUI() override;
-    bool handleInput(u64 keysDown, u64, const HidTouchState&, HidAnalogStickState, HidAnalogStickState) override;
-
-  private:
-    void scanCwd();
-    void upCwd();
-    void addAllToPlaylist();
-    void infoAlert(const std::string &title, const std::string &text);
-};
+#pragma once
+
+#include </mnt/c/Users/carel/Downloads/sys-tune-master-128bitsound/sys-tune-master/libtesla/include/tesla.hpp>
+
+#include "elm_overlayframe.hpp"
+
+class BrowserGui final : public tsl::Gui {
+  private:
+    SysTuneOverlayFrame* m_frame;
+    tsl::elm::List *m_list;
+    FsFileSystem m_fs;
+    bool has_music;
+    char cwd[FS_MAX_PATH];
+
+  public:
+    BrowserGui();
+    ~BrowserGui();
+
+    tsl::elm::Element *createUI() override;
+    bool handleInput(u64 keysDown, u64, const HidTouchState&, HidAnalogStickState, HidAnalogStickState) override;
+
+  private:
+    void scanCwd();
+    void upCwd();
+    void addAllToPlaylist();
+    void infoAlert(const std::string &title, const std::string &text);
+};
diff --git a/overlay/source/gui_error.cpp b/overlay/source/gui_error.cpp
index 066d201..6b50841 100644
--- a/overlay/source/gui_error.cpp
+++ b/overlay/source/gui_error.cpp
@@ -1,35 +1,35 @@
-#include "gui_error.hpp"
-
-#include "elm_overlayframe.hpp"
-
-namespace {
-
-    char result_buffer[10];
-
-}
-
-ErrorGui::ErrorGui(const char *msg, Result rc) : m_msg(msg) {
-    if (rc) {
-        std::snprintf(result_buffer, 10, "2%03d-%04d", R_MODULE(rc), R_DESCRIPTION(rc));
-        m_result = result_buffer;
-    }
-}
-
-tsl::elm::Element *ErrorGui::createUI() {
-    auto rootFrame = new SysTuneOverlayFrame();
-
-    auto custom = new tsl::elm::CustomDrawer([this](tsl::gfx::Renderer *drawer, u16 x, u16 y, u16 w, u16 h) {
-        drawer->drawString("\uE150", false, x + (w / 2) - (90 / 2), 300, 90, 0xffff);
-        auto [width, height] = drawer->drawString(this->m_msg, false, x + (w / 2) - (this->msgW / 2), 380, 25, 0xffff);
-        if (msgW == 0) {
-            msgW = width;
-            msgH = height;
-        }
-        if (this->m_result)
-            drawer->drawString(this->m_result, false, 120, 430, 25, 0xffff);
-    });
-
-    rootFrame->setContent(custom);
-
-    return rootFrame;
-}
+#include "gui_error.hpp"
+
+#include "elm_overlayframe.hpp"
+
+namespace {
+
+    char result_buffer[10];
+
+}
+
+ErrorGui::ErrorGui(const char *msg, Result rc) : m_msg(msg) {
+    if (rc) {
+        std::snprintf(result_buffer, 10, "2%03d-%04d", R_MODULE(rc), R_DESCRIPTION(rc));
+        m_result = result_buffer;
+    }
+}
+
+tsl::elm::Element *ErrorGui::createUI() {
+    auto rootFrame = new SysTuneOverlayFrame();
+
+    auto custom = new tsl::elm::CustomDrawer([this](tsl::gfx::Renderer *drawer, u16 x, u16 y, u16 w, u16 h) {
+        drawer->drawString("\uE150", false, x + (w / 2) - (90 / 2), 300, 90, 0xffff);
+        auto [width, height] = drawer->drawString(this->m_msg, false, x + (w / 2) - (this->msgW / 2), 380, 25, 0xffff);
+        if (msgW == 0) {
+            msgW = width;
+            msgH = height;
+        }
+        if (this->m_result)
+            drawer->drawString(this->m_result, false, 120, 430, 25, 0xffff);
+    });
+
+    rootFrame->setContent(custom);
+
+    return rootFrame;
+}
diff --git a/overlay/source/gui_error.hpp b/overlay/source/gui_error.hpp
index bbe385a..6f0190b 100644
--- a/overlay/source/gui_error.hpp
+++ b/overlay/source/gui_error.hpp
@@ -1,15 +1,15 @@
-#pragma once
-
-#include </mnt/c/Users/carel/Downloads/sys-tune-master-128bitsound/sys-tune-master/libtesla/include/tesla.hpp>
-
-class ErrorGui final : public tsl::Gui {
-  private:
-    const char *m_msg    = nullptr;
-    const char *m_result = nullptr;
-    u32 msgW = 0, msgH = 0;
-
-  public:
-    ErrorGui(const char *msg, Result rc);
-
-    tsl::elm::Element *createUI() override;
-};
+#pragma once
+
+#include </mnt/c/Users/carel/Downloads/sys-tune-master-128bitsound/sys-tune-master/libtesla/include/tesla.hpp>
+
+class ErrorGui final : public tsl::Gui {
+  private:
+    const char *m_msg    = nullptr;
+    const char *m_result = nullptr;
+    u32 msgW = 0, msgH = 0;
+
+  public:
+    ErrorGui(const char *msg, Result rc);
+
+    tsl::elm::Element *createUI() override;
+};
diff --git a/overlay/source/gui_main.cpp b/overlay/source/gui_main.cpp
index 3ad8657..d9cb04f 100644
--- a/overlay/source/gui_main.cpp
+++ b/overlay/source/gui_main.cpp
@@ -1,150 +1,150 @@
-#include "gui_main.hpp"
-
-#include "elm_overlayframe.hpp"
-#include "elm_volume.hpp"
-#include "gui_browser.hpp"
-#include "gui_playlist.hpp"
-#include "pm/pm.hpp"
-#include "config/config.hpp"
-
-namespace {
-    constexpr const size_t num_steps = 66;
-}
-
-MainGui::MainGui() {
-    m_status_bar    = new StatusBar();
-}
-
-tsl::elm::Element *MainGui::createUI() {
-    auto frame = new SysTuneOverlayFrame();
-    auto list  = new tsl::elm::List();
-
-    u64 pid{}, tid{};
-    pm::getCurrentPidTid(&pid, &tid);
-
-    /* Current track. */
-    list->addItem(this->m_status_bar, tsl::style::ListItemDefaultHeight * 2);
-
-    /* Playlist. */
-    auto queue_button = new tsl::elm::ListItem("Playlist");
-    queue_button->setClickListener([](u64 keys) {
-        if (keys & HidNpadButton_A) {
-            tsl::changeTo<PlaylistGui>();
-            return true;
-        }
-        return false;
-    });
-    list->addItem(queue_button);
-
-    /* Browser. */
-    auto browser_button = new tsl::elm::ListItem("Music browser");
-    browser_button->setClickListener([](u64 keys) {
-        if (keys & HidNpadButton_A) {
-            tsl::changeTo<BrowserGui>();
-            return true;
-        }
-        return false;
-    });
-    list->addItem(browser_button);
-
-    /* Volume indicator */
-    list->addItem(new tsl::elm::CategoryHeader("Volume Control"));
-
-    /* Get initial volume. */
-    double tune_volume = 1.f;
-    double title_volume = 1.f;
-    double default_title_volume = 1.f;
-    double tune_bass = 1.f;
-
-    tuneGetVolume(&tune_volume);
-    tuneGetTitleVolume(&title_volume);
-    tuneGetDefaultTitleVolume(&default_title_volume);
-    tuneGetBass(&tune_bass);
-
-    auto tune_volume_slider = new ElmVolume("\uE13C", "Tune Volume", num_steps);
-    tune_volume_slider->setProgress(tune_volume * num_steps);
-    tune_volume_slider->setValueChangedListener([](u8 value){
-        const double volume = double(value) / double(num_steps);
-        tuneSetVolume(volume*6);
-    });
-    list->addItem(tune_volume_slider);
-
-    // empty pid means we are qlaunch :)
-    if (tid && pid) {
-        auto title_volume_slider = new ElmVolume("\uE13C", "Game Volume", num_steps);
-        title_volume_slider->setProgress(title_volume * num_steps);
-        title_volume_slider->setValueChangedListener([tid](u8 value){
-            const double volume = double(value) / double(num_steps);
-            tuneSetTitleVolume(volume*6);
-            config::set_title_volume(tid, volume*6);
-        });
-        list->addItem(title_volume_slider);
-    }
-
-    auto default_title_volume_slider = new ElmVolume("\uE13C", "Game Volume (default)", num_steps);
-    default_title_volume_slider->setProgress(default_title_volume * num_steps);
-    default_title_volume_slider->setValueChangedListener([](u8 value){
-        const double volume = double(value) / double(num_steps);
-        tuneSetDefaultTitleVolume(volume*6);
-    });
-    list->addItem(default_title_volume_slider);
-
-    auto tune_bass_slider = new ElmVolume("\uE13C", "Bass Boost", num_steps);
-    tune_bass_slider->setProgress(tune_bass * num_steps);
-    tune_bass_slider->setValueChangedListener([](u8 value){
-        const double bass = double(value) / double(num_steps);
-        tuneSetBass(bass / 2);
-    });
-    list->addItem(tune_bass_slider);
-
-    list->addItem(new tsl::elm::CategoryHeader("Play / Pause"));
-
-    /* Per title tune toggle. */
-    auto tune_play = new tsl::elm::ToggleListItem("Tune", config::get_title_enabled(tid), "Play", "Pause");
-    tune_play->setStateChangedListener([tid](bool new_value) {
-        config::set_title_enabled(tid, new_value);
-        if (new_value) {
-            tunePlay();
-        } else {
-            tunePause();
-        }
-    });
-    list->addItem(tune_play);
-
-    /* Default title tune toggle. */
-    auto tune_default_play = new tsl::elm::ToggleListItem("Tune (default)", config::get_title_enabled_default(), "Play", "Pause");
-    tune_default_play->setStateChangedListener([](bool new_value) {
-        config::set_title_enabled_default(new_value);
-        if (new_value) {
-            tunePlay();
-        } else {
-            tunePause();
-        }
-    });
-    list->addItem(tune_default_play);
-
-    list->addItem(new tsl::elm::CategoryHeader("Misc"));
-
-    auto exit_button = new tsl::elm::ListItem("Close sys-tune");
-    exit_button->setClickListener([](u64 keys) {
-        if (keys & HidNpadButton_A) {
-            tuneQuit();
-            tsl::goBack();
-            return true;
-        }
-        return false;
-    });
-    list->addItem(exit_button);
-
-    frame->setContent(list);
-
-    return frame;
-}
-
-void MainGui::update() {
-    static u8 tick = 0;
-    /* Update status 4 times per second. */
-    if ((tick % 15) == 0)
-        this->m_status_bar->update();
-    tick++;
-}
+#include "gui_main.hpp"
+
+#include "elm_overlayframe.hpp"
+#include "elm_volume.hpp"
+#include "gui_browser.hpp"
+#include "gui_playlist.hpp"
+#include "pm/pm.hpp"
+#include "config/config.hpp"
+
+namespace {
+    constexpr const size_t num_steps = 66;
+}
+
+MainGui::MainGui() {
+    m_status_bar    = new StatusBar();
+}
+
+tsl::elm::Element *MainGui::createUI() {
+    auto frame = new SysTuneOverlayFrame();
+    auto list  = new tsl::elm::List();
+
+    u64 pid{}, tid{};
+    pm::getCurrentPidTid(&pid, &tid);
+
+    /* Current track. */
+    list->addItem(this->m_status_bar, tsl::style::ListItemDefaultHeight * 2);
+
+    /* Playlist. */
+    auto queue_button = new tsl::elm::ListItem("Playlist");
+    queue_button->setClickListener([](u64 keys) {
+        if (keys & HidNpadButton_A) {
+            tsl::changeTo<PlaylistGui>();
+            return true;
+        }
+        return false;
+    });
+    list->addItem(queue_button);
+
+    /* Browser. */
+    auto browser_button = new tsl::elm::ListItem("Music browser");
+    browser_button->setClickListener([](u64 keys) {
+        if (keys & HidNpadButton_A) {
+            tsl::changeTo<BrowserGui>();
+            return true;
+        }
+        return false;
+    });
+    list->addItem(browser_button);
+
+    /* Volume indicator */
+    list->addItem(new tsl::elm::CategoryHeader("Volume Control"));
+
+    /* Get initial volume. */
+    double tune_volume = 1.f;
+    double title_volume = 1.f;
+    double default_title_volume = 1.f;
+    double tune_bass = 1.f;
+
+    tuneGetVolume(&tune_volume);
+    tuneGetTitleVolume(&title_volume);
+    tuneGetDefaultTitleVolume(&default_title_volume);
+    tuneGetBass(&tune_bass);
+
+    auto tune_volume_slider = new ElmVolume("\uE13C", "Tune Volume", num_steps);
+    tune_volume_slider->setProgress(tune_volume * num_steps);
+    tune_volume_slider->setValueChangedListener([](u8 value){
+        const double volume = double(value) / double(num_steps);
+        tuneSetVolume(volume*6);
+    });
+    list->addItem(tune_volume_slider);
+
+    // empty pid means we are qlaunch :)
+    if (tid && pid) {
+        auto title_volume_slider = new ElmVolume("\uE13C", "Game Volume", num_steps);
+        title_volume_slider->setProgress(title_volume * num_steps);
+        title_volume_slider->setValueChangedListener([tid](u8 value){
+            const double volume = double(value) / double(num_steps);
+            tuneSetTitleVolume(volume*6);
+            config::set_title_volume(tid, volume*6);
+        });
+        list->addItem(title_volume_slider);
+    }
+
+    auto default_title_volume_slider = new ElmVolume("\uE13C", "Game Volume (default)", num_steps);
+    default_title_volume_slider->setProgress(default_title_volume * num_steps);
+    default_title_volume_slider->setValueChangedListener([](u8 value){
+        const double volume = double(value) / double(num_steps);
+        tuneSetDefaultTitleVolume(volume*6);
+    });
+    list->addItem(default_title_volume_slider);
+
+    auto tune_bass_slider = new ElmVolume("\uE13C", "Bass Boost", num_steps);
+    tune_bass_slider->setProgress(tune_bass * num_steps);
+    tune_bass_slider->setValueChangedListener([](u8 value){
+        const double bass = double(value) / double(num_steps);
+        tuneSetBass(bass / 2);
+    });
+    list->addItem(tune_bass_slider);
+
+    list->addItem(new tsl::elm::CategoryHeader("Play / Pause"));
+
+    /* Per title tune toggle. */
+    auto tune_play = new tsl::elm::ToggleListItem("Tune", config::get_title_enabled(tid), "Play", "Pause");
+    tune_play->setStateChangedListener([tid](bool new_value) {
+        config::set_title_enabled(tid, new_value);
+        if (new_value) {
+            tunePlay();
+        } else {
+            tunePause();
+        }
+    });
+    list->addItem(tune_play);
+
+    /* Default title tune toggle. */
+    auto tune_default_play = new tsl::elm::ToggleListItem("Tune (default)", config::get_title_enabled_default(), "Play", "Pause");
+    tune_default_play->setStateChangedListener([](bool new_value) {
+        config::set_title_enabled_default(new_value);
+        if (new_value) {
+            tunePlay();
+        } else {
+            tunePause();
+        }
+    });
+    list->addItem(tune_default_play);
+
+    list->addItem(new tsl::elm::CategoryHeader("Misc"));
+
+    auto exit_button = new tsl::elm::ListItem("Close sys-tune");
+    exit_button->setClickListener([](u64 keys) {
+        if (keys & HidNpadButton_A) {
+            tuneQuit();
+            tsl::goBack();
+            return true;
+        }
+        return false;
+    });
+    list->addItem(exit_button);
+
+    frame->setContent(list);
+
+    return frame;
+}
+
+void MainGui::update() {
+    static u8 tick = 0;
+    /* Update status 4 times per second. */
+    if ((tick % 15) == 0)
+        this->m_status_bar->update();
+    tick++;
+}
diff --git a/overlay/source/gui_main.cpp.bak b/overlay/source/gui_main.cpp.bak
index 086edf5..2cea899 100644
--- a/overlay/source/gui_main.cpp.bak
+++ b/overlay/source/gui_main.cpp.bak
@@ -1,150 +1,150 @@
-#include "gui_main.hpp"
-
-#include "elm_overlayframe.hpp"
-#include "elm_volume.hpp"
-#include "gui_browser.hpp"
-#include "gui_playlist.hpp"
-#include "pm/pm.hpp"
-#include "config/config.hpp"
-
-namespace {
-    constexpr const size_t num_steps = 66;
-}
-
-MainGui::MainGui() {
-    m_status_bar    = new StatusBar();
-}
-
-tsl::elm::Element *MainGui::createUI() {
-    auto frame = new SysTuneOverlayFrame();
-    auto list  = new tsl::elm::List();
-
-    u64 pid{}, tid{};
-    pm::getCurrentPidTid(&pid, &tid);
-
-    /* Current track. */
-    list->addItem(this->m_status_bar, tsl::style::ListItemDefaultHeight * 2);
-
-    /* Playlist. */
-    auto queue_button = new tsl::elm::ListItem("Playlist");
-    queue_button->setClickListener([](u64 keys) {
-        if (keys & HidNpadButton_A) {
-            tsl::changeTo<PlaylistGui>();
-            return true;
-        }
-        return false;
-    });
-    list->addItem(queue_button);
-
-    /* Browser. */
-    auto browser_button = new tsl::elm::ListItem("Music browser");
-    browser_button->setClickListener([](u64 keys) {
-        if (keys & HidNpadButton_A) {
-            tsl::changeTo<BrowserGui>();
-            return true;
-        }
-        return false;
-    });
-    list->addItem(browser_button);
-
-    /* Volume indicator */
-    list->addItem(new tsl::elm::CategoryHeader("Volume Control"));
-
-    /* Get initial volume. */
-    double tune_volume = 1.f;
-    double title_volume = 1.f;
-    double default_title_volume = 1.f;
-    double tune_bass = 1.f;
-
-    tuneGetVolume(&tune_volume);
-    tuneGetTitleVolume(&title_volume);
-    tuneGetDefaultTitleVolume(&default_title_volume);
-    tuneGetBass(&tune_bass);
-
-    auto tune_volume_slider = new ElmVolume("\uE13C", "Tune Volume", num_steps);
-    tune_volume_slider->setProgress(tune_volume * num_steps);
-    tune_volume_slider->setValueChangedListener([](u8 value){
-        const double volume = double(value) / double(num_steps);
-        tuneSetVolume(volume*6);
-    });
-    list->addItem(tune_volume_slider);
-
-    // empty pid means we are qlaunch :)
-    if (tid && pid) {
-        auto title_volume_slider = new ElmVolume("\uE13C", "Game Volume", num_steps);
-        title_volume_slider->setProgress(title_volume * num_steps);
-        title_volume_slider->setValueChangedListener([tid](u8 value){
-            const double volume = double(value) / double(num_steps);
-            tuneSetTitleVolume(volume*6);
-            config::set_title_volume(tid, volume*6);
-        });
-        list->addItem(title_volume_slider);
-    }
-
-    auto default_title_volume_slider = new ElmVolume("\uE13C", "Game Volume (default)", num_steps);
-    default_title_volume_slider->setProgress(default_title_volume * num_steps);
-    default_title_volume_slider->setValueChangedListener([](u8 value){
-        const double volume = double(value) / double(num_steps);
-        tuneSetDefaultTitleVolume(volume*6);
-    });
-    list->addItem(default_title_volume_slider);
-
-    auto tune_bass_slider = new ElmVolume("\uE13C", "Bass Boost", num_steps);
-    tune_bass_slider->setProgress(tune_bass * num_steps);
-    tune_bass_slider->setValueChangedListener([](u8 value){
-        const double tune_bass = double(value) / double(num_steps);
-        tuneSetBass(tune_bass / 2);
-    });
-    list->addItem(tune_bass_slider);
-
-    list->addItem(new tsl::elm::CategoryHeader("Play / Pause"));
-
-    /* Per title tune toggle. */
-    auto tune_play = new tsl::elm::ToggleListItem("Tune", config::get_title_enabled(tid), "Play", "Pause");
-    tune_play->setStateChangedListener([tid](bool new_value) {
-        config::set_title_enabled(tid, new_value);
-        if (new_value) {
-            tunePlay();
-        } else {
-            tunePause();
-        }
-    });
-    list->addItem(tune_play);
-
-    /* Default title tune toggle. */
-    auto tune_default_play = new tsl::elm::ToggleListItem("Tune (default)", config::get_title_enabled_default(), "Play", "Pause");
-    tune_default_play->setStateChangedListener([](bool new_value) {
-        config::set_title_enabled_default(new_value);
-        if (new_value) {
-            tunePlay();
-        } else {
-            tunePause();
-        }
-    });
-    list->addItem(tune_default_play);
-
-    list->addItem(new tsl::elm::CategoryHeader("Misc"));
-
-    auto exit_button = new tsl::elm::ListItem("Close sys-tune");
-    exit_button->setClickListener([](u64 keys) {
-        if (keys & HidNpadButton_A) {
-            tuneQuit();
-            tsl::goBack();
-            return true;
-        }
-        return false;
-    });
-    list->addItem(exit_button);
-
-    frame->setContent(list);
-
-    return frame;
-}
-
-void MainGui::update() {
-    static u8 tick = 0;
-    /* Update status 4 times per second. */
-    if ((tick % 15) == 0)
-        this->m_status_bar->update();
-    tick++;
-}
+#include "gui_main.hpp"
+
+#include "elm_overlayframe.hpp"
+#include "elm_volume.hpp"
+#include "gui_browser.hpp"
+#include "gui_playlist.hpp"
+#include "pm/pm.hpp"
+#include "config/config.hpp"
+
+namespace {
+    constexpr const size_t num_steps = 66;
+}
+
+MainGui::MainGui() {
+    m_status_bar    = new StatusBar();
+}
+
+tsl::elm::Element *MainGui::createUI() {
+    auto frame = new SysTuneOverlayFrame();
+    auto list  = new tsl::elm::List();
+
+    u64 pid{}, tid{};
+    pm::getCurrentPidTid(&pid, &tid);
+
+    /* Current track. */
+    list->addItem(this->m_status_bar, tsl::style::ListItemDefaultHeight * 2);
+
+    /* Playlist. */
+    auto queue_button = new tsl::elm::ListItem("Playlist");
+    queue_button->setClickListener([](u64 keys) {
+        if (keys & HidNpadButton_A) {
+            tsl::changeTo<PlaylistGui>();
+            return true;
+        }
+        return false;
+    });
+    list->addItem(queue_button);
+
+    /* Browser. */
+    auto browser_button = new tsl::elm::ListItem("Music browser");
+    browser_button->setClickListener([](u64 keys) {
+        if (keys & HidNpadButton_A) {
+            tsl::changeTo<BrowserGui>();
+            return true;
+        }
+        return false;
+    });
+    list->addItem(browser_button);
+
+    /* Volume indicator */
+    list->addItem(new tsl::elm::CategoryHeader("Volume Control"));
+
+    /* Get initial volume. */
+    double tune_volume = 1.f;
+    double title_volume = 1.f;
+    double default_title_volume = 1.f;
+    double tune_bass = 1.f;
+
+    tuneGetVolume(&tune_volume);
+    tuneGetTitleVolume(&title_volume);
+    tuneGetDefaultTitleVolume(&default_title_volume);
+    tuneGetBass(&tune_bass);
+
+    auto tune_volume_slider = new ElmVolume("\uE13C", "Tune Volume", num_steps);
+    tune_volume_slider->setProgress(tune_volume * num_steps);
+    tune_volume_slider->setValueChangedListener([](u8 value){
+        const double volume = double(value) / double(num_steps);
+        tuneSetVolume(volume*6);
+    });
+    list->addItem(tune_volume_slider);
+
+    // empty pid means we are qlaunch :)
+    if (tid && pid) {
+        auto title_volume_slider = new ElmVolume("\uE13C", "Game Volume", num_steps);
+        title_volume_slider->setProgress(title_volume * num_steps);
+        title_volume_slider->setValueChangedListener([tid](u8 value){
+            const double volume = double(value) / double(num_steps);
+            tuneSetTitleVolume(volume*6);
+            config::set_title_volume(tid, volume*6);
+        });
+        list->addItem(title_volume_slider);
+    }
+
+    auto default_title_volume_slider = new ElmVolume("\uE13C", "Game Volume (default)", num_steps);
+    default_title_volume_slider->setProgress(default_title_volume * num_steps);
+    default_title_volume_slider->setValueChangedListener([](u8 value){
+        const double volume = double(value) / double(num_steps);
+        tuneSetDefaultTitleVolume(volume*6);
+    });
+    list->addItem(default_title_volume_slider);
+
+    auto tune_bass_slider = new ElmVolume("\uE13C", "Bass Boost", num_steps);
+    tune_bass_slider->setProgress(tune_bass * num_steps);
+    tune_bass_slider->setValueChangedListener([](u8 value){
+        const double tune_bass = double(value) / double(num_steps);
+        tuneSetBass(tune_bass / 2);
+    });
+    list->addItem(tune_bass_slider);
+
+    list->addItem(new tsl::elm::CategoryHeader("Play / Pause"));
+
+    /* Per title tune toggle. */
+    auto tune_play = new tsl::elm::ToggleListItem("Tune", config::get_title_enabled(tid), "Play", "Pause");
+    tune_play->setStateChangedListener([tid](bool new_value) {
+        config::set_title_enabled(tid, new_value);
+        if (new_value) {
+            tunePlay();
+        } else {
+            tunePause();
+        }
+    });
+    list->addItem(tune_play);
+
+    /* Default title tune toggle. */
+    auto tune_default_play = new tsl::elm::ToggleListItem("Tune (default)", config::get_title_enabled_default(), "Play", "Pause");
+    tune_default_play->setStateChangedListener([](bool new_value) {
+        config::set_title_enabled_default(new_value);
+        if (new_value) {
+            tunePlay();
+        } else {
+            tunePause();
+        }
+    });
+    list->addItem(tune_default_play);
+
+    list->addItem(new tsl::elm::CategoryHeader("Misc"));
+
+    auto exit_button = new tsl::elm::ListItem("Close sys-tune");
+    exit_button->setClickListener([](u64 keys) {
+        if (keys & HidNpadButton_A) {
+            tuneQuit();
+            tsl::goBack();
+            return true;
+        }
+        return false;
+    });
+    list->addItem(exit_button);
+
+    frame->setContent(list);
+
+    return frame;
+}
+
+void MainGui::update() {
+    static u8 tick = 0;
+    /* Update status 4 times per second. */
+    if ((tick % 15) == 0)
+        this->m_status_bar->update();
+    tick++;
+}
diff --git a/overlay/source/gui_main.hpp b/overlay/source/gui_main.hpp
index 5531590..855c61a 100644
--- a/overlay/source/gui_main.hpp
+++ b/overlay/source/gui_main.hpp
@@ -1,18 +1,18 @@
-#pragma once
-
-#include "tune.h"
-#include "elm_status_bar.hpp"
-
-#include </mnt/c/Users/carel/Downloads/sys-tune-master-128bitsound/sys-tune-master/libtesla/include/tesla.hpp>
-
-class MainGui final : public tsl::Gui {
-  private:
-    StatusBar *m_status_bar;
-
-  public:
-    MainGui();
-
-    tsl::elm::Element *createUI() final;
-
-    void update() final;
-};
+#pragma once
+
+#include "tune.h"
+#include "elm_status_bar.hpp"
+
+#include </mnt/c/Users/carel/Downloads/sys-tune-master-128bitsound/sys-tune-master/libtesla/include/tesla.hpp>
+
+class MainGui final : public tsl::Gui {
+  private:
+    StatusBar *m_status_bar;
+
+  public:
+    MainGui();
+
+    tsl::elm::Element *createUI() final;
+
+    void update() final;
+};
diff --git a/overlay/source/gui_playlist.cpp b/overlay/source/gui_playlist.cpp
index f701ff8..df035ba 100644
--- a/overlay/source/gui_playlist.cpp
+++ b/overlay/source/gui_playlist.cpp
@@ -1,121 +1,121 @@
-#include "gui_playlist.hpp"
-
-#include "elm_overlayframe.hpp"
-#include "tune.h"
-
-namespace {
-
-    void NullLastDot(char *str) {
-        char *end = str + strlen(str) - 1;
-        while (str != end) {
-            if (*end == '.') {
-                *end = '\0';
-                return;
-            }
-            end--;
-        }
-    }
-
-    class ButtonListItem final : public tsl::elm::ListItem {
-      public:
-        template <typename Text, typename Value>
-        ButtonListItem(Text &text, Value &value) : ListItem(std::forward<Text>(text), std::forward<Value>(value)) {}
-
-        bool onTouch(tsl::elm::TouchEvent event, s32 currX, s32 currY, s32 prevX, s32 prevY, s32 initialX, s32 initialY) override {
-            if (event == tsl::elm::TouchEvent::Touch)
-                this->m_touched = this->inBounds(currX, currY);
-
-            if (event == tsl::elm::TouchEvent::Release && this->m_touched) {
-                this->m_touched = false;
-
-                if (Element::getInputMode() == tsl::InputMode::Touch) {
-                    bool handled = false;
-                    if (currX > this->getLeftBound() && currX < (this->getRightBound() - this->getHeight()) && currY > this->getTopBound() && currY < this->getBottomBound())
-                        handled = this->onClick(HidNpadButton_A);
-
-                    if (currX > (this->getRightBound() - this->getHeight()) && currX < this->getRightBound() && currY > this->getTopBound() && currY < this->getBottomBound())
-                        handled = this->onClick(HidNpadButton_Y);
-
-                    this->m_clickAnimationProgress = 0;
-                    return handled;
-                }
-            }
-
-            return false;
-        }
-    };
-
-}
-
-PlaylistGui::PlaylistGui() {
-    m_list = new tsl::elm::List();
-
-    u32 count;
-    Result rc = tuneGetPlaylistSize(&count);
-    if (R_FAILED(rc)) {
-        char result_buffer[0x10];
-        std::snprintf(result_buffer, 0x10, "2%03X-%04X", R_MODULE(rc), R_DESCRIPTION(rc));
-        this->m_list->addItem(new tsl::elm::ListItem("something went wrong :/"));
-        this->m_list->addItem(new tsl::elm::ListItem(result_buffer));
-        return;
-    }
-
-    if (count == 0) {
-        m_list->addItem(new tsl::elm::ListItem("Playlist empty."));
-        return;
-    }
-
-    char path[FS_MAX_PATH];
-    for (u32 i = 0; i < count; i++) {
-        rc = tuneGetPlaylistItem(i, path, FS_MAX_PATH);
-        if (R_FAILED(rc))
-            break;
-
-        char *str = path;
-        size_t length   = std::strlen(str);
-        NullLastDot(str);
-        for (size_t i = length; i >= 0; i--) {
-            if (str[i] == '/') {
-                str = str + i + 1;
-                break;
-            }
-        }
-        auto item = new ButtonListItem(str, "\uE098");
-        item->setClickListener([this, item](u64 keys) -> bool {
-            u32 index  = this->m_list->getIndexInList(item);
-            u8 counter = 0;
-            if (keys & HidNpadButton_A) {
-                tuneSelect(index);
-                counter++;
-            }
-            if (keys & HidNpadButton_Y) {
-                if (R_SUCCEEDED(tuneRemove(index))) {
-                    this->removeFocus();
-                    this->m_list->removeIndex(index);
-                    auto element = this->m_list->getItemAtIndex(index + 1);
-                    if (element != nullptr) {
-                        this->requestFocus(element, tsl::FocusDirection::Down);
-                        this->m_list->setFocusedIndex(index + 1);
-                    } else if (index > 0) {
-                        element = this->m_list->getItemAtIndex(index - 1);
-                        this->requestFocus(element, tsl::FocusDirection::Up);
-                        this->m_list->setFocusedIndex(index - 1);
-                    }
-                }
-
-                counter++;
-            }
-            return counter;
-        });
-        m_list->addItem(item);
-    }
-}
-
-tsl::elm::Element *PlaylistGui::createUI() {
-    auto rootFrame = new SysTuneOverlayFrame();
-
-    rootFrame->setContent(this->m_list);
-    rootFrame->setDescription("\uE0E1  Back     \uE0E0  Play   \uE0E3  Remove");
-
-    return rootFrame;
-}
+#include "gui_playlist.hpp"
+
+#include "elm_overlayframe.hpp"
+#include "tune.h"
+
+namespace {
+
+    void NullLastDot(char *str) {
+        char *end = str + strlen(str) - 1;
+        while (str != end) {
+            if (*end == '.') {
+                *end = '\0';
+                return;
+            }
+            end--;
+        }
+    }
+
+    class ButtonListItem final : public tsl::elm::ListItem {
+      public:
+        template <typename Text, typename Value>
+        ButtonListItem(Text &text, Value &value) : ListItem(std::forward<Text>(text), std::forward<Value>(value)) {}
+
+        bool onTouch(tsl::elm::TouchEvent event, s32 currX, s32 currY, s32 prevX, s32 prevY, s32 initialX, s32 initialY) override {
+            if (event == tsl::elm::TouchEvent::Touch)
+                this->m_touched = this->inBounds(currX, currY);
+
+            if (event == tsl::elm::TouchEvent::Release && this->m_touched) {
+                this->m_touched = false;
+
+                if (Element::getInputMode() == tsl::InputMode::Touch) {
+                    bool handled = false;
+                    if (currX > this->getLeftBound() && currX < (this->getRightBound() - this->getHeight()) && currY > this->getTopBound() && currY < this->getBottomBound())
+                        handled = this->onClick(HidNpadButton_A);
+
+                    if (currX > (this->getRightBound() - this->getHeight()) && currX < this->getRightBound() && currY > this->getTopBound() && currY < this->getBottomBound())
+                        handled = this->onClick(HidNpadButton_Y);
+
+                    this->m_clickAnimationProgress = 0;
+                    return handled;
+                }
+            }
+
+            return false;
+        }
+    };
+
+}
+
+PlaylistGui::PlaylistGui() {
+    m_list = new tsl::elm::List();
+
+    u32 count;
+    Result rc = tuneGetPlaylistSize(&count);
+    if (R_FAILED(rc)) {
+        char result_buffer[0x10];
+        std::snprintf(result_buffer, 0x10, "2%03X-%04X", R_MODULE(rc), R_DESCRIPTION(rc));
+        this->m_list->addItem(new tsl::elm::ListItem("something went wrong :/"));
+        this->m_list->addItem(new tsl::elm::ListItem(result_buffer));
+        return;
+    }
+
+    if (count == 0) {
+        m_list->addItem(new tsl::elm::ListItem("Playlist empty."));
+        return;
+    }
+
+    char path[FS_MAX_PATH];
+    for (u32 i = 0; i < count; i++) {
+        rc = tuneGetPlaylistItem(i, path, FS_MAX_PATH);
+        if (R_FAILED(rc))
+            break;
+
+        char *str = path;
+        size_t length   = std::strlen(str);
+        NullLastDot(str);
+        for (size_t i = length; i >= 0; i--) {
+            if (str[i] == '/') {
+                str = str + i + 1;
+                break;
+            }
+        }
+        auto item = new ButtonListItem(str, "\uE098");
+        item->setClickListener([this, item](u64 keys) -> bool {
+            u32 index  = this->m_list->getIndexInList(item);
+            u8 counter = 0;
+            if (keys & HidNpadButton_A) {
+                tuneSelect(index);
+                counter++;
+            }
+            if (keys & HidNpadButton_Y) {
+                if (R_SUCCEEDED(tuneRemove(index))) {
+                    this->removeFocus();
+                    this->m_list->removeIndex(index);
+                    auto element = this->m_list->getItemAtIndex(index + 1);
+                    if (element != nullptr) {
+                        this->requestFocus(element, tsl::FocusDirection::Down);
+                        this->m_list->setFocusedIndex(index + 1);
+                    } else if (index > 0) {
+                        element = this->m_list->getItemAtIndex(index - 1);
+                        this->requestFocus(element, tsl::FocusDirection::Up);
+                        this->m_list->setFocusedIndex(index - 1);
+                    }
+                }
+
+                counter++;
+            }
+            return counter;
+        });
+        m_list->addItem(item);
+    }
+}
+
+tsl::elm::Element *PlaylistGui::createUI() {
+    auto rootFrame = new SysTuneOverlayFrame();
+
+    rootFrame->setContent(this->m_list);
+    rootFrame->setDescription("\uE0E1  Back     \uE0E0  Play   \uE0E3  Remove");
+
+    return rootFrame;
+}
diff --git a/overlay/source/gui_playlist.hpp b/overlay/source/gui_playlist.hpp
index 08f2f59..ddae578 100644
--- a/overlay/source/gui_playlist.hpp
+++ b/overlay/source/gui_playlist.hpp
@@ -1,13 +1,13 @@
-#pragma once
-
-#include </mnt/c/Users/carel/Downloads/sys-tune-master-128bitsound/sys-tune-master/libtesla/include/tesla.hpp>
-
-class PlaylistGui final : public tsl::Gui {
-  private:
-    tsl::elm::List *m_list;
-
-  public:
-    PlaylistGui();
-
-    tsl::elm::Element *createUI() override;
-};
+#pragma once
+
+#include </mnt/c/Users/carel/Downloads/sys-tune-master-128bitsound/sys-tune-master/libtesla/include/tesla.hpp>
+
+class PlaylistGui final : public tsl::Gui {
+  private:
+    tsl::elm::List *m_list;
+
+  public:
+    PlaylistGui();
+
+    tsl::elm::Element *createUI() override;
+};
diff --git a/overlay/source/main.cpp b/overlay/source/main.cpp
index ef1114f..198dd50 100644
--- a/overlay/source/main.cpp
+++ b/overlay/source/main.cpp
@@ -1,94 +1,94 @@
-#define TESLA_INIT_IMPL
-#include "tune.h"
-#include "gui_error.hpp"
-#include "gui_main.hpp"
-#include "sdmc/sdmc.hpp"
-#include "pm/pm.hpp"
-#include "config/config.hpp"
-
-#include </mnt/c/Users/carel/Downloads/sys-tune-master-128bitsound/sys-tune-master/libtesla/include/tesla.hpp>
-
-class SysTuneOverlay final : public tsl::Overlay {
-  private:
-    const char *msg = nullptr;
-    Result fail     = 0;
-
-  public:
-    void initServices() override {
-        if (R_FAILED(pm::Initialize())) {
-            this->msg  = "Failed pm::Initialize()";
-            return;
-        }
-
-        // don't open sys-tune if blacklisted title is active!
-        u64 pid{}, tid{};
-        pm::getCurrentPidTid(&pid, &tid);
-
-        if (config::get_title_blacklist(tid)) {
-            this->msg =
-                "Title is blacklisted!\n"
-                "Exit to use sys-tune";
-            return;
-        }
-
-        Result rc = tuneInitialize();
-
-        // not found can happen if the service isn't started
-        // connection refused can happen is the service was terminated by pmshell
-        if (R_VALUE(rc) == KERNELRESULT(NotFound) || R_VALUE(rc) == KERNELRESULT(ConnectionRefused)) {
-            u64 pid = 0;
-            const NcmProgramLocation programLocation{
-                .program_id = 0x4200000000000000,
-                .storageID  = NcmStorageId_None,
-            };
-            rc = pmshellInitialize();
-            if (R_SUCCEEDED(rc)) {
-                rc = pmshellLaunchProgram(0, &programLocation, &pid);
-                pmshellExit();
-            }
-            if (R_FAILED(rc) || pid == 0) {
-                this->fail = rc;
-                this->msg  = "  Failed to\n"
-                            "launch sysmodule";
-                return;
-            }
-            svcSleepThread(500'000'000ULL);
-            rc = tuneInitialize();
-        }
-
-        if (R_FAILED(rc)) {
-            this->msg  = "Something went wrong:";
-            this->fail = rc;
-            return;
-        }
-
-        if (R_FAILED(sdmc::Open())) {
-            this->msg  = "Failed sdmc::Open()";
-            return;
-        }
-
-        u32 api;
-        if (R_FAILED(tuneGetApiVersion(&api)) || api != TUNE_API_VERSION) {
-            this->msg = "   Unsupported\n"
-                        "sys-tune version!";
-        }
-    }
-
-    void exitServices() override {
-        sdmc::Close();
-        pm::Exit();
-        tuneExit();
-    }
-
-    std::unique_ptr<tsl::Gui> loadInitialGui() override {
-        if (this->msg) {
-            return std::make_unique<ErrorGui>(this->msg, this->fail);
-        } else {
-            return std::make_unique<MainGui>();
-        }
-    }
-};
-
-int main(int argc, char **argv) {
-    return tsl::loop<SysTuneOverlay>(argc, argv);
-}
+#define TESLA_INIT_IMPL
+#include "tune.h"
+#include "gui_error.hpp"
+#include "gui_main.hpp"
+#include "sdmc/sdmc.hpp"
+#include "pm/pm.hpp"
+#include "config/config.hpp"
+
+#include </mnt/c/Users/carel/Downloads/sys-tune-master-128bitsound/sys-tune-master/libtesla/include/tesla.hpp>
+
+class SysTuneOverlay final : public tsl::Overlay {
+  private:
+    const char *msg = nullptr;
+    Result fail     = 0;
+
+  public:
+    void initServices() override {
+        if (R_FAILED(pm::Initialize())) {
+            this->msg  = "Failed pm::Initialize()";
+            return;
+        }
+
+        // don't open sys-tune if blacklisted title is active!
+        u64 pid{}, tid{};
+        pm::getCurrentPidTid(&pid, &tid);
+
+        if (config::get_title_blacklist(tid)) {
+            this->msg =
+                "Title is blacklisted!\n"
+                "Exit to use sys-tune";
+            return;
+        }
+
+        Result rc = tuneInitialize();
+
+        // not found can happen if the service isn't started
+        // connection refused can happen is the service was terminated by pmshell
+        if (R_VALUE(rc) == KERNELRESULT(NotFound) || R_VALUE(rc) == KERNELRESULT(ConnectionRefused)) {
+            u64 pid = 0;
+            const NcmProgramLocation programLocation{
+                .program_id = 0x4200000000000000,
+                .storageID  = NcmStorageId_None,
+            };
+            rc = pmshellInitialize();
+            if (R_SUCCEEDED(rc)) {
+                rc = pmshellLaunchProgram(0, &programLocation, &pid);
+                pmshellExit();
+            }
+            if (R_FAILED(rc) || pid == 0) {
+                this->fail = rc;
+                this->msg  = "  Failed to\n"
+                            "launch sysmodule";
+                return;
+            }
+            svcSleepThread(500'000'000ULL);
+            rc = tuneInitialize();
+        }
+
+        if (R_FAILED(rc)) {
+            this->msg  = "Something went wrong:";
+            this->fail = rc;
+            return;
+        }
+
+        if (R_FAILED(sdmc::Open())) {
+            this->msg  = "Failed sdmc::Open()";
+            return;
+        }
+
+        u32 api;
+        if (R_FAILED(tuneGetApiVersion(&api)) || api != TUNE_API_VERSION) {
+            this->msg = "   Unsupported\n"
+                        "sys-tune version!";
+        }
+    }
+
+    void exitServices() override {
+        sdmc::Close();
+        pm::Exit();
+        tuneExit();
+    }
+
+    std::unique_ptr<tsl::Gui> loadInitialGui() override {
+        if (this->msg) {
+            return std::make_unique<ErrorGui>(this->msg, this->fail);
+        } else {
+            return std::make_unique<MainGui>();
+        }
+    }
+};
+
+int main(int argc, char **argv) {
+    return tsl::loop<SysTuneOverlay>(argc, argv);
+}
diff --git a/overlay/source/symbol.hpp b/overlay/source/symbol.hpp
index ba76c7a..6a2fddb 100644
--- a/overlay/source/symbol.hpp
+++ b/overlay/source/symbol.hpp
@@ -1,535 +1,535 @@
-#pragma once
-
-#include </mnt/c/Users/carel/Downloads/sys-tune-master-128bitsound/sys-tune-master/libtesla/include/tesla.hpp>
-
-class AlphaSymbol {
-  private:
-    s32 m_width;
-    s32 m_height;
-    const u8 *m_data;
-
-  public:
-    constexpr AlphaSymbol(size_t width, size_t height, const u8 *data)
-        : m_width(width), m_height(height), m_data(data) {}
-
-    void draw(s32 x, s32 y, tsl::gfx::Renderer *renderer, tsl::Color color) const {
-        const u8 *ptr = this->m_data;
-        const s32 x_offset = x - (m_width / 2);
-        const s32 y_offset = y - (m_height / 2);
-        for (s32 y1 = 0; y1 < m_height; y1++) {
-            for (s32 x1 = 0; x1 < m_width; x1++) {
-                color.a = static_cast<u16>(ptr[0] >> 4);
-                renderer->setPixelBlendSrc(x_offset + x1, y_offset + y1, renderer->a(color));
-                ptr++;
-            }
-        }
-    }
-};
-
-namespace symbol {
-    namespace repeat {
-        namespace all {
-            constexpr const size_t height = 30;
-            constexpr const size_t width = 29;
-            constexpr const unsigned char data[height * width] = {
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.............................
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x74, 0xf0, 0x86, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //..................+@%........
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xda, 0xff, 0xff, 0x92, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //..................@@@%.......
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xde, 0xff, 0xff, 0xff, 0x92, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //..................@@@@%......
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xde, 0xff, 0xff, 0xff, 0xff, 0x92, 0x00, 0x00, 0x00, 0x00, 0x00, //..................@@@@@%.....
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x84, 0xc4, 0xde, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xfc, 0xff, 0xff, 0xff, 0xff, 0xff, 0x92, 0x00, 0x00, 0x00, 0x00, //......%@@@@@@@@@@@@@@@@@%....
-                0x00, 0x00, 0x00, 0x14, 0xa8, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x36, 0x00, 0x00, 0x00, //....%@@@@@@@@@@@@@@@@@@@@....
-                0x00, 0x00, 0x2c, 0xe0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc, 0x20, 0x00, 0x00, 0x00, //...@@@@@@@@@@@@@@@@@@@@@@....
-                0x00, 0x1c, 0xe6, 0xff, 0xff, 0xff, 0xff, 0xf8, 0xce, 0xb0, 0xae, 0xae, 0xae, 0xae, 0xae, 0xae, 0xae, 0xae, 0xf4, 0xff, 0xff, 0xff, 0xff, 0xfc, 0x56, 0x00, 0x00, 0x00, 0x00, //..@@@@@@@%%%%%%%%%@@@@@@+....
-                0x00, 0xbc, 0xff, 0xff, 0xff, 0xf4, 0x6c, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xde, 0xff, 0xff, 0xff, 0xfc, 0x56, 0x00, 0x30, 0xbc, 0x70, 0x00, //.%@@@@+...........@@@@@+..%+.
-                0x44, 0xff, 0xff, 0xff, 0xf0, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xde, 0xff, 0xff, 0xfc, 0x56, 0x00, 0x10, 0xe6, 0xff, 0xff, 0x34, //+@@@@.............@@@@+..@@@.
-                0x9c, 0xff, 0xff, 0xff, 0x5c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xd2, 0xff, 0xfc, 0x56, 0x00, 0x00, 0x2e, 0xff, 0xff, 0xff, 0x90, //%@@@+.............@@@+...@@@%
-                0xda, 0xff, 0xff, 0xea, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4a, 0xb6, 0x4c, 0x00, 0x00, 0x00, 0x02, 0xe6, 0xff, 0xff, 0xcc, //@@@@..............+%+....@@@@
-                0xf2, 0xff, 0xff, 0xba, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb2, 0xff, 0xff, 0xea, //@@@%.....................%@@@
-                0xf2, 0xff, 0xff, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb0, 0xff, 0xff, 0xf6, //@@@%.....................%@@@
-                0xd6, 0xff, 0xff, 0xd4, 0x00, 0x00, 0x00, 0x00, 0x1e, 0x70, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe2, 0xff, 0xff, 0xde, //@@@@.....+...............@@@@
-                0xa8, 0xff, 0xff, 0xff, 0x20, 0x00, 0x00, 0x2a, 0xe8, 0xff, 0xc4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0xff, 0xff, 0xff, 0xaa, //%@@@....@@@..............@@@%
-                0x4c, 0xff, 0xff, 0xfa, 0x1c, 0x00, 0x2a, 0xe8, 0xff, 0xff, 0xde, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0xd8, 0xff, 0xff, 0xff, 0x5a, //+@@@...@@@@.............@@@@+
-                0x04, 0xc8, 0xf8, 0x5c, 0x00, 0x2a, 0xe8, 0xff, 0xff, 0xff, 0xde, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0xcc, 0xff, 0xff, 0xff, 0xd6, 0x02, //.@@+..@@@@@............@@@@@.
-                0x00, 0x00, 0x06, 0x00, 0x2a, 0xe8, 0xff, 0xff, 0xff, 0xff, 0xec, 0x6e, 0x6e, 0x6e, 0x6e, 0x6e, 0x6e, 0x6e, 0x6e, 0x70, 0x8c, 0xbc, 0xfa, 0xff, 0xff, 0xff, 0xf8, 0x3a, 0x00, //.....@@@@@@+++++++++%%@@@@@..
-                0x00, 0x00, 0x00, 0x10, 0xe8, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfa, 0x56, 0x00, 0x00, //....@@@@@@@@@@@@@@@@@@@@@@+..
-                0x00, 0x00, 0x00, 0x50, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xe2, 0x3a, 0x00, 0x00, 0x00, //...+@@@@@@@@@@@@@@@@@@@@@....
-                0x00, 0x00, 0x00, 0x08, 0xc6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc, 0xc6, 0x6e, 0x08, 0x00, 0x00, 0x00, 0x00, //....@@@@@@@@@@@@@@@@@@@+.....
-                0x00, 0x00, 0x00, 0x00, 0x0c, 0xc6, 0xff, 0xff, 0xff, 0xff, 0xe4, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x20, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.....@@@@@@..................
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0xc6, 0xff, 0xff, 0xff, 0xde, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //......@@@@@..................
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0xc6, 0xff, 0xff, 0xde, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.......@@@@..................
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0xc4, 0xff, 0xa6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //........@@%..................
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x36, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.............................
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.............................
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.............................
-            };
-            constexpr const AlphaSymbol symbol(width, height, data);
-        }
-
-        namespace one {
-            constexpr const size_t height = 30;
-            constexpr const size_t width = 29;
-            constexpr const unsigned char data[height * width] = {
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.............................
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x74, 0xf0, 0x86, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //..................+@%........
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xda, 0xff, 0xff, 0x92, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //..................@@@%.......
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xde, 0xff, 0xff, 0xff, 0x92, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //..................@@@@%......
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xde, 0xff, 0xff, 0xff, 0xff, 0x92, 0x00, 0x00, 0x00, 0x00, 0x00, //..................@@@@@%.....
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x84, 0xc4, 0xde, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xfc, 0xff, 0xff, 0xff, 0xff, 0xff, 0x92, 0x00, 0x00, 0x00, 0x00, //......%@@@@@@@@@@@@@@@@@%....
-                0x00, 0x00, 0x00, 0x14, 0xa8, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x36, 0x00, 0x00, 0x00, //....%@@@@@@@@@@@@@@@@@@@@....
-                0x00, 0x00, 0x2c, 0xe0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc, 0x20, 0x00, 0x00, 0x00, //...@@@@@@@@@@@@@@@@@@@@@@....
-                0x00, 0x1c, 0xe6, 0xff, 0xff, 0xff, 0xff, 0xf8, 0xce, 0xb0, 0xae, 0xae, 0xae, 0xae, 0xae, 0xae, 0xae, 0xae, 0xf4, 0xff, 0xff, 0xff, 0xff, 0xfc, 0x56, 0x00, 0x00, 0x00, 0x00, //..@@@@@@@%%%%%%%%%@@@@@@+....
-                0x00, 0xbc, 0xff, 0xff, 0xff, 0xf4, 0x6c, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xde, 0xff, 0xff, 0xff, 0xfc, 0x56, 0x00, 0x30, 0xbc, 0x70, 0x00, //.%@@@@+...........@@@@@+..%+.
-                0x44, 0xff, 0xff, 0xff, 0xf0, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3a, 0x7c, 0x22, 0x00, 0xde, 0xff, 0xff, 0xfc, 0x56, 0x00, 0x10, 0xe6, 0xff, 0xff, 0x34, //+@@@@..........+..@@@@+..@@@.
-                0x9c, 0xff, 0xff, 0xff, 0x5c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6c, 0xff, 0xff, 0x7a, 0x00, 0xd2, 0xff, 0xfc, 0x56, 0x00, 0x00, 0x2e, 0xff, 0xff, 0xff, 0x90, //%@@@+........+@@+.@@@+...@@@%
-                0xda, 0xff, 0xff, 0xea, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0xfc, 0xff, 0xff, 0x7a, 0x00, 0x4a, 0xb6, 0x4c, 0x00, 0x00, 0x00, 0x02, 0xe6, 0xff, 0xff, 0xcc, //@@@@.........@@@+.+%+....@@@@
-                0xf2, 0xff, 0xff, 0xba, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0xb4, 0xff, 0x7a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb2, 0xff, 0xff, 0xea, //@@@%.........+%@+........%@@@
-                0xf2, 0xff, 0xff, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9a, 0xff, 0x7a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb0, 0xff, 0xff, 0xf6, //@@@%..........%@+........%@@@
-                0xd6, 0xff, 0xff, 0xd4, 0x00, 0x00, 0x00, 0x00, 0x1e, 0x70, 0x1c, 0x00, 0x00, 0x08, 0xa2, 0xff, 0x84, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe2, 0xff, 0xff, 0xde, //@@@@.....+....%@%........@@@@
-                0xa8, 0xff, 0xff, 0xff, 0x20, 0x00, 0x00, 0x2a, 0xe8, 0xff, 0xc4, 0x00, 0x14, 0xfa, 0xff, 0xff, 0xff, 0xec, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0xff, 0xff, 0xff, 0xaa, //%@@@....@@@..@@@@@.......@@@%
-                0x4c, 0xff, 0xff, 0xfa, 0x1c, 0x00, 0x2a, 0xe8, 0xff, 0xff, 0xde, 0x00, 0x0a, 0xb2, 0xbe, 0xbe, 0xbe, 0xa4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0xd8, 0xff, 0xff, 0xff, 0x5a, //+@@@...@@@@..%%%%%......@@@@+
-                0x04, 0xc8, 0xf8, 0x5c, 0x00, 0x2a, 0xe8, 0xff, 0xff, 0xff, 0xde, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0xcc, 0xff, 0xff, 0xff, 0xd6, 0x02, //.@@+..@@@@@............@@@@@.
-                0x00, 0x00, 0x06, 0x00, 0x2a, 0xe8, 0xff, 0xff, 0xff, 0xff, 0xec, 0x6e, 0x6e, 0x6e, 0x6e, 0x6e, 0x6e, 0x6e, 0x6e, 0x70, 0x8c, 0xbc, 0xfa, 0xff, 0xff, 0xff, 0xf8, 0x3a, 0x00, //.....@@@@@@+++++++++%%@@@@@..
-                0x00, 0x00, 0x00, 0x10, 0xe8, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfa, 0x56, 0x00, 0x00, //....@@@@@@@@@@@@@@@@@@@@@@+..
-                0x00, 0x00, 0x00, 0x50, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xe2, 0x3a, 0x00, 0x00, 0x00, //...+@@@@@@@@@@@@@@@@@@@@@....
-                0x00, 0x00, 0x00, 0x08, 0xc6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc, 0xc6, 0x6e, 0x08, 0x00, 0x00, 0x00, 0x00, //....@@@@@@@@@@@@@@@@@@@+.....
-                0x00, 0x00, 0x00, 0x00, 0x0c, 0xc6, 0xff, 0xff, 0xff, 0xff, 0xe4, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x20, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.....@@@@@@..................
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0xc6, 0xff, 0xff, 0xff, 0xde, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //......@@@@@..................
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0xc6, 0xff, 0xff, 0xde, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.......@@@@..................
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0xc4, 0xff, 0xa6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //........@@%..................
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x36, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.............................
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.............................
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.............................
-            };
-            constexpr const AlphaSymbol symbol(width, height, data);
-        }
-    }
-
-    namespace sd {
-        constexpr const size_t height = 30;
-        constexpr const size_t width = 23;
-        constexpr const unsigned char data[height * width] = {
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0xe0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xe6, 0x10, //......@@@@@@@@@@@@@@@@.
-            0x00, 0x00, 0x00, 0x00, 0x1c, 0xdc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x82, //.....@@@@@@@@@@@@@@@@@%
-            0x00, 0x00, 0x00, 0x1a, 0xda, 0xff, 0xe8, 0xbe, 0xbe, 0xde, 0xff, 0xd8, 0xbe, 0xbe, 0xf0, 0xff, 0xc8, 0xbe, 0xc0, 0xff, 0xff, 0xff, 0xb8, //....@@@%%@@@%%@@@%@@@@%
-            0x00, 0x00, 0x18, 0xd8, 0xff, 0xff, 0xa2, 0x00, 0x00, 0x7e, 0xff, 0x62, 0x00, 0x00, 0xc2, 0xff, 0x22, 0x00, 0x02, 0xff, 0xff, 0xff, 0xc6, //...@@@%..+@+..@@...@@@@
-            0x00, 0x14, 0xd4, 0xff, 0xff, 0xff, 0xa2, 0x00, 0x00, 0x7e, 0xff, 0x62, 0x00, 0x00, 0xc2, 0xff, 0x22, 0x00, 0x02, 0xff, 0xff, 0xff, 0xc6, //..@@@@%..+@+..@@...@@@@
-            0x12, 0xd2, 0xff, 0xff, 0xff, 0xff, 0xa2, 0x00, 0x00, 0x7e, 0xff, 0x62, 0x00, 0x00, 0xc2, 0xff, 0x22, 0x00, 0x02, 0xff, 0xff, 0xff, 0xc6, //.@@@@@%..+@+..@@...@@@@
-            0xce, 0xff, 0xff, 0xff, 0xff, 0xff, 0xa2, 0x00, 0x00, 0x7e, 0xff, 0x62, 0x00, 0x00, 0xc2, 0xff, 0x22, 0x00, 0x02, 0xff, 0xff, 0xff, 0xc6, //@@@@@@%..+@+..@@...@@@@
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xa2, 0x00, 0x00, 0x7e, 0xff, 0x62, 0x00, 0x00, 0xc2, 0xff, 0x22, 0x00, 0x02, 0xff, 0xff, 0xff, 0xc6, //@@@@@@%..+@+..@@...@@@@
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xcc, 0x6e, 0x6e, 0xb6, 0xff, 0xa8, 0x6e, 0x6e, 0xde, 0xff, 0x84, 0x6e, 0x72, 0xff, 0xff, 0xff, 0xc6, //@@@@@@@++%@%++@@%++@@@@
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc6, //@@@@@@@@@@@@@@@@@@@@@@@
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc6, //@@@@@@@@@@@@@@@@@@@@@@@
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc6, //@@@@@@@@@@@@@@@@@@@@@@@
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc6, //@@@@@@@@@@@@@@@@@@@@@@@
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc6, //@@@@@@@@@@@@@@@@@@@@@@@
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc6, //@@@@@@@@@@@@@@@@@@@@@@@
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc6, //@@@@@@@@@@@@@@@@@@@@@@@
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc6, //@@@@@@@@@@@@@@@@@@@@@@@
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc6, //@@@@@@@@@@@@@@@@@@@@@@@
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc6, //@@@@@@@@@@@@@@@@@@@@@@@
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc6, //@@@@@@@@@@@@@@@@@@@@@@@
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc6, //@@@@@@@@@@@@@@@@@@@@@@@
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc6, //@@@@@@@@@@@@@@@@@@@@@@@
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc6, //@@@@@@@@@@@@@@@@@@@@@@@
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc6, //@@@@@@@@@@@@@@@@@@@@@@@
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc6, //@@@@@@@@@@@@@@@@@@@@@@@
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc6, //@@@@@@@@@@@@@@@@@@@@@@@
-            0xe8, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xae, //@@@@@@@@@@@@@@@@@@@@@@%
-            0xa8, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6c, //%@@@@@@@@@@@@@@@@@@@@@+
-            0x1a, 0xe6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc2, 0x02, //.@@@@@@@@@@@@@@@@@@@@@.
-            0x00, 0x16, 0x98, 0xd0, 0xde, 0xde, 0xde, 0xde, 0xde, 0xde, 0xde, 0xde, 0xde, 0xde, 0xde, 0xde, 0xde, 0xde, 0xde, 0xc6, 0x7e, 0x06, 0x00, //..%@@@@@@@@@@@@@@@@@+..
-        };
-        constexpr const AlphaSymbol symbol(width, height, data);
-    }
-
-    namespace shuffle {
-        constexpr const size_t height = 30;
-        constexpr const size_t width = 29;
-        constexpr const unsigned char data[height * width] = {
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.............................
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x52, 0x6c, 0x06, 0x00, 0x00, 0x00, 0x00, //......................++.....
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x28, 0xff, 0xff, 0xb6, 0x06, 0x00, 0x00, 0x00, //......................@@%....
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0xff, 0xff, 0xff, 0xb6, 0x06, 0x00, 0x00, //......................@@@%...
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0xff, 0xff, 0xff, 0xff, 0xb6, 0x06, 0x00, //......................@@@@%..
-            0xd2, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0x9e, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4e, 0xe6, 0xee, 0xee, 0xf2, 0xff, 0xff, 0xff, 0xff, 0xff, 0xb6, 0x06, //@@@@@@@%.........+@@@@@@@@@%.
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x9c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x42, 0xf8, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xa0, //@@@@@@@@%.......+@@@@@@@@@@@%
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x8a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0xf2, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xe2, //@@@@@@@@@%......@@@@@@@@@@@@@
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x00, 0x00, 0x00, 0x2a, 0xec, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x62, //@@@@@@@@@@+....@@@@@@@@@@@@@+
-            0x7a, 0x96, 0x96, 0x96, 0x96, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0x34, 0x00, 0x20, 0xe4, 0xff, 0xff, 0xff, 0xff, 0xe4, 0x96, 0xb0, 0xff, 0xff, 0xff, 0xff, 0xff, 0x68, 0x00, //+%%%%%@@@@@...@@@@@@%%@@@@@+.
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0xb4, 0xff, 0xff, 0xff, 0x8c, 0x00, 0x18, 0xdc, 0xff, 0xff, 0xff, 0xff, 0xec, 0x2a, 0x00, 0x3e, 0xff, 0xff, 0xff, 0xff, 0x68, 0x00, 0x00, //......%@@@%..@@@@@@...@@@@+..
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0xc2, 0xff, 0xa2, 0x00, 0x10, 0xd2, 0xff, 0xff, 0xff, 0xff, 0xf2, 0x34, 0x00, 0x00, 0x3e, 0xff, 0xff, 0xff, 0x68, 0x00, 0x00, 0x00, //.......@@%..@@@@@@....@@@+...
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x8c, 0x04, 0x0a, 0xc6, 0xff, 0xff, 0xff, 0xff, 0xf8, 0x42, 0x00, 0x00, 0x00, 0x14, 0xec, 0xfc, 0x68, 0x00, 0x00, 0x00, 0x00, //........%..@@@@@@+....@@+....
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0xba, 0xff, 0xff, 0xff, 0xff, 0xfa, 0x4e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, //..........%@@@@@+............
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xac, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.........%@@@@@+.............
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9e, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, 0x00, 0x2e, 0x1e, 0x00, 0x00, 0x00, 0x08, 0xba, 0xd4, 0x36, 0x00, 0x00, 0x00, 0x00, //........%@@@@@+.......%@.....
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8c, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x22, 0xe8, 0xd8, 0x14, 0x00, 0x00, 0x3a, 0xff, 0xff, 0xf0, 0x36, 0x00, 0x00, 0x00, //.......%@@@@@+..@@....@@@....
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xff, 0xff, 0xff, 0xff, 0xff, 0x90, 0x00, 0x16, 0xdc, 0xff, 0xff, 0xce, 0x0e, 0x00, 0x3e, 0xff, 0xff, 0xff, 0xf0, 0x36, 0x00, 0x00, //......+@@@@@%..@@@@...@@@@...
-            0x3e, 0x56, 0x56, 0x56, 0x56, 0x82, 0xff, 0xff, 0xff, 0xff, 0xff, 0xa0, 0x00, 0x00, 0xc8, 0xff, 0xff, 0xff, 0xff, 0xc2, 0x56, 0x80, 0xff, 0xff, 0xff, 0xff, 0xf0, 0x36, 0x00, //.++++%@@@@@%..@@@@@@+%@@@@@..
-            0xfa, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xae, 0x02, 0x00, 0x00, 0x56, 0xfc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf0, 0x36, //@@@@@@@@@@%...+@@@@@@@@@@@@@.
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xbc, 0x06, 0x00, 0x00, 0x00, 0x00, 0x64, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xd2, //@@@@@@@@@%.....+@@@@@@@@@@@@@
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc8, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x74, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc2, //@@@@@@@@@.......+@@@@@@@@@@@@
-            0xf4, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xd4, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x84, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xde, 0x1e, //@@@@@@@@.........%@@@@@@@@@@.
-            0x1c, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x26, 0x2e, 0x2e, 0x62, 0xff, 0xff, 0xff, 0xff, 0xde, 0x1e, 0x00, //.....................+@@@@@..
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0xff, 0xff, 0xff, 0xde, 0x1e, 0x00, 0x00, //......................@@@@...
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0xff, 0xff, 0xde, 0x1e, 0x00, 0x00, 0x00, //......................@@@....
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x94, 0xb2, 0x1e, 0x00, 0x00, 0x00, 0x00, //......................%%.....
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.............................
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.............................
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.............................
-        };
-        constexpr const AlphaSymbol symbol(width, height, data);
-    }
-
-    namespace volume {
-        namespace mute {
-            constexpr const size_t height = 30;
-            constexpr const size_t width = 15;
-            constexpr const unsigned char data[height * width] = {
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //...............
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //...............
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //...............
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x84, 0x9c, 0x12, //............%%.
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0xaa, 0xff, 0xff, 0x72, //...........%@@+
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0xaa, 0xff, 0xff, 0xff, 0x7e, //..........%@@@+
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0xaa, 0xff, 0xff, 0xff, 0xff, 0x7e, //.........%@@@@+
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xa8, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //........%@@@@@+
-                0x08, 0x48, 0x4e, 0x4e, 0x4e, 0x4e, 0x4e, 0xaa, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //.++++++%@@@@@@+
-                0xc6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //@@@@@@@@@@@@@@+
-                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //@@@@@@@@@@@@@@+
-                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //@@@@@@@@@@@@@@+
-                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //@@@@@@@@@@@@@@+
-                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //@@@@@@@@@@@@@@+
-                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //@@@@@@@@@@@@@@+
-                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //@@@@@@@@@@@@@@+
-                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //@@@@@@@@@@@@@@+
-                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //@@@@@@@@@@@@@@+
-                0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //@@@@@@@@@@@@@@+
-                0x3c, 0x8c, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0xd6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //.%%%%%%@@@@@@@+
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x16, 0xd6, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //........@@@@@@+
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x16, 0xd6, 0xff, 0xff, 0xff, 0xff, 0x7e, //.........@@@@@+
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x16, 0xd6, 0xff, 0xff, 0xff, 0x7e, //..........@@@@+
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0xd6, 0xff, 0xff, 0x7a, //...........@@@+
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0xc8, 0xe0, 0x2c, //............@@.
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //...............
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //...............
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //...............
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //...............
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //...............
-            };
-            constexpr const AlphaSymbol symbol(width, height, data);
-        }
-
-        namespace low {
-            constexpr const size_t height = 30;
-            constexpr const size_t width = 22;
-            constexpr const unsigned char data[height * width] = {
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //......................
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //......................
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //......................
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0x8e, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //............+%........
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9c, 0xff, 0xff, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //...........%@@+.......
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9c, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //..........%@@@+.......
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9c, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.........%@@@@+.......
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9a, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //........%@@@@@+.......
-                0x02, 0x38, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x9c, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.......%@@@@@@+.......
-                0xba, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x0c, 0x3e, 0x0c, 0x00, 0x00, //%@@@@@@@@@@@@@+.......
-                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0xae, 0xff, 0xda, 0x26, 0x00, //@@@@@@@@@@@@@@+..%@@..
-                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0xae, 0xff, 0xff, 0xd4, 0x06, //@@@@@@@@@@@@@@+..%@@@.
-                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x0e, 0xc6, 0xff, 0xff, 0x5e, //@@@@@@@@@@@@@@+...@@@+
-                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x26, 0xff, 0xff, 0xa4, //@@@@@@@@@@@@@@+....@@%
-                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x10, 0xff, 0xff, 0xb2, //@@@@@@@@@@@@@@+....@@%
-                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x7c, 0xff, 0xff, 0x82, //@@@@@@@@@@@@@@+...+@@%
-                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x6c, 0xff, 0xff, 0xf8, 0x20, //@@@@@@@@@@@@@@+..+@@@.
-                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0xc8, 0xff, 0xff, 0x66, 0x00, //@@@@@@@@@@@@@@+..@@@+.
-                0xf2, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x46, 0xac, 0x48, 0x00, 0x00, //@@@@@@@@@@@@@@+..+%+..
-                0x46, 0x9c, 0x9e, 0x9e, 0x9e, 0x9e, 0x9e, 0xe0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //+%%%%%%@@@@@@@+.......
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0xdc, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //........@@@@@@+.......
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0xdc, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.........@@@@@+.......
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0xdc, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //..........@@@@+.......
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0xdc, 0xff, 0xff, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //...........@@@+.......
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0xd2, 0xea, 0x32, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //............@@........
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //......................
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //......................
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //......................
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //......................
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //......................
-            };
-            constexpr const AlphaSymbol symbol(width, height, data);
-        }
-
-        namespace high {
-            constexpr const size_t height = 30;
-            constexpr const size_t width = 31;
-            constexpr const unsigned char data[height * width] = {
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x60, 0x28, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //........................+........
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xa6, 0xff, 0xf6, 0x66, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.......................%@@+......
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8a, 0xff, 0xff, 0xff, 0x8a, 0x00, 0x00, 0x00, 0x00, 0x00, //.......................%@@@%.....
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x84, 0x9c, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8a, 0xff, 0xff, 0xff, 0x88, 0x00, 0x00, 0x00, 0x00, //............%%..........%@@@%....
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0xaa, 0xff, 0xff, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6a, 0xfc, 0xff, 0xff, 0x5a, 0x00, 0x00, 0x00, //...........%@@+..........+@@@+...
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0xaa, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50, 0xde, 0x8a, 0x02, 0x00, 0x00, 0x68, 0xff, 0xff, 0xf0, 0x1a, 0x00, 0x00, //..........%@@@+.....+@%...+@@@...
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0xaa, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb4, 0xff, 0xff, 0xba, 0x10, 0x00, 0x00, 0xb0, 0xff, 0xff, 0xa6, 0x00, 0x00, //.........%@@@@+.....%@@%...%@@%..
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xa8, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x32, 0xf2, 0xff, 0xff, 0xbc, 0x02, 0x00, 0x14, 0xe6, 0xff, 0xfa, 0x16, 0x00, //........%@@@@@+......@@@%...@@@..
-                0x08, 0x48, 0x4e, 0x4e, 0x4e, 0x4e, 0x4e, 0xaa, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0xea, 0xff, 0xff, 0x78, 0x00, 0x00, 0x78, 0xff, 0xff, 0x76, 0x00, //.++++++%@@@@@@+.......@@@+..+@@+.
-                0xc6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x12, 0x5a, 0x12, 0x00, 0x00, 0x2a, 0xf4, 0xff, 0xf8, 0x14, 0x00, 0x18, 0xfa, 0xff, 0xd8, 0x00, //@@@@@@@@@@@@@@+...+....@@@...@@@.
-                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0xb4, 0xff, 0xe2, 0x2c, 0x00, 0x00, 0x78, 0xff, 0xff, 0x72, 0x00, 0x00, 0xac, 0xff, 0xff, 0x2c, //@@@@@@@@@@@@@@+..%@@...+@@+..%@@.
-                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0xa6, 0xff, 0xff, 0xda, 0x08, 0x00, 0x10, 0xfa, 0xff, 0xd0, 0x00, 0x00, 0x6e, 0xff, 0xff, 0x5e, //@@@@@@@@@@@@@@+..%@@@...@@@..+@@+
-                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x0a, 0xbc, 0xff, 0xff, 0x64, 0x00, 0x00, 0xd2, 0xff, 0xf8, 0x00, 0x00, 0x4e, 0xff, 0xff, 0x84, //@@@@@@@@@@@@@@+...%@@+..@@@..+@@%
-                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x22, 0xff, 0xff, 0xa6, 0x00, 0x00, 0xa4, 0xff, 0xff, 0x1a, 0x00, 0x2c, 0xff, 0xff, 0x94, //@@@@@@@@@@@@@@+....@@%..%@@...@@%
-                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x12, 0xff, 0xff, 0xb0, 0x00, 0x00, 0x98, 0xff, 0xff, 0x22, 0x00, 0x20, 0xff, 0xff, 0x98, //@@@@@@@@@@@@@@+....@@%..%@@...@@%
-                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x86, 0xff, 0xff, 0x7e, 0x00, 0x00, 0xc6, 0xff, 0xfc, 0x04, 0x00, 0x38, 0xff, 0xff, 0x88, //@@@@@@@@@@@@@@+...%@@+..@@@...@@%
-                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x78, 0xff, 0xff, 0xf6, 0x1a, 0x00, 0x04, 0xf2, 0xff, 0xe0, 0x00, 0x00, 0x58, 0xff, 0xff, 0x6c, //@@@@@@@@@@@@@@+..+@@@...@@@..+@@+
-                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0xc6, 0xff, 0xfc, 0x5a, 0x00, 0x00, 0x58, 0xff, 0xff, 0x90, 0x00, 0x00, 0x98, 0xff, 0xff, 0x3a, //@@@@@@@@@@@@@@+..@@@+..+@@%..%@@.
-                0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x3a, 0x9c, 0x3a, 0x00, 0x00, 0x0e, 0xe0, 0xff, 0xff, 0x2a, 0x00, 0x02, 0xde, 0xff, 0xec, 0x06, //@@@@@@@@@@@@@@+...%....@@@...@@@.
-                0x3c, 0x8c, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0xd6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0xc6, 0xff, 0xff, 0xac, 0x00, 0x00, 0x4e, 0xff, 0xff, 0x90, 0x00, //.%%%%%%@@@@@@@+.......@@@%..+@@%.
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x16, 0xd6, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x14, 0xd4, 0xff, 0xff, 0xe2, 0x12, 0x00, 0x00, 0xc6, 0xff, 0xff, 0x2c, 0x00, //........@@@@@@+......@@@@...@@@..
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x16, 0xd6, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0xa0, 0xff, 0xff, 0xe8, 0x36, 0x00, 0x00, 0x68, 0xff, 0xff, 0xc6, 0x00, 0x00, //.........@@@@@+.....%@@@...+@@@..
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x16, 0xd6, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x88, 0xff, 0xc6, 0x1a, 0x00, 0x00, 0x2e, 0xf2, 0xff, 0xfc, 0x38, 0x00, 0x00, //..........@@@@+.....%@@....@@@...
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0xd6, 0xff, 0xff, 0x7a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2a, 0x02, 0x00, 0x00, 0x20, 0xe6, 0xff, 0xff, 0x8c, 0x00, 0x00, 0x00, //...........@@@+...........@@@%...
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0xc8, 0xe0, 0x2c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0xe8, 0xff, 0xff, 0xbc, 0x04, 0x00, 0x00, 0x00, //............@@..........+@@@%....
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4a, 0xfa, 0xff, 0xff, 0xc2, 0x0c, 0x00, 0x00, 0x00, 0x00, //.......................+@@@@.....
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb6, 0xff, 0xff, 0xa8, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, //.......................%@@%......
-                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x28, 0xac, 0x62, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //........................%+.......
-            };
-            constexpr const AlphaSymbol symbol(width, height, data);
-        }
-    }
-
-    namespace prev {
-        constexpr const size_t height = 26;
-        constexpr const size_t width = 16;
-        constexpr const unsigned char data[height * width] = {
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //................
-            0x38, 0x4e, 0x4e, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x1c, 0x00, //.++.............
-            0xec, 0xff, 0xff, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x98, 0xff, 0xf2, 0x22, //@@@%........%@@.
-            0xee, 0xff, 0xff, 0x82, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0xa6, 0xff, 0xff, 0xff, 0x5c, //@@@%.......%@@@+
-            0xee, 0xff, 0xff, 0x82, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0xb2, 0xff, 0xff, 0xff, 0xff, 0x5e, //@@@%......%@@@@+
-            0xee, 0xff, 0xff, 0x82, 0x00, 0x00, 0x00, 0x00, 0x0c, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5e, //@@@%.....%@@@@@+
-            0xee, 0xff, 0xff, 0x82, 0x00, 0x00, 0x00, 0x10, 0xc8, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5e, //@@@%....@@@@@@@+
-            0xee, 0xff, 0xff, 0x82, 0x00, 0x00, 0x18, 0xd2, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5e, //@@@%...@@@@@@@@+
-            0xee, 0xff, 0xff, 0x82, 0x00, 0x1e, 0xda, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5e, //@@@%..@@@@@@@@@+
-            0xee, 0xff, 0xff, 0x82, 0x26, 0xe2, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5e, //@@@%.@@@@@@@@@@+
-            0xee, 0xff, 0xff, 0xb0, 0xea, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5e, //@@@%@@@@@@@@@@@+
-            0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5e, //@@@@@@@@@@@@@@@+
-            0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5e, //@@@@@@@@@@@@@@@+
-            0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5e, //@@@@@@@@@@@@@@@+
-            0xee, 0xff, 0xff, 0xd6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5e, //@@@@@@@@@@@@@@@+
-            0xee, 0xff, 0xff, 0x82, 0x5e, 0xfc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5e, //@@@%+@@@@@@@@@@+
-            0xee, 0xff, 0xff, 0x82, 0x00, 0x50, 0xf8, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5e, //@@@%.+@@@@@@@@@+
-            0xee, 0xff, 0xff, 0x82, 0x00, 0x00, 0x42, 0xf4, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5e, //@@@%..+@@@@@@@@+
-            0xee, 0xff, 0xff, 0x82, 0x00, 0x00, 0x00, 0x38, 0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5e, //@@@%....@@@@@@@+
-            0xee, 0xff, 0xff, 0x82, 0x00, 0x00, 0x00, 0x00, 0x2c, 0xe6, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5e, //@@@%.....@@@@@@+
-            0xee, 0xff, 0xff, 0x82, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0xde, 0xff, 0xff, 0xff, 0xff, 0x5e, //@@@%......@@@@@+
-            0xee, 0xff, 0xff, 0x82, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1a, 0xd6, 0xff, 0xff, 0xff, 0x5e, //@@@%.......@@@@+
-            0xee, 0xff, 0xff, 0x82, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x14, 0xcc, 0xff, 0xff, 0x3a, //@@@%........@@@.
-            0x72, 0x8e, 0x8e, 0x34, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x78, 0x52, 0x00, //+%%..........++.
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //................
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //................
-        };
-        constexpr const AlphaSymbol symbol(width, height, data);
-    }
-
-    namespace next {
-        constexpr const size_t height = 26;
-        constexpr const size_t width = 16;
-        constexpr const unsigned char data[height * width] = {
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //................
-            0x06, 0x40, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x42, 0x4e, 0x4e, 0x0c, //.+..........+++.
-            0xac, 0xff, 0xe2, 0x28, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0xff, 0xff, 0xff, 0x5c, //%@@.........@@@+
-            0xec, 0xff, 0xff, 0xea, 0x32, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0xff, 0xff, 0xff, 0x5e, //@@@@........@@@+
-            0xee, 0xff, 0xff, 0xff, 0xf0, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0xff, 0xff, 0xff, 0x5e, //@@@@@.......@@@+
-            0xee, 0xff, 0xff, 0xff, 0xff, 0xf6, 0x4a, 0x00, 0x00, 0x00, 0x00, 0x0e, 0xff, 0xff, 0xff, 0x5e, //@@@@@@+.....@@@+
-            0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfa, 0x58, 0x00, 0x00, 0x00, 0x0e, 0xff, 0xff, 0xff, 0x5e, //@@@@@@@+....@@@+
-            0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc, 0x66, 0x00, 0x00, 0x0e, 0xff, 0xff, 0xff, 0x5e, //@@@@@@@@+...@@@+
-            0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x76, 0x00, 0x0e, 0xff, 0xff, 0xff, 0x5e, //@@@@@@@@@+..@@@+
-            0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x86, 0x10, 0xff, 0xff, 0xff, 0x5e, //@@@@@@@@@@%.@@@+
-            0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xa4, 0xff, 0xff, 0xff, 0x5e, //@@@@@@@@@@@%@@@+
-            0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5e, //@@@@@@@@@@@@@@@+
-            0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5e, //@@@@@@@@@@@@@@@+
-            0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5e, //@@@@@@@@@@@@@@@+
-            0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xca, 0xff, 0xff, 0xff, 0x5e, //@@@@@@@@@@@@@@@+
-            0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xb4, 0x18, 0xff, 0xff, 0xff, 0x5e, //@@@@@@@@@@%.@@@+
-            0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xa8, 0x04, 0x0e, 0xff, 0xff, 0xff, 0x5e, //@@@@@@@@@%..@@@+
-            0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x9a, 0x02, 0x00, 0x0e, 0xff, 0xff, 0xff, 0x5e, //@@@@@@@@%...@@@+
-            0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x8c, 0x00, 0x00, 0x00, 0x0e, 0xff, 0xff, 0xff, 0x5e, //@@@@@@@%....@@@+
-            0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x0e, 0xff, 0xff, 0xff, 0x5e, //@@@@@@+.....@@@+
-            0xee, 0xff, 0xff, 0xff, 0xff, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0xff, 0xff, 0xff, 0x5e, //@@@@@+......@@@+
-            0xee, 0xff, 0xff, 0xfc, 0x62, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0xff, 0xff, 0xff, 0x5e, //@@@@+.......@@@+
-            0xce, 0xff, 0xfa, 0x54, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0xff, 0xff, 0xff, 0x5e, //@@@+........@@@+
-            0x22, 0x84, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x82, 0x8e, 0x8e, 0x22, //.%..........%%%.
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //................
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //................
-        };
-        constexpr const AlphaSymbol symbol(width, height, data);
-    }
-
-    namespace backward {
-        constexpr const size_t height = 26;
-        constexpr const size_t width = 25;
-        constexpr const unsigned char data[height * width] = {
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.........................
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.........................
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.........................
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0xac, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x16, 0x9c, 0x90, 0x08, //..........%+..........%%.
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x54, 0xf6, 0xff, 0xff, 0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0xe0, 0xff, 0xff, 0x66, //........+@@@.........@@@+
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7a, 0xff, 0xff, 0xff, 0xff, 0x3e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0xf0, 0xff, 0xff, 0xff, 0x7e, //.......+@@@@.......+@@@@+
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0xa0, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6a, 0xfc, 0xff, 0xff, 0xff, 0xff, 0x7e, //......%@@@@@......+@@@@@+
-            0x00, 0x00, 0x00, 0x00, 0x12, 0xc0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0x00, 0x00, 0x00, 0x02, 0x90, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //.....@@@@@@@.....%@@@@@@+
-            0x00, 0x00, 0x00, 0x26, 0xda, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0x00, 0x00, 0x0c, 0xb4, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //....@@@@@@@@....%@@@@@@@+
-            0x00, 0x00, 0x40, 0xec, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0x00, 0x1c, 0xd0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //..+@@@@@@@@@...@@@@@@@@@+
-            0x00, 0x60, 0xfa, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0x34, 0xe6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //.+@@@@@@@@@@..@@@@@@@@@@+
-            0x76, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x80, 0xf4, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //+@@@@@@@@@@@%@@@@@@@@@@@+
-            0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //@@@@@@@@@@@@@@@@@@@@@@@@+
-            0xaa, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xaa, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //%@@@@@@@@@@@%@@@@@@@@@@@+
-            0x06, 0xa0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0x6a, 0xfc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //.%@@@@@@@@@@.+@@@@@@@@@@+
-            0x00, 0x00, 0x7a, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0x00, 0x46, 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //..+@@@@@@@@@..+@@@@@@@@@+
-            0x00, 0x00, 0x00, 0x56, 0xf6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0x00, 0x00, 0x2c, 0xe0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //...+@@@@@@@@....@@@@@@@@+
-            0x00, 0x00, 0x00, 0x00, 0x36, 0xe8, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0x00, 0x00, 0x00, 0x16, 0xc8, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //.....@@@@@@@.....@@@@@@@+
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0xd2, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0x00, 0x00, 0x00, 0x00, 0x08, 0xaa, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //......@@@@@@......%@@@@@+
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0xb6, 0xff, 0xff, 0xff, 0xff, 0x3e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x84, 0xff, 0xff, 0xff, 0xff, 0x7e, //.......%@@@@.......%@@@@+
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x94, 0xff, 0xff, 0xff, 0x34, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5e, 0xf8, 0xff, 0xff, 0x74, //........%@@@........+@@@+
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6a, 0xee, 0xb0, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0xe0, 0xd2, 0x1e, //.........+@%..........@@.
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.........................
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.........................
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.........................
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.........................
-        };
-        constexpr const AlphaSymbol symbol(width, height, data);
-    }
-
-    namespace forward {
-        constexpr const size_t height = 26;
-        constexpr const size_t width = 25;
-        constexpr const unsigned char data[height * width] = {
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.........................
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.........................
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.........................
-            0x4a, 0xb0, 0x58, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0xac, 0x7e, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //+%+..........%+..........
-            0xea, 0xff, 0xff, 0x88, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0xff, 0xff, 0xbc, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //@@@%........%@@%.........
-            0xff, 0xff, 0xff, 0xff, 0xae, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xd6, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //@@@@%.......%@@@@........
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xcc, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xea, 0x3a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //@@@@@@......%@@@@@.......
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xe2, 0x2e, 0x00, 0x00, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf8, 0x5a, 0x00, 0x00, 0x00, 0x00, 0x00, //@@@@@@@.....%@@@@@@+.....
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf2, 0x4c, 0x00, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00, //@@@@@@@@+...%@@@@@@@+....
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc, 0x6e, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xa4, 0x06, 0x00, 0x00, //@@@@@@@@@+..%@@@@@@@@%...
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x96, 0x02, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc4, 0x14, 0x00, //@@@@@@@@@@%.%@@@@@@@@@@..
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xb2, 0xc2, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xdc, 0x18, //@@@@@@@@@@@%@@@@@@@@@@@@.
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x70, //@@@@@@@@@@@@@@@@@@@@@@@@+
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xde, 0xcc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf8, 0x32, //@@@@@@@@@@@@@@@@@@@@@@@@.
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xcc, 0x18, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xea, 0x3a, 0x00, //@@@@@@@@@@@.%@@@@@@@@@@..
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xae, 0x0a, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xd6, 0x22, 0x00, 0x00, //@@@@@@@@@%..%@@@@@@@@@...
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x88, 0x00, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xba, 0x10, 0x00, 0x00, 0x00, //@@@@@@@@%...%@@@@@@@%....
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfa, 0x64, 0x00, 0x00, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x9a, 0x04, 0x00, 0x00, 0x00, 0x00, //@@@@@@@+....%@@@@@@%.....
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xee, 0x42, 0x00, 0x00, 0x00, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xfc, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //@@@@@@+.....%@@@@@+......
-            0xff, 0xff, 0xff, 0xff, 0xdc, 0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xf4, 0x4e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //@@@@@.......%@@@@+.......
-            0xf6, 0xff, 0xff, 0xc2, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb6, 0xff, 0xff, 0xe4, 0x32, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //@@@@........%@@@.........
-            0x82, 0xf4, 0x98, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4c, 0xec, 0xc0, 0x1a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //%@%.........+@@..........
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.........................
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.........................
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.........................
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.........................
-        };
-        constexpr const AlphaSymbol symbol(width, height, data);
-    }
-
-    namespace play {
-        constexpr const size_t height = 26;
-        constexpr const size_t width = 22;
-        constexpr const unsigned char data[height * width] = {
-            0x16, 0xa2, 0xc8, 0x78, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.%@+..................
-            0xbe, 0xff, 0xff, 0xff, 0xdc, 0x4a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //%@@@@+................
-            0xf8, 0xff, 0xff, 0xff, 0xff, 0xff, 0xb0, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //@@@@@@%...............
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf6, 0x7a, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //@@@@@@@@+.............
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xda, 0x46, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //@@@@@@@@@@+...........
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xac, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //@@@@@@@@@@@%..........
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf4, 0x78, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //@@@@@@@@@@@@@+........
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xd8, 0x44, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //@@@@@@@@@@@@@@@+......
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xa8, 0x1a, 0x00, 0x00, 0x00, 0x00, //@@@@@@@@@@@@@@@@%.....
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf2, 0x74, 0x04, 0x00, 0x00, //@@@@@@@@@@@@@@@@@@+...
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xd4, 0x34, 0x00, //@@@@@@@@@@@@@@@@@@@@..
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf2, 0x1e, //@@@@@@@@@@@@@@@@@@@@@.
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5e, //@@@@@@@@@@@@@@@@@@@@@+
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x34, //@@@@@@@@@@@@@@@@@@@@@.
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf6, 0x68, 0x00, //@@@@@@@@@@@@@@@@@@@@+.
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xb4, 0x22, 0x00, 0x00, //@@@@@@@@@@@@@@@@@@%...
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xe0, 0x4e, 0x00, 0x00, 0x00, 0x00, //@@@@@@@@@@@@@@@@@+....
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf8, 0x84, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, //@@@@@@@@@@@@@@@%......
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xb8, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //@@@@@@@@@@@@@%........
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xe2, 0x52, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //@@@@@@@@@@@@+.........
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfa, 0x88, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //@@@@@@@@@@%...........
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xbc, 0x28, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //@@@@@@@@%.............
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xe4, 0x56, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //@@@@@@@+..............
-            0xd4, 0xff, 0xff, 0xff, 0xfa, 0x8a, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //@@@@@%................
-            0x3c, 0xe8, 0xfc, 0xb8, 0x2a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.@@%..................
-            0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //......................
-        };
-        constexpr const AlphaSymbol symbol(width, height, data);
-    }
-
-    namespace pause {
-        constexpr const size_t height = 26;
-        constexpr const size_t width = 22;
-        constexpr const unsigned char data[height * width] = {
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //......................
-            0x00, 0x2a, 0x58, 0x5a, 0x5a, 0x5a, 0x5a, 0x38, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1a, 0x54, 0x5a, 0x5a, 0x5a, 0x5a, 0x48, 0x06, 0x00, //..+++++.......++++++..
-            0x68, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x94, 0x00, 0x00, 0x00, 0x34, 0xf2, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xcc, 0x06, //+@@@@@@@%....@@@@@@@@.
-            0xea, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x16, 0x00, 0x00, 0xaa, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x56, //@@@@@@@@@...%@@@@@@@@+
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x2e, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@...%@@@@@@@@+
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x2e, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@...%@@@@@@@@+
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x2e, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@...%@@@@@@@@+
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x2e, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@...%@@@@@@@@+
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x2e, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@...%@@@@@@@@+
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x2e, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@...%@@@@@@@@+
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x2e, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@...%@@@@@@@@+
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x2e, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@...%@@@@@@@@+
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x2e, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@...%@@@@@@@@+
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x2e, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@...%@@@@@@@@+
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x2e, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@...%@@@@@@@@+
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x2e, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@...%@@@@@@@@+
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x2e, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@...%@@@@@@@@+
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x2e, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@...%@@@@@@@@+
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x2e, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@...%@@@@@@@@+
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x2e, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@...%@@@@@@@@+
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x2e, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@...%@@@@@@@@+
-            0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x1e, 0x00, 0x00, 0xb0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5e, //@@@@@@@@@...%@@@@@@@@+
-            0x8c, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xb8, 0x00, 0x00, 0x00, 0x4e, 0xfc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xe4, 0x12, //%@@@@@@@%...+@@@@@@@@.
-            0x00, 0x54, 0x82, 0x82, 0x82, 0x82, 0x82, 0x62, 0x06, 0x00, 0x00, 0x00, 0x00, 0x38, 0x7c, 0x82, 0x82, 0x82, 0x82, 0x72, 0x18, 0x00, //.+%%%%%+......+%%%%+..
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //......................
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //......................
-        };
-        constexpr const AlphaSymbol symbol(width, height, data);
-    }
-
-    namespace stop {
-        constexpr const size_t height = 26;
-        constexpr const size_t width = 22;
-        constexpr const unsigned char data[height * width] = {
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //......................
-            0x00, 0x20, 0x4c, 0x4e, 0x4e, 0x4e, 0x4e, 0x4e, 0x4e, 0x4e, 0x4e, 0x4e, 0x4e, 0x4e, 0x4e, 0x4e, 0x4e, 0x4e, 0x4e, 0x3c, 0x02, 0x00, //..+++++++++++++++++...
-            0x60, 0xfc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc2, 0x04, //+@@@@@@@@@@@@@@@@@@@@.
-            0xe6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x54, //@@@@@@@@@@@@@@@@@@@@@+
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@@@@@@@@@@@@@+
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@@@@@@@@@@@@@+
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@@@@@@@@@@@@@+
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@@@@@@@@@@@@@+
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@@@@@@@@@@@@@+
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@@@@@@@@@@@@@+
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@@@@@@@@@@@@@+
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@@@@@@@@@@@@@+
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@@@@@@@@@@@@@+
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@@@@@@@@@@@@@+
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@@@@@@@@@@@@@+
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@@@@@@@@@@@@@+
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@@@@@@@@@@@@@+
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@@@@@@@@@@@@@+
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@@@@@@@@@@@@@+
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@@@@@@@@@@@@@+
-            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@@@@@@@@@@@@@+
-            0xf2, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x60, //@@@@@@@@@@@@@@@@@@@@@+
-            0x94, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xea, 0x16, //%@@@@@@@@@@@@@@@@@@@@.
-            0x02, 0x60, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x7e, 0x1e, 0x00, //.+%%%%%%%%%%%%%%%%%+..
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //......................
-            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //......................
-        };
-        constexpr const AlphaSymbol symbol(width, height, data);
-    }
-}
+#pragma once
+
+#include </mnt/c/Users/carel/Downloads/sys-tune-master-128bitsound/sys-tune-master/libtesla/include/tesla.hpp>
+
+class AlphaSymbol {
+  private:
+    s32 m_width;
+    s32 m_height;
+    const u8 *m_data;
+
+  public:
+    constexpr AlphaSymbol(size_t width, size_t height, const u8 *data)
+        : m_width(width), m_height(height), m_data(data) {}
+
+    void draw(s32 x, s32 y, tsl::gfx::Renderer *renderer, tsl::Color color) const {
+        const u8 *ptr = this->m_data;
+        const s32 x_offset = x - (m_width / 2);
+        const s32 y_offset = y - (m_height / 2);
+        for (s32 y1 = 0; y1 < m_height; y1++) {
+            for (s32 x1 = 0; x1 < m_width; x1++) {
+                color.a = static_cast<u16>(ptr[0] >> 4);
+                renderer->setPixelBlendSrc(x_offset + x1, y_offset + y1, renderer->a(color));
+                ptr++;
+            }
+        }
+    }
+};
+
+namespace symbol {
+    namespace repeat {
+        namespace all {
+            constexpr const size_t height = 30;
+            constexpr const size_t width = 29;
+            constexpr const unsigned char data[height * width] = {
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.............................
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x74, 0xf0, 0x86, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //..................+@%........
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xda, 0xff, 0xff, 0x92, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //..................@@@%.......
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xde, 0xff, 0xff, 0xff, 0x92, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //..................@@@@%......
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xde, 0xff, 0xff, 0xff, 0xff, 0x92, 0x00, 0x00, 0x00, 0x00, 0x00, //..................@@@@@%.....
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x84, 0xc4, 0xde, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xfc, 0xff, 0xff, 0xff, 0xff, 0xff, 0x92, 0x00, 0x00, 0x00, 0x00, //......%@@@@@@@@@@@@@@@@@%....
+                0x00, 0x00, 0x00, 0x14, 0xa8, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x36, 0x00, 0x00, 0x00, //....%@@@@@@@@@@@@@@@@@@@@....
+                0x00, 0x00, 0x2c, 0xe0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc, 0x20, 0x00, 0x00, 0x00, //...@@@@@@@@@@@@@@@@@@@@@@....
+                0x00, 0x1c, 0xe6, 0xff, 0xff, 0xff, 0xff, 0xf8, 0xce, 0xb0, 0xae, 0xae, 0xae, 0xae, 0xae, 0xae, 0xae, 0xae, 0xf4, 0xff, 0xff, 0xff, 0xff, 0xfc, 0x56, 0x00, 0x00, 0x00, 0x00, //..@@@@@@@%%%%%%%%%@@@@@@+....
+                0x00, 0xbc, 0xff, 0xff, 0xff, 0xf4, 0x6c, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xde, 0xff, 0xff, 0xff, 0xfc, 0x56, 0x00, 0x30, 0xbc, 0x70, 0x00, //.%@@@@+...........@@@@@+..%+.
+                0x44, 0xff, 0xff, 0xff, 0xf0, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xde, 0xff, 0xff, 0xfc, 0x56, 0x00, 0x10, 0xe6, 0xff, 0xff, 0x34, //+@@@@.............@@@@+..@@@.
+                0x9c, 0xff, 0xff, 0xff, 0x5c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xd2, 0xff, 0xfc, 0x56, 0x00, 0x00, 0x2e, 0xff, 0xff, 0xff, 0x90, //%@@@+.............@@@+...@@@%
+                0xda, 0xff, 0xff, 0xea, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4a, 0xb6, 0x4c, 0x00, 0x00, 0x00, 0x02, 0xe6, 0xff, 0xff, 0xcc, //@@@@..............+%+....@@@@
+                0xf2, 0xff, 0xff, 0xba, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb2, 0xff, 0xff, 0xea, //@@@%.....................%@@@
+                0xf2, 0xff, 0xff, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb0, 0xff, 0xff, 0xf6, //@@@%.....................%@@@
+                0xd6, 0xff, 0xff, 0xd4, 0x00, 0x00, 0x00, 0x00, 0x1e, 0x70, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe2, 0xff, 0xff, 0xde, //@@@@.....+...............@@@@
+                0xa8, 0xff, 0xff, 0xff, 0x20, 0x00, 0x00, 0x2a, 0xe8, 0xff, 0xc4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0xff, 0xff, 0xff, 0xaa, //%@@@....@@@..............@@@%
+                0x4c, 0xff, 0xff, 0xfa, 0x1c, 0x00, 0x2a, 0xe8, 0xff, 0xff, 0xde, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0xd8, 0xff, 0xff, 0xff, 0x5a, //+@@@...@@@@.............@@@@+
+                0x04, 0xc8, 0xf8, 0x5c, 0x00, 0x2a, 0xe8, 0xff, 0xff, 0xff, 0xde, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0xcc, 0xff, 0xff, 0xff, 0xd6, 0x02, //.@@+..@@@@@............@@@@@.
+                0x00, 0x00, 0x06, 0x00, 0x2a, 0xe8, 0xff, 0xff, 0xff, 0xff, 0xec, 0x6e, 0x6e, 0x6e, 0x6e, 0x6e, 0x6e, 0x6e, 0x6e, 0x70, 0x8c, 0xbc, 0xfa, 0xff, 0xff, 0xff, 0xf8, 0x3a, 0x00, //.....@@@@@@+++++++++%%@@@@@..
+                0x00, 0x00, 0x00, 0x10, 0xe8, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfa, 0x56, 0x00, 0x00, //....@@@@@@@@@@@@@@@@@@@@@@+..
+                0x00, 0x00, 0x00, 0x50, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xe2, 0x3a, 0x00, 0x00, 0x00, //...+@@@@@@@@@@@@@@@@@@@@@....
+                0x00, 0x00, 0x00, 0x08, 0xc6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc, 0xc6, 0x6e, 0x08, 0x00, 0x00, 0x00, 0x00, //....@@@@@@@@@@@@@@@@@@@+.....
+                0x00, 0x00, 0x00, 0x00, 0x0c, 0xc6, 0xff, 0xff, 0xff, 0xff, 0xe4, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x20, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.....@@@@@@..................
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0xc6, 0xff, 0xff, 0xff, 0xde, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //......@@@@@..................
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0xc6, 0xff, 0xff, 0xde, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.......@@@@..................
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0xc4, 0xff, 0xa6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //........@@%..................
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x36, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.............................
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.............................
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.............................
+            };
+            constexpr const AlphaSymbol symbol(width, height, data);
+        }
+
+        namespace one {
+            constexpr const size_t height = 30;
+            constexpr const size_t width = 29;
+            constexpr const unsigned char data[height * width] = {
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.............................
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x74, 0xf0, 0x86, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //..................+@%........
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xda, 0xff, 0xff, 0x92, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //..................@@@%.......
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xde, 0xff, 0xff, 0xff, 0x92, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //..................@@@@%......
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xde, 0xff, 0xff, 0xff, 0xff, 0x92, 0x00, 0x00, 0x00, 0x00, 0x00, //..................@@@@@%.....
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x84, 0xc4, 0xde, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0xfc, 0xff, 0xff, 0xff, 0xff, 0xff, 0x92, 0x00, 0x00, 0x00, 0x00, //......%@@@@@@@@@@@@@@@@@%....
+                0x00, 0x00, 0x00, 0x14, 0xa8, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x36, 0x00, 0x00, 0x00, //....%@@@@@@@@@@@@@@@@@@@@....
+                0x00, 0x00, 0x2c, 0xe0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc, 0x20, 0x00, 0x00, 0x00, //...@@@@@@@@@@@@@@@@@@@@@@....
+                0x00, 0x1c, 0xe6, 0xff, 0xff, 0xff, 0xff, 0xf8, 0xce, 0xb0, 0xae, 0xae, 0xae, 0xae, 0xae, 0xae, 0xae, 0xae, 0xf4, 0xff, 0xff, 0xff, 0xff, 0xfc, 0x56, 0x00, 0x00, 0x00, 0x00, //..@@@@@@@%%%%%%%%%@@@@@@+....
+                0x00, 0xbc, 0xff, 0xff, 0xff, 0xf4, 0x6c, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xde, 0xff, 0xff, 0xff, 0xfc, 0x56, 0x00, 0x30, 0xbc, 0x70, 0x00, //.%@@@@+...........@@@@@+..%+.
+                0x44, 0xff, 0xff, 0xff, 0xf0, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3a, 0x7c, 0x22, 0x00, 0xde, 0xff, 0xff, 0xfc, 0x56, 0x00, 0x10, 0xe6, 0xff, 0xff, 0x34, //+@@@@..........+..@@@@+..@@@.
+                0x9c, 0xff, 0xff, 0xff, 0x5c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6c, 0xff, 0xff, 0x7a, 0x00, 0xd2, 0xff, 0xfc, 0x56, 0x00, 0x00, 0x2e, 0xff, 0xff, 0xff, 0x90, //%@@@+........+@@+.@@@+...@@@%
+                0xda, 0xff, 0xff, 0xea, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0xfc, 0xff, 0xff, 0x7a, 0x00, 0x4a, 0xb6, 0x4c, 0x00, 0x00, 0x00, 0x02, 0xe6, 0xff, 0xff, 0xcc, //@@@@.........@@@+.+%+....@@@@
+                0xf2, 0xff, 0xff, 0xba, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0xb4, 0xff, 0x7a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb2, 0xff, 0xff, 0xea, //@@@%.........+%@+........%@@@
+                0xf2, 0xff, 0xff, 0xaa, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9a, 0xff, 0x7a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb0, 0xff, 0xff, 0xf6, //@@@%..........%@+........%@@@
+                0xd6, 0xff, 0xff, 0xd4, 0x00, 0x00, 0x00, 0x00, 0x1e, 0x70, 0x1c, 0x00, 0x00, 0x08, 0xa2, 0xff, 0x84, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe2, 0xff, 0xff, 0xde, //@@@@.....+....%@%........@@@@
+                0xa8, 0xff, 0xff, 0xff, 0x20, 0x00, 0x00, 0x2a, 0xe8, 0xff, 0xc4, 0x00, 0x14, 0xfa, 0xff, 0xff, 0xff, 0xec, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0xff, 0xff, 0xff, 0xaa, //%@@@....@@@..@@@@@.......@@@%
+                0x4c, 0xff, 0xff, 0xfa, 0x1c, 0x00, 0x2a, 0xe8, 0xff, 0xff, 0xde, 0x00, 0x0a, 0xb2, 0xbe, 0xbe, 0xbe, 0xa4, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0xd8, 0xff, 0xff, 0xff, 0x5a, //+@@@...@@@@..%%%%%......@@@@+
+                0x04, 0xc8, 0xf8, 0x5c, 0x00, 0x2a, 0xe8, 0xff, 0xff, 0xff, 0xde, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0xcc, 0xff, 0xff, 0xff, 0xd6, 0x02, //.@@+..@@@@@............@@@@@.
+                0x00, 0x00, 0x06, 0x00, 0x2a, 0xe8, 0xff, 0xff, 0xff, 0xff, 0xec, 0x6e, 0x6e, 0x6e, 0x6e, 0x6e, 0x6e, 0x6e, 0x6e, 0x70, 0x8c, 0xbc, 0xfa, 0xff, 0xff, 0xff, 0xf8, 0x3a, 0x00, //.....@@@@@@+++++++++%%@@@@@..
+                0x00, 0x00, 0x00, 0x10, 0xe8, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfa, 0x56, 0x00, 0x00, //....@@@@@@@@@@@@@@@@@@@@@@+..
+                0x00, 0x00, 0x00, 0x50, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xe2, 0x3a, 0x00, 0x00, 0x00, //...+@@@@@@@@@@@@@@@@@@@@@....
+                0x00, 0x00, 0x00, 0x08, 0xc6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc, 0xc6, 0x6e, 0x08, 0x00, 0x00, 0x00, 0x00, //....@@@@@@@@@@@@@@@@@@@+.....
+                0x00, 0x00, 0x00, 0x00, 0x0c, 0xc6, 0xff, 0xff, 0xff, 0xff, 0xe4, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x20, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.....@@@@@@..................
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0xc6, 0xff, 0xff, 0xff, 0xde, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //......@@@@@..................
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0xc6, 0xff, 0xff, 0xde, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.......@@@@..................
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0xc4, 0xff, 0xa6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //........@@%..................
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x36, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.............................
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.............................
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.............................
+            };
+            constexpr const AlphaSymbol symbol(width, height, data);
+        }
+    }
+
+    namespace sd {
+        constexpr const size_t height = 30;
+        constexpr const size_t width = 23;
+        constexpr const unsigned char data[height * width] = {
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0xe0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xe6, 0x10, //......@@@@@@@@@@@@@@@@.
+            0x00, 0x00, 0x00, 0x00, 0x1c, 0xdc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x82, //.....@@@@@@@@@@@@@@@@@%
+            0x00, 0x00, 0x00, 0x1a, 0xda, 0xff, 0xe8, 0xbe, 0xbe, 0xde, 0xff, 0xd8, 0xbe, 0xbe, 0xf0, 0xff, 0xc8, 0xbe, 0xc0, 0xff, 0xff, 0xff, 0xb8, //....@@@%%@@@%%@@@%@@@@%
+            0x00, 0x00, 0x18, 0xd8, 0xff, 0xff, 0xa2, 0x00, 0x00, 0x7e, 0xff, 0x62, 0x00, 0x00, 0xc2, 0xff, 0x22, 0x00, 0x02, 0xff, 0xff, 0xff, 0xc6, //...@@@%..+@+..@@...@@@@
+            0x00, 0x14, 0xd4, 0xff, 0xff, 0xff, 0xa2, 0x00, 0x00, 0x7e, 0xff, 0x62, 0x00, 0x00, 0xc2, 0xff, 0x22, 0x00, 0x02, 0xff, 0xff, 0xff, 0xc6, //..@@@@%..+@+..@@...@@@@
+            0x12, 0xd2, 0xff, 0xff, 0xff, 0xff, 0xa2, 0x00, 0x00, 0x7e, 0xff, 0x62, 0x00, 0x00, 0xc2, 0xff, 0x22, 0x00, 0x02, 0xff, 0xff, 0xff, 0xc6, //.@@@@@%..+@+..@@...@@@@
+            0xce, 0xff, 0xff, 0xff, 0xff, 0xff, 0xa2, 0x00, 0x00, 0x7e, 0xff, 0x62, 0x00, 0x00, 0xc2, 0xff, 0x22, 0x00, 0x02, 0xff, 0xff, 0xff, 0xc6, //@@@@@@%..+@+..@@...@@@@
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xa2, 0x00, 0x00, 0x7e, 0xff, 0x62, 0x00, 0x00, 0xc2, 0xff, 0x22, 0x00, 0x02, 0xff, 0xff, 0xff, 0xc6, //@@@@@@%..+@+..@@...@@@@
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xcc, 0x6e, 0x6e, 0xb6, 0xff, 0xa8, 0x6e, 0x6e, 0xde, 0xff, 0x84, 0x6e, 0x72, 0xff, 0xff, 0xff, 0xc6, //@@@@@@@++%@%++@@%++@@@@
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc6, //@@@@@@@@@@@@@@@@@@@@@@@
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc6, //@@@@@@@@@@@@@@@@@@@@@@@
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc6, //@@@@@@@@@@@@@@@@@@@@@@@
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc6, //@@@@@@@@@@@@@@@@@@@@@@@
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc6, //@@@@@@@@@@@@@@@@@@@@@@@
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc6, //@@@@@@@@@@@@@@@@@@@@@@@
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc6, //@@@@@@@@@@@@@@@@@@@@@@@
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc6, //@@@@@@@@@@@@@@@@@@@@@@@
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc6, //@@@@@@@@@@@@@@@@@@@@@@@
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc6, //@@@@@@@@@@@@@@@@@@@@@@@
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc6, //@@@@@@@@@@@@@@@@@@@@@@@
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc6, //@@@@@@@@@@@@@@@@@@@@@@@
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc6, //@@@@@@@@@@@@@@@@@@@@@@@
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc6, //@@@@@@@@@@@@@@@@@@@@@@@
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc6, //@@@@@@@@@@@@@@@@@@@@@@@
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc6, //@@@@@@@@@@@@@@@@@@@@@@@
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc6, //@@@@@@@@@@@@@@@@@@@@@@@
+            0xe8, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xae, //@@@@@@@@@@@@@@@@@@@@@@%
+            0xa8, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6c, //%@@@@@@@@@@@@@@@@@@@@@+
+            0x1a, 0xe6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc2, 0x02, //.@@@@@@@@@@@@@@@@@@@@@.
+            0x00, 0x16, 0x98, 0xd0, 0xde, 0xde, 0xde, 0xde, 0xde, 0xde, 0xde, 0xde, 0xde, 0xde, 0xde, 0xde, 0xde, 0xde, 0xde, 0xc6, 0x7e, 0x06, 0x00, //..%@@@@@@@@@@@@@@@@@+..
+        };
+        constexpr const AlphaSymbol symbol(width, height, data);
+    }
+
+    namespace shuffle {
+        constexpr const size_t height = 30;
+        constexpr const size_t width = 29;
+        constexpr const unsigned char data[height * width] = {
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.............................
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x52, 0x6c, 0x06, 0x00, 0x00, 0x00, 0x00, //......................++.....
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x28, 0xff, 0xff, 0xb6, 0x06, 0x00, 0x00, 0x00, //......................@@%....
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0xff, 0xff, 0xff, 0xb6, 0x06, 0x00, 0x00, //......................@@@%...
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0xff, 0xff, 0xff, 0xff, 0xb6, 0x06, 0x00, //......................@@@@%..
+            0xd2, 0xee, 0xee, 0xee, 0xee, 0xee, 0xee, 0x9e, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4e, 0xe6, 0xee, 0xee, 0xf2, 0xff, 0xff, 0xff, 0xff, 0xff, 0xb6, 0x06, //@@@@@@@%.........+@@@@@@@@@%.
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x9c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x42, 0xf8, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xa0, //@@@@@@@@%.......+@@@@@@@@@@@%
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x8a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0xf2, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xe2, //@@@@@@@@@%......@@@@@@@@@@@@@
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x00, 0x00, 0x00, 0x2a, 0xec, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x62, //@@@@@@@@@@+....@@@@@@@@@@@@@+
+            0x7a, 0x96, 0x96, 0x96, 0x96, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0x34, 0x00, 0x20, 0xe4, 0xff, 0xff, 0xff, 0xff, 0xe4, 0x96, 0xb0, 0xff, 0xff, 0xff, 0xff, 0xff, 0x68, 0x00, //+%%%%%@@@@@...@@@@@@%%@@@@@+.
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0xb4, 0xff, 0xff, 0xff, 0x8c, 0x00, 0x18, 0xdc, 0xff, 0xff, 0xff, 0xff, 0xec, 0x2a, 0x00, 0x3e, 0xff, 0xff, 0xff, 0xff, 0x68, 0x00, 0x00, //......%@@@%..@@@@@@...@@@@+..
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0xc2, 0xff, 0xa2, 0x00, 0x10, 0xd2, 0xff, 0xff, 0xff, 0xff, 0xf2, 0x34, 0x00, 0x00, 0x3e, 0xff, 0xff, 0xff, 0x68, 0x00, 0x00, 0x00, //.......@@%..@@@@@@....@@@+...
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x8c, 0x04, 0x0a, 0xc6, 0xff, 0xff, 0xff, 0xff, 0xf8, 0x42, 0x00, 0x00, 0x00, 0x14, 0xec, 0xfc, 0x68, 0x00, 0x00, 0x00, 0x00, //........%..@@@@@@+....@@+....
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0xba, 0xff, 0xff, 0xff, 0xff, 0xfa, 0x4e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, //..........%@@@@@+............
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xac, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.........%@@@@@+.............
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9e, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, 0x00, 0x2e, 0x1e, 0x00, 0x00, 0x00, 0x08, 0xba, 0xd4, 0x36, 0x00, 0x00, 0x00, 0x00, //........%@@@@@+.......%@.....
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8c, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x22, 0xe8, 0xd8, 0x14, 0x00, 0x00, 0x3a, 0xff, 0xff, 0xf0, 0x36, 0x00, 0x00, 0x00, //.......%@@@@@+..@@....@@@....
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0xff, 0xff, 0xff, 0xff, 0xff, 0x90, 0x00, 0x16, 0xdc, 0xff, 0xff, 0xce, 0x0e, 0x00, 0x3e, 0xff, 0xff, 0xff, 0xf0, 0x36, 0x00, 0x00, //......+@@@@@%..@@@@...@@@@...
+            0x3e, 0x56, 0x56, 0x56, 0x56, 0x82, 0xff, 0xff, 0xff, 0xff, 0xff, 0xa0, 0x00, 0x00, 0xc8, 0xff, 0xff, 0xff, 0xff, 0xc2, 0x56, 0x80, 0xff, 0xff, 0xff, 0xff, 0xf0, 0x36, 0x00, //.++++%@@@@@%..@@@@@@+%@@@@@..
+            0xfa, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xae, 0x02, 0x00, 0x00, 0x56, 0xfc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf0, 0x36, //@@@@@@@@@@%...+@@@@@@@@@@@@@.
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xbc, 0x06, 0x00, 0x00, 0x00, 0x00, 0x64, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xd2, //@@@@@@@@@%.....+@@@@@@@@@@@@@
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc8, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x74, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc2, //@@@@@@@@@.......+@@@@@@@@@@@@
+            0xf4, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xd4, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x84, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xde, 0x1e, //@@@@@@@@.........%@@@@@@@@@@.
+            0x1c, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x2e, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x26, 0x2e, 0x2e, 0x62, 0xff, 0xff, 0xff, 0xff, 0xde, 0x1e, 0x00, //.....................+@@@@@..
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0xff, 0xff, 0xff, 0xde, 0x1e, 0x00, 0x00, //......................@@@@...
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0xff, 0xff, 0xde, 0x1e, 0x00, 0x00, 0x00, //......................@@@....
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x94, 0xb2, 0x1e, 0x00, 0x00, 0x00, 0x00, //......................%%.....
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.............................
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.............................
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.............................
+        };
+        constexpr const AlphaSymbol symbol(width, height, data);
+    }
+
+    namespace volume {
+        namespace mute {
+            constexpr const size_t height = 30;
+            constexpr const size_t width = 15;
+            constexpr const unsigned char data[height * width] = {
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //...............
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //...............
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //...............
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x84, 0x9c, 0x12, //............%%.
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0xaa, 0xff, 0xff, 0x72, //...........%@@+
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0xaa, 0xff, 0xff, 0xff, 0x7e, //..........%@@@+
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0xaa, 0xff, 0xff, 0xff, 0xff, 0x7e, //.........%@@@@+
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xa8, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //........%@@@@@+
+                0x08, 0x48, 0x4e, 0x4e, 0x4e, 0x4e, 0x4e, 0xaa, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //.++++++%@@@@@@+
+                0xc6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //@@@@@@@@@@@@@@+
+                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //@@@@@@@@@@@@@@+
+                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //@@@@@@@@@@@@@@+
+                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //@@@@@@@@@@@@@@+
+                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //@@@@@@@@@@@@@@+
+                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //@@@@@@@@@@@@@@+
+                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //@@@@@@@@@@@@@@+
+                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //@@@@@@@@@@@@@@+
+                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //@@@@@@@@@@@@@@+
+                0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //@@@@@@@@@@@@@@+
+                0x3c, 0x8c, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0xd6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //.%%%%%%@@@@@@@+
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x16, 0xd6, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //........@@@@@@+
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x16, 0xd6, 0xff, 0xff, 0xff, 0xff, 0x7e, //.........@@@@@+
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x16, 0xd6, 0xff, 0xff, 0xff, 0x7e, //..........@@@@+
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0xd6, 0xff, 0xff, 0x7a, //...........@@@+
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0xc8, 0xe0, 0x2c, //............@@.
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //...............
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //...............
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //...............
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //...............
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //...............
+            };
+            constexpr const AlphaSymbol symbol(width, height, data);
+        }
+
+        namespace low {
+            constexpr const size_t height = 30;
+            constexpr const size_t width = 22;
+            constexpr const unsigned char data[height * width] = {
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //......................
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //......................
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //......................
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x76, 0x8e, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //............+%........
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9c, 0xff, 0xff, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //...........%@@+.......
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9c, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //..........%@@@+.......
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9c, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.........%@@@@+.......
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9a, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //........%@@@@@+.......
+                0x02, 0x38, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x9c, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.......%@@@@@@+.......
+                0xba, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x0c, 0x3e, 0x0c, 0x00, 0x00, //%@@@@@@@@@@@@@+.......
+                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0xae, 0xff, 0xda, 0x26, 0x00, //@@@@@@@@@@@@@@+..%@@..
+                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0xae, 0xff, 0xff, 0xd4, 0x06, //@@@@@@@@@@@@@@+..%@@@.
+                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x0e, 0xc6, 0xff, 0xff, 0x5e, //@@@@@@@@@@@@@@+...@@@+
+                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x26, 0xff, 0xff, 0xa4, //@@@@@@@@@@@@@@+....@@%
+                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x10, 0xff, 0xff, 0xb2, //@@@@@@@@@@@@@@+....@@%
+                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x7c, 0xff, 0xff, 0x82, //@@@@@@@@@@@@@@+...+@@%
+                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x6c, 0xff, 0xff, 0xf8, 0x20, //@@@@@@@@@@@@@@+..+@@@.
+                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0xc8, 0xff, 0xff, 0x66, 0x00, //@@@@@@@@@@@@@@+..@@@+.
+                0xf2, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x46, 0xac, 0x48, 0x00, 0x00, //@@@@@@@@@@@@@@+..+%+..
+                0x46, 0x9c, 0x9e, 0x9e, 0x9e, 0x9e, 0x9e, 0xe0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //+%%%%%%@@@@@@@+.......
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0xdc, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //........@@@@@@+.......
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0xdc, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.........@@@@@+.......
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0xdc, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //..........@@@@+.......
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0xdc, 0xff, 0xff, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //...........@@@+.......
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0xd2, 0xea, 0x32, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //............@@........
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //......................
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //......................
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //......................
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //......................
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //......................
+            };
+            constexpr const AlphaSymbol symbol(width, height, data);
+        }
+
+        namespace high {
+            constexpr const size_t height = 30;
+            constexpr const size_t width = 31;
+            constexpr const unsigned char data[height * width] = {
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x60, 0x28, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //........................+........
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xa6, 0xff, 0xf6, 0x66, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.......................%@@+......
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8a, 0xff, 0xff, 0xff, 0x8a, 0x00, 0x00, 0x00, 0x00, 0x00, //.......................%@@@%.....
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x84, 0x9c, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8a, 0xff, 0xff, 0xff, 0x88, 0x00, 0x00, 0x00, 0x00, //............%%..........%@@@%....
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0xaa, 0xff, 0xff, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6a, 0xfc, 0xff, 0xff, 0x5a, 0x00, 0x00, 0x00, //...........%@@+..........+@@@+...
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0xaa, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50, 0xde, 0x8a, 0x02, 0x00, 0x00, 0x68, 0xff, 0xff, 0xf0, 0x1a, 0x00, 0x00, //..........%@@@+.....+@%...+@@@...
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0xaa, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb4, 0xff, 0xff, 0xba, 0x10, 0x00, 0x00, 0xb0, 0xff, 0xff, 0xa6, 0x00, 0x00, //.........%@@@@+.....%@@%...%@@%..
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xa8, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x32, 0xf2, 0xff, 0xff, 0xbc, 0x02, 0x00, 0x14, 0xe6, 0xff, 0xfa, 0x16, 0x00, //........%@@@@@+......@@@%...@@@..
+                0x08, 0x48, 0x4e, 0x4e, 0x4e, 0x4e, 0x4e, 0xaa, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0xea, 0xff, 0xff, 0x78, 0x00, 0x00, 0x78, 0xff, 0xff, 0x76, 0x00, //.++++++%@@@@@@+.......@@@+..+@@+.
+                0xc6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x12, 0x5a, 0x12, 0x00, 0x00, 0x2a, 0xf4, 0xff, 0xf8, 0x14, 0x00, 0x18, 0xfa, 0xff, 0xd8, 0x00, //@@@@@@@@@@@@@@+...+....@@@...@@@.
+                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0xb4, 0xff, 0xe2, 0x2c, 0x00, 0x00, 0x78, 0xff, 0xff, 0x72, 0x00, 0x00, 0xac, 0xff, 0xff, 0x2c, //@@@@@@@@@@@@@@+..%@@...+@@+..%@@.
+                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0xa6, 0xff, 0xff, 0xda, 0x08, 0x00, 0x10, 0xfa, 0xff, 0xd0, 0x00, 0x00, 0x6e, 0xff, 0xff, 0x5e, //@@@@@@@@@@@@@@+..%@@@...@@@..+@@+
+                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x0a, 0xbc, 0xff, 0xff, 0x64, 0x00, 0x00, 0xd2, 0xff, 0xf8, 0x00, 0x00, 0x4e, 0xff, 0xff, 0x84, //@@@@@@@@@@@@@@+...%@@+..@@@..+@@%
+                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x22, 0xff, 0xff, 0xa6, 0x00, 0x00, 0xa4, 0xff, 0xff, 0x1a, 0x00, 0x2c, 0xff, 0xff, 0x94, //@@@@@@@@@@@@@@+....@@%..%@@...@@%
+                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x12, 0xff, 0xff, 0xb0, 0x00, 0x00, 0x98, 0xff, 0xff, 0x22, 0x00, 0x20, 0xff, 0xff, 0x98, //@@@@@@@@@@@@@@+....@@%..%@@...@@%
+                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x86, 0xff, 0xff, 0x7e, 0x00, 0x00, 0xc6, 0xff, 0xfc, 0x04, 0x00, 0x38, 0xff, 0xff, 0x88, //@@@@@@@@@@@@@@+...%@@+..@@@...@@%
+                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x78, 0xff, 0xff, 0xf6, 0x1a, 0x00, 0x04, 0xf2, 0xff, 0xe0, 0x00, 0x00, 0x58, 0xff, 0xff, 0x6c, //@@@@@@@@@@@@@@+..+@@@...@@@..+@@+
+                0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0xc6, 0xff, 0xfc, 0x5a, 0x00, 0x00, 0x58, 0xff, 0xff, 0x90, 0x00, 0x00, 0x98, 0xff, 0xff, 0x3a, //@@@@@@@@@@@@@@+..@@@+..+@@%..%@@.
+                0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x3a, 0x9c, 0x3a, 0x00, 0x00, 0x0e, 0xe0, 0xff, 0xff, 0x2a, 0x00, 0x02, 0xde, 0xff, 0xec, 0x06, //@@@@@@@@@@@@@@+...%....@@@...@@@.
+                0x3c, 0x8c, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0xd6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0xc6, 0xff, 0xff, 0xac, 0x00, 0x00, 0x4e, 0xff, 0xff, 0x90, 0x00, //.%%%%%%@@@@@@@+.......@@@%..+@@%.
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x16, 0xd6, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x14, 0xd4, 0xff, 0xff, 0xe2, 0x12, 0x00, 0x00, 0xc6, 0xff, 0xff, 0x2c, 0x00, //........@@@@@@+......@@@@...@@@..
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x16, 0xd6, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0xa0, 0xff, 0xff, 0xe8, 0x36, 0x00, 0x00, 0x68, 0xff, 0xff, 0xc6, 0x00, 0x00, //.........@@@@@+.....%@@@...+@@@..
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x16, 0xd6, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x88, 0xff, 0xc6, 0x1a, 0x00, 0x00, 0x2e, 0xf2, 0xff, 0xfc, 0x38, 0x00, 0x00, //..........@@@@+.....%@@....@@@...
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0xd6, 0xff, 0xff, 0x7a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2a, 0x02, 0x00, 0x00, 0x20, 0xe6, 0xff, 0xff, 0x8c, 0x00, 0x00, 0x00, //...........@@@+...........@@@%...
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0xc8, 0xe0, 0x2c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0xe8, 0xff, 0xff, 0xbc, 0x04, 0x00, 0x00, 0x00, //............@@..........+@@@%....
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4a, 0xfa, 0xff, 0xff, 0xc2, 0x0c, 0x00, 0x00, 0x00, 0x00, //.......................+@@@@.....
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb6, 0xff, 0xff, 0xa8, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, //.......................%@@%......
+                0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x28, 0xac, 0x62, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //........................%+.......
+            };
+            constexpr const AlphaSymbol symbol(width, height, data);
+        }
+    }
+
+    namespace prev {
+        constexpr const size_t height = 26;
+        constexpr const size_t width = 16;
+        constexpr const unsigned char data[height * width] = {
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //................
+            0x38, 0x4e, 0x4e, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x1c, 0x00, //.++.............
+            0xec, 0xff, 0xff, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x98, 0xff, 0xf2, 0x22, //@@@%........%@@.
+            0xee, 0xff, 0xff, 0x82, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0xa6, 0xff, 0xff, 0xff, 0x5c, //@@@%.......%@@@+
+            0xee, 0xff, 0xff, 0x82, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0xb2, 0xff, 0xff, 0xff, 0xff, 0x5e, //@@@%......%@@@@+
+            0xee, 0xff, 0xff, 0x82, 0x00, 0x00, 0x00, 0x00, 0x0c, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5e, //@@@%.....%@@@@@+
+            0xee, 0xff, 0xff, 0x82, 0x00, 0x00, 0x00, 0x10, 0xc8, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5e, //@@@%....@@@@@@@+
+            0xee, 0xff, 0xff, 0x82, 0x00, 0x00, 0x18, 0xd2, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5e, //@@@%...@@@@@@@@+
+            0xee, 0xff, 0xff, 0x82, 0x00, 0x1e, 0xda, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5e, //@@@%..@@@@@@@@@+
+            0xee, 0xff, 0xff, 0x82, 0x26, 0xe2, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5e, //@@@%.@@@@@@@@@@+
+            0xee, 0xff, 0xff, 0xb0, 0xea, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5e, //@@@%@@@@@@@@@@@+
+            0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5e, //@@@@@@@@@@@@@@@+
+            0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5e, //@@@@@@@@@@@@@@@+
+            0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5e, //@@@@@@@@@@@@@@@+
+            0xee, 0xff, 0xff, 0xd6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5e, //@@@@@@@@@@@@@@@+
+            0xee, 0xff, 0xff, 0x82, 0x5e, 0xfc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5e, //@@@%+@@@@@@@@@@+
+            0xee, 0xff, 0xff, 0x82, 0x00, 0x50, 0xf8, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5e, //@@@%.+@@@@@@@@@+
+            0xee, 0xff, 0xff, 0x82, 0x00, 0x00, 0x42, 0xf4, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5e, //@@@%..+@@@@@@@@+
+            0xee, 0xff, 0xff, 0x82, 0x00, 0x00, 0x00, 0x38, 0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5e, //@@@%....@@@@@@@+
+            0xee, 0xff, 0xff, 0x82, 0x00, 0x00, 0x00, 0x00, 0x2c, 0xe6, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5e, //@@@%.....@@@@@@+
+            0xee, 0xff, 0xff, 0x82, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0xde, 0xff, 0xff, 0xff, 0xff, 0x5e, //@@@%......@@@@@+
+            0xee, 0xff, 0xff, 0x82, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1a, 0xd6, 0xff, 0xff, 0xff, 0x5e, //@@@%.......@@@@+
+            0xee, 0xff, 0xff, 0x82, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x14, 0xcc, 0xff, 0xff, 0x3a, //@@@%........@@@.
+            0x72, 0x8e, 0x8e, 0x34, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x78, 0x52, 0x00, //+%%..........++.
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //................
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //................
+        };
+        constexpr const AlphaSymbol symbol(width, height, data);
+    }
+
+    namespace next {
+        constexpr const size_t height = 26;
+        constexpr const size_t width = 16;
+        constexpr const unsigned char data[height * width] = {
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //................
+            0x06, 0x40, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x42, 0x4e, 0x4e, 0x0c, //.+..........+++.
+            0xac, 0xff, 0xe2, 0x28, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0xff, 0xff, 0xff, 0x5c, //%@@.........@@@+
+            0xec, 0xff, 0xff, 0xea, 0x32, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0xff, 0xff, 0xff, 0x5e, //@@@@........@@@+
+            0xee, 0xff, 0xff, 0xff, 0xf0, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0xff, 0xff, 0xff, 0x5e, //@@@@@.......@@@+
+            0xee, 0xff, 0xff, 0xff, 0xff, 0xf6, 0x4a, 0x00, 0x00, 0x00, 0x00, 0x0e, 0xff, 0xff, 0xff, 0x5e, //@@@@@@+.....@@@+
+            0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfa, 0x58, 0x00, 0x00, 0x00, 0x0e, 0xff, 0xff, 0xff, 0x5e, //@@@@@@@+....@@@+
+            0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc, 0x66, 0x00, 0x00, 0x0e, 0xff, 0xff, 0xff, 0x5e, //@@@@@@@@+...@@@+
+            0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x76, 0x00, 0x0e, 0xff, 0xff, 0xff, 0x5e, //@@@@@@@@@+..@@@+
+            0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x86, 0x10, 0xff, 0xff, 0xff, 0x5e, //@@@@@@@@@@%.@@@+
+            0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xa4, 0xff, 0xff, 0xff, 0x5e, //@@@@@@@@@@@%@@@+
+            0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5e, //@@@@@@@@@@@@@@@+
+            0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5e, //@@@@@@@@@@@@@@@+
+            0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5e, //@@@@@@@@@@@@@@@+
+            0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xca, 0xff, 0xff, 0xff, 0x5e, //@@@@@@@@@@@@@@@+
+            0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xb4, 0x18, 0xff, 0xff, 0xff, 0x5e, //@@@@@@@@@@%.@@@+
+            0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xa8, 0x04, 0x0e, 0xff, 0xff, 0xff, 0x5e, //@@@@@@@@@%..@@@+
+            0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x9a, 0x02, 0x00, 0x0e, 0xff, 0xff, 0xff, 0x5e, //@@@@@@@@%...@@@+
+            0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x8c, 0x00, 0x00, 0x00, 0x0e, 0xff, 0xff, 0xff, 0x5e, //@@@@@@@%....@@@+
+            0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00, 0x0e, 0xff, 0xff, 0xff, 0x5e, //@@@@@@+.....@@@+
+            0xee, 0xff, 0xff, 0xff, 0xff, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0xff, 0xff, 0xff, 0x5e, //@@@@@+......@@@+
+            0xee, 0xff, 0xff, 0xfc, 0x62, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0xff, 0xff, 0xff, 0x5e, //@@@@+.......@@@+
+            0xce, 0xff, 0xfa, 0x54, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0xff, 0xff, 0xff, 0x5e, //@@@+........@@@+
+            0x22, 0x84, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x82, 0x8e, 0x8e, 0x22, //.%..........%%%.
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //................
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //................
+        };
+        constexpr const AlphaSymbol symbol(width, height, data);
+    }
+
+    namespace backward {
+        constexpr const size_t height = 26;
+        constexpr const size_t width = 25;
+        constexpr const unsigned char data[height * width] = {
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.........................
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.........................
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.........................
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0xac, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x16, 0x9c, 0x90, 0x08, //..........%+..........%%.
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x54, 0xf6, 0xff, 0xff, 0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0xe0, 0xff, 0xff, 0x66, //........+@@@.........@@@+
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7a, 0xff, 0xff, 0xff, 0xff, 0x3e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0xf0, 0xff, 0xff, 0xff, 0x7e, //.......+@@@@.......+@@@@+
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0xa0, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6a, 0xfc, 0xff, 0xff, 0xff, 0xff, 0x7e, //......%@@@@@......+@@@@@+
+            0x00, 0x00, 0x00, 0x00, 0x12, 0xc0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0x00, 0x00, 0x00, 0x02, 0x90, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //.....@@@@@@@.....%@@@@@@+
+            0x00, 0x00, 0x00, 0x26, 0xda, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0x00, 0x00, 0x0c, 0xb4, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //....@@@@@@@@....%@@@@@@@+
+            0x00, 0x00, 0x40, 0xec, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0x00, 0x1c, 0xd0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //..+@@@@@@@@@...@@@@@@@@@+
+            0x00, 0x60, 0xfa, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0x34, 0xe6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //.+@@@@@@@@@@..@@@@@@@@@@+
+            0x76, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x80, 0xf4, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //+@@@@@@@@@@@%@@@@@@@@@@@+
+            0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //@@@@@@@@@@@@@@@@@@@@@@@@+
+            0xaa, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xaa, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //%@@@@@@@@@@@%@@@@@@@@@@@+
+            0x06, 0xa0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0x6a, 0xfc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //.%@@@@@@@@@@.+@@@@@@@@@@+
+            0x00, 0x00, 0x7a, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0x00, 0x46, 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //..+@@@@@@@@@..+@@@@@@@@@+
+            0x00, 0x00, 0x00, 0x56, 0xf6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0x00, 0x00, 0x2c, 0xe0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //...+@@@@@@@@....@@@@@@@@+
+            0x00, 0x00, 0x00, 0x00, 0x36, 0xe8, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0x00, 0x00, 0x00, 0x16, 0xc8, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //.....@@@@@@@.....@@@@@@@+
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x1e, 0xd2, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0x00, 0x00, 0x00, 0x00, 0x08, 0xaa, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, //......@@@@@@......%@@@@@+
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0e, 0xb6, 0xff, 0xff, 0xff, 0xff, 0x3e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x84, 0xff, 0xff, 0xff, 0xff, 0x7e, //.......%@@@@.......%@@@@+
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x94, 0xff, 0xff, 0xff, 0x34, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5e, 0xf8, 0xff, 0xff, 0x74, //........%@@@........+@@@+
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6a, 0xee, 0xb0, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0xe0, 0xd2, 0x1e, //.........+@%..........@@.
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.........................
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.........................
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.........................
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.........................
+        };
+        constexpr const AlphaSymbol symbol(width, height, data);
+    }
+
+    namespace forward {
+        constexpr const size_t height = 26;
+        constexpr const size_t width = 25;
+        constexpr const unsigned char data[height * width] = {
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.........................
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.........................
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.........................
+            0x4a, 0xb0, 0x58, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0xac, 0x7e, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //+%+..........%+..........
+            0xea, 0xff, 0xff, 0x88, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, 0xff, 0xff, 0xbc, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //@@@%........%@@%.........
+            0xff, 0xff, 0xff, 0xff, 0xae, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xd6, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //@@@@%.......%@@@@........
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xcc, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xea, 0x3a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //@@@@@@......%@@@@@.......
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xe2, 0x2e, 0x00, 0x00, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf8, 0x5a, 0x00, 0x00, 0x00, 0x00, 0x00, //@@@@@@@.....%@@@@@@+.....
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf2, 0x4c, 0x00, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7e, 0x00, 0x00, 0x00, 0x00, //@@@@@@@@+...%@@@@@@@+....
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfc, 0x6e, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xa4, 0x06, 0x00, 0x00, //@@@@@@@@@+..%@@@@@@@@%...
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x96, 0x02, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc4, 0x14, 0x00, //@@@@@@@@@@%.%@@@@@@@@@@..
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xb2, 0xc2, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xdc, 0x18, //@@@@@@@@@@@%@@@@@@@@@@@@.
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x70, //@@@@@@@@@@@@@@@@@@@@@@@@+
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xde, 0xcc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf8, 0x32, //@@@@@@@@@@@@@@@@@@@@@@@@.
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xcc, 0x18, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xea, 0x3a, 0x00, //@@@@@@@@@@@.%@@@@@@@@@@..
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xae, 0x0a, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xd6, 0x22, 0x00, 0x00, //@@@@@@@@@%..%@@@@@@@@@...
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x88, 0x00, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xba, 0x10, 0x00, 0x00, 0x00, //@@@@@@@@%...%@@@@@@@%....
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfa, 0x64, 0x00, 0x00, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x9a, 0x04, 0x00, 0x00, 0x00, 0x00, //@@@@@@@+....%@@@@@@%.....
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xee, 0x42, 0x00, 0x00, 0x00, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xfc, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //@@@@@@+.....%@@@@@+......
+            0xff, 0xff, 0xff, 0xff, 0xdc, 0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xf4, 0x4e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //@@@@@.......%@@@@+.......
+            0xf6, 0xff, 0xff, 0xc2, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb6, 0xff, 0xff, 0xe4, 0x32, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //@@@@........%@@@.........
+            0x82, 0xf4, 0x98, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4c, 0xec, 0xc0, 0x1a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //%@%.........+@@..........
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.........................
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.........................
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.........................
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.........................
+        };
+        constexpr const AlphaSymbol symbol(width, height, data);
+    }
+
+    namespace play {
+        constexpr const size_t height = 26;
+        constexpr const size_t width = 22;
+        constexpr const unsigned char data[height * width] = {
+            0x16, 0xa2, 0xc8, 0x78, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.%@+..................
+            0xbe, 0xff, 0xff, 0xff, 0xdc, 0x4a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //%@@@@+................
+            0xf8, 0xff, 0xff, 0xff, 0xff, 0xff, 0xb0, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //@@@@@@%...............
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf6, 0x7a, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //@@@@@@@@+.............
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xda, 0x46, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //@@@@@@@@@@+...........
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xac, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //@@@@@@@@@@@%..........
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf4, 0x78, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //@@@@@@@@@@@@@+........
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xd8, 0x44, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //@@@@@@@@@@@@@@@+......
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xa8, 0x1a, 0x00, 0x00, 0x00, 0x00, //@@@@@@@@@@@@@@@@%.....
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf2, 0x74, 0x04, 0x00, 0x00, //@@@@@@@@@@@@@@@@@@+...
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xd4, 0x34, 0x00, //@@@@@@@@@@@@@@@@@@@@..
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf2, 0x1e, //@@@@@@@@@@@@@@@@@@@@@.
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5e, //@@@@@@@@@@@@@@@@@@@@@+
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x34, //@@@@@@@@@@@@@@@@@@@@@.
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf6, 0x68, 0x00, //@@@@@@@@@@@@@@@@@@@@+.
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xb4, 0x22, 0x00, 0x00, //@@@@@@@@@@@@@@@@@@%...
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xe0, 0x4e, 0x00, 0x00, 0x00, 0x00, //@@@@@@@@@@@@@@@@@+....
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xf8, 0x84, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, //@@@@@@@@@@@@@@@%......
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xb8, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //@@@@@@@@@@@@@%........
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xe2, 0x52, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //@@@@@@@@@@@@+.........
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfa, 0x88, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //@@@@@@@@@@%...........
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xbc, 0x28, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //@@@@@@@@%.............
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xe4, 0x56, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //@@@@@@@+..............
+            0xd4, 0xff, 0xff, 0xff, 0xfa, 0x8a, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //@@@@@%................
+            0x3c, 0xe8, 0xfc, 0xb8, 0x2a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //.@@%..................
+            0x00, 0x02, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //......................
+        };
+        constexpr const AlphaSymbol symbol(width, height, data);
+    }
+
+    namespace pause {
+        constexpr const size_t height = 26;
+        constexpr const size_t width = 22;
+        constexpr const unsigned char data[height * width] = {
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //......................
+            0x00, 0x2a, 0x58, 0x5a, 0x5a, 0x5a, 0x5a, 0x38, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1a, 0x54, 0x5a, 0x5a, 0x5a, 0x5a, 0x48, 0x06, 0x00, //..+++++.......++++++..
+            0x68, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x94, 0x00, 0x00, 0x00, 0x34, 0xf2, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xcc, 0x06, //+@@@@@@@%....@@@@@@@@.
+            0xea, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x16, 0x00, 0x00, 0xaa, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x56, //@@@@@@@@@...%@@@@@@@@+
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x2e, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@...%@@@@@@@@+
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x2e, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@...%@@@@@@@@+
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x2e, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@...%@@@@@@@@+
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x2e, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@...%@@@@@@@@+
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x2e, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@...%@@@@@@@@+
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x2e, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@...%@@@@@@@@+
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x2e, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@...%@@@@@@@@+
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x2e, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@...%@@@@@@@@+
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x2e, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@...%@@@@@@@@+
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x2e, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@...%@@@@@@@@+
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x2e, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@...%@@@@@@@@+
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x2e, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@...%@@@@@@@@+
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x2e, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@...%@@@@@@@@+
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x2e, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@...%@@@@@@@@+
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x2e, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@...%@@@@@@@@+
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x2e, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@...%@@@@@@@@+
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x2e, 0x00, 0x00, 0xbe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@...%@@@@@@@@+
+            0xf0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x1e, 0x00, 0x00, 0xb0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x5e, //@@@@@@@@@...%@@@@@@@@+
+            0x8c, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xb8, 0x00, 0x00, 0x00, 0x4e, 0xfc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xe4, 0x12, //%@@@@@@@%...+@@@@@@@@.
+            0x00, 0x54, 0x82, 0x82, 0x82, 0x82, 0x82, 0x62, 0x06, 0x00, 0x00, 0x00, 0x00, 0x38, 0x7c, 0x82, 0x82, 0x82, 0x82, 0x72, 0x18, 0x00, //.+%%%%%+......+%%%%+..
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //......................
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //......................
+        };
+        constexpr const AlphaSymbol symbol(width, height, data);
+    }
+
+    namespace stop {
+        constexpr const size_t height = 26;
+        constexpr const size_t width = 22;
+        constexpr const unsigned char data[height * width] = {
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //......................
+            0x00, 0x20, 0x4c, 0x4e, 0x4e, 0x4e, 0x4e, 0x4e, 0x4e, 0x4e, 0x4e, 0x4e, 0x4e, 0x4e, 0x4e, 0x4e, 0x4e, 0x4e, 0x4e, 0x3c, 0x02, 0x00, //..+++++++++++++++++...
+            0x60, 0xfc, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xc2, 0x04, //+@@@@@@@@@@@@@@@@@@@@.
+            0xe6, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x54, //@@@@@@@@@@@@@@@@@@@@@+
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@@@@@@@@@@@@@+
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@@@@@@@@@@@@@+
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@@@@@@@@@@@@@+
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@@@@@@@@@@@@@+
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@@@@@@@@@@@@@+
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@@@@@@@@@@@@@+
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@@@@@@@@@@@@@+
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@@@@@@@@@@@@@+
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@@@@@@@@@@@@@+
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@@@@@@@@@@@@@+
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@@@@@@@@@@@@@+
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@@@@@@@@@@@@@+
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@@@@@@@@@@@@@+
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@@@@@@@@@@@@@+
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@@@@@@@@@@@@@+
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@@@@@@@@@@@@@+
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x6e, //@@@@@@@@@@@@@@@@@@@@@+
+            0xf2, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x60, //@@@@@@@@@@@@@@@@@@@@@+
+            0x94, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xea, 0x16, //%@@@@@@@@@@@@@@@@@@@@.
+            0x02, 0x60, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x8e, 0x7e, 0x1e, 0x00, //.+%%%%%%%%%%%%%%%%%+..
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //......................
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //......................
+        };
+        constexpr const AlphaSymbol symbol(width, height, data);
+    }
+}
diff --git a/overlay/sys-tune-overlay.elf b/overlay/sys-tune-overlay.elf
new file mode 100644
index 0000000..0830c1e
Binary files /dev/null and b/overlay/sys-tune-overlay.elf differ
diff --git a/overlay/sys-tune-overlay.nacp b/overlay/sys-tune-overlay.nacp
new file mode 100644
index 0000000..596ed1f
Binary files /dev/null and b/overlay/sys-tune-overlay.nacp differ
diff --git a/overlay/sys-tune-overlay.ovl b/overlay/sys-tune-overlay.ovl
new file mode 100644
index 0000000..28cce0f
Binary files /dev/null and b/overlay/sys-tune-overlay.ovl differ
diff --git a/sys-tune-overlay.lst b/sys-tune-overlay.lst
index 66d2aa9..b94325a 100644
--- a/sys-tune-overlay.lst
+++ b/sys-tune-overlay.lst
@@ -1,2 +1,2 @@
--CSn
-/home/hxcpxxp/steven/overlay/sys-tune-overlay.elf
+-CSn
+/home/hxcpxxp/steven/overlay/sys-tune-overlay.elf
diff --git a/sys-tune.lst b/sys-tune.lst
index 1416c00..37b8208 100644
--- a/sys-tune.lst
+++ b/sys-tune.lst
@@ -1,2 +1,2 @@
--CSn
-/home/hxcpxxp/steven/sys-tune/sys-tune.elf
+-CSn
+/home/hxcpxxp/steven/sys-tune/sys-tune.elf
diff --git a/sys-tune/nxExt/lib/libnxExt.a b/sys-tune/nxExt/lib/libnxExt.a
new file mode 100644
index 0000000..f32aec2
Binary files /dev/null and b/sys-tune/nxExt/lib/libnxExt.a differ
diff --git a/sys-tune/nxExt/release/ipc_server.d b/sys-tune/nxExt/release/ipc_server.d
new file mode 100644
index 0000000..0a4b70c
--- /dev/null
+++ b/sys-tune/nxExt/release/ipc_server.d
@@ -0,0 +1,348 @@
+ipc_server.o: \
+ /mnt/c/Users/carel/erp/sys-tune/sys-tune/nxExt/src/ipc_server.c \
+ /mnt/c/Users/carel/erp/sys-tune/sys-tune/nxExt/include/nxExt/ipc_server.h \
+ /opt/devkitpro/libnx/include/switch.h \
+ /opt/devkitpro/libnx/include/switch/types.h \
+ /opt/devkitpro/libnx/include/switch/result.h \
+ /opt/devkitpro/libnx/include/switch/nro.h \
+ /opt/devkitpro/libnx/include/switch/nacp.h \
+ /opt/devkitpro/libnx/include/switch/arm/tls.h \
+ /opt/devkitpro/libnx/include/switch/arm/cache.h \
+ /opt/devkitpro/libnx/include/switch/arm/counter.h \
+ /opt/devkitpro/libnx/include/switch/kernel/svc.h \
+ /opt/devkitpro/libnx/include/switch/kernel/../arm/thread_context.h \
+ /opt/devkitpro/libnx/include/switch/kernel/wait.h \
+ /opt/devkitpro/libnx/include/switch/kernel/mutex.h \
+ /opt/devkitpro/libnx/include/switch/kernel/tmem.h \
+ /opt/devkitpro/libnx/include/switch/kernel/shmem.h \
+ /opt/devkitpro/libnx/include/switch/kernel/event.h \
+ /opt/devkitpro/libnx/include/switch/kernel/levent.h \
+ /opt/devkitpro/libnx/include/switch/kernel/uevent.h \
+ /opt/devkitpro/libnx/include/switch/kernel/utimer.h \
+ /opt/devkitpro/libnx/include/switch/kernel/rwlock.h \
+ /opt/devkitpro/libnx/include/switch/kernel/../kernel/condvar.h \
+ /opt/devkitpro/libnx/include/switch/kernel/thread.h \
+ /opt/devkitpro/libnx/include/switch/kernel/semaphore.h \
+ /opt/devkitpro/libnx/include/switch/kernel/virtmem.h \
+ /opt/devkitpro/libnx/include/switch/kernel/detect.h \
+ /opt/devkitpro/libnx/include/switch/kernel/random.h \
+ /opt/devkitpro/libnx/include/switch/kernel/jit.h \
+ /opt/devkitpro/libnx/include/switch/kernel/barrier.h \
+ /opt/devkitpro/libnx/include/switch/sf/hipc.h \
+ /opt/devkitpro/libnx/include/switch/sf/cmif.h \
+ /opt/devkitpro/libnx/include/switch/sf/service.h \
+ /opt/devkitpro/libnx/include/switch/sf/sessionmgr.h \
+ /opt/devkitpro/libnx/include/switch/sf/tipc.h \
+ /opt/devkitpro/libnx/include/switch/services/sm.h \
+ /opt/devkitpro/libnx/include/switch/services/smm.h \
+ /opt/devkitpro/libnx/include/switch/services/fs.h \
+ /opt/devkitpro/libnx/include/switch/services/../services/ncm_types.h \
+ /opt/devkitpro/libnx/include/switch/services/../services/../crypto/sha256.h \
+ /opt/devkitpro/libnx/include/switch/services/../services/acc.h \
+ /opt/devkitpro/libnx/include/switch/services/fsldr.h \
+ /opt/devkitpro/libnx/include/switch/services/fspr.h \
+ /opt/devkitpro/libnx/include/switch/services/apm.h \
+ /opt/devkitpro/libnx/include/switch/services/applet.h \
+ /opt/devkitpro/libnx/include/switch/services/../services/pdm.h \
+ /opt/devkitpro/libnx/include/switch/services/../services/caps.h \
+ /opt/devkitpro/libnx/include/switch/services/../services/pm.h \
+ /opt/devkitpro/libnx/include/switch/services/../services/set.h \
+ /opt/devkitpro/libnx/include/switch/services/../services/../services/time.h \
+ /opt/devkitpro/libnx/include/switch/services/../services/../services/btdrv_types.h \
+ /opt/devkitpro/libnx/include/switch/services/../services/../services/btdrv_ids.h \
+ /opt/devkitpro/libnx/include/switch/services/../services/../services/btm_types.h \
+ /opt/devkitpro/libnx/include/switch/services/async.h \
+ /opt/devkitpro/libnx/include/switch/services/../applets/error.h \
+ /opt/devkitpro/libnx/include/switch/services/audctl.h \
+ /opt/devkitpro/libnx/include/switch/services/../audio/audio.h \
+ /opt/devkitpro/libnx/include/switch/services/audin.h \
+ /opt/devkitpro/libnx/include/switch/services/audout.h \
+ /opt/devkitpro/libnx/include/switch/services/audrec.h \
+ /opt/devkitpro/libnx/include/switch/services/audren.h \
+ /opt/devkitpro/libnx/include/switch/services/auddev.h \
+ /opt/devkitpro/libnx/include/switch/services/hwopus.h \
+ /opt/devkitpro/libnx/include/switch/services/csrng.h \
+ /opt/devkitpro/libnx/include/switch/services/lbl.h \
+ /opt/devkitpro/libnx/include/switch/services/i2c.h \
+ /opt/devkitpro/libnx/include/switch/services/gpio.h \
+ /opt/devkitpro/libnx/include/switch/services/uart.h \
+ /opt/devkitpro/libnx/include/switch/services/bpc.h \
+ /opt/devkitpro/libnx/include/switch/services/pcv.h \
+ /opt/devkitpro/libnx/include/switch/services/clkrst.h \
+ /opt/devkitpro/libnx/include/switch/services/fan.h \
+ /opt/devkitpro/libnx/include/switch/services/pgl.h \
+ /opt/devkitpro/libnx/include/switch/services/psm.h \
+ /opt/devkitpro/libnx/include/switch/services/spsm.h \
+ /opt/devkitpro/libnx/include/switch/services/fatal.h \
+ /opt/devkitpro/libnx/include/switch/services/usb.h \
+ /opt/devkitpro/libnx/include/switch/services/usbds.h \
+ /opt/devkitpro/libnx/include/switch/services/usbhs.h \
+ /opt/devkitpro/libnx/include/switch/services/hid.h \
+ /opt/devkitpro/libnx/include/switch/services/hidbus.h \
+ /opt/devkitpro/libnx/include/switch/services/hiddbg.h \
+ /opt/devkitpro/libnx/include/switch/services/../services/hidsys.h \
+ /opt/devkitpro/libnx/include/switch/services/irs.h \
+ /opt/devkitpro/libnx/include/switch/services/pl.h \
+ /opt/devkitpro/libnx/include/switch/services/vi.h \
+ /opt/devkitpro/libnx/include/switch/services/nv.h \
+ /opt/devkitpro/libnx/include/switch/services/nifm.h \
+ /opt/devkitpro/libnx/include/switch/services/nim.h \
+ /opt/devkitpro/libnx/include/switch/services/ns.h \
+ /opt/devkitpro/libnx/include/switch/services/ldr.h \
+ /opt/devkitpro/libnx/include/switch/services/ro.h \
+ /opt/devkitpro/libnx/include/switch/services/tc.h \
+ /opt/devkitpro/libnx/include/switch/services/ts.h \
+ /opt/devkitpro/libnx/include/switch/services/ssl.h \
+ /opt/devkitpro/libnx/include/switch/services/lr.h \
+ /opt/devkitpro/libnx/include/switch/services/bt.h \
+ /opt/devkitpro/libnx/include/switch/services/../services/btdrv.h \
+ /opt/devkitpro/libnx/include/switch/services/btm.h \
+ /opt/devkitpro/libnx/include/switch/services/btmu.h \
+ /opt/devkitpro/libnx/include/switch/services/btmsys.h \
+ /opt/devkitpro/libnx/include/switch/services/spl.h \
+ /opt/devkitpro/libnx/include/switch/services/ncm.h \
+ /opt/devkitpro/libnx/include/switch/services/psc.h \
+ /opt/devkitpro/libnx/include/switch/services/capsa.h \
+ /opt/devkitpro/libnx/include/switch/services/capsc.h \
+ /opt/devkitpro/libnx/include/switch/services/capsdc.h \
+ /opt/devkitpro/libnx/include/switch/services/capsu.h \
+ /opt/devkitpro/libnx/include/switch/services/capssc.h \
+ /opt/devkitpro/libnx/include/switch/services/capssu.h \
+ /opt/devkitpro/libnx/include/switch/services/capmtp.h \
+ /opt/devkitpro/libnx/include/switch/services/nfc.h \
+ /opt/devkitpro/libnx/include/switch/services/../services/mii.h \
+ /opt/devkitpro/libnx/include/switch/services/wlaninf.h \
+ /opt/devkitpro/libnx/include/switch/services/pctl.h \
+ /opt/devkitpro/libnx/include/switch/services/grc.h \
+ /opt/devkitpro/libnx/include/switch/services/../display/native_window.h \
+ /opt/devkitpro/libnx/include/switch/services/../display/../nvidia/graphic_buffer.h \
+ /opt/devkitpro/libnx/include/switch/services/../display/../nvidia/../display/types.h \
+ /opt/devkitpro/libnx/include/switch/services/../display/../nvidia/types.h \
+ /opt/devkitpro/libnx/include/switch/services/../display/binder.h \
+ /opt/devkitpro/libnx/include/switch/services/../display/buffer_producer.h \
+ /opt/devkitpro/libnx/include/switch/services/../display/../nvidia/fence.h \
+ /opt/devkitpro/libnx/include/switch/services/../display/../nvidia/ioctl.h \
+ /opt/devkitpro/libnx/include/switch/services/friends.h \
+ /opt/devkitpro/libnx/include/switch/services/notif.h \
+ /opt/devkitpro/libnx/include/switch/services/miiimg.h \
+ /opt/devkitpro/libnx/include/switch/services/ldn.h \
+ /opt/devkitpro/libnx/include/switch/services/lp2p.h \
+ /opt/devkitpro/libnx/include/switch/services/news.h \
+ /opt/devkitpro/libnx/include/switch/services/ins.h \
+ /opt/devkitpro/libnx/include/switch/services/ectx.h \
+ /opt/devkitpro/libnx/include/switch/services/avm.h \
+ /opt/devkitpro/libnx/include/switch/services/mm.h \
+ /opt/devkitpro/libnx/include/switch/display/parcel.h \
+ /opt/devkitpro/libnx/include/switch/display/framebuffer.h \
+ /opt/devkitpro/libnx/include/switch/display/../nvidia/map.h \
+ /opt/devkitpro/libnx/include/switch/nvidia/address_space.h \
+ /opt/devkitpro/libnx/include/switch/nvidia/channel.h \
+ /opt/devkitpro/libnx/include/switch/nvidia/gpu.h \
+ /opt/devkitpro/libnx/include/switch/nvidia/gpu_channel.h \
+ /opt/devkitpro/libnx/include/switch/audio/driver.h \
+ /opt/devkitpro/libnx/include/switch/applets/libapplet.h \
+ /opt/devkitpro/libnx/include/switch/applets/album_la.h \
+ /opt/devkitpro/libnx/include/switch/applets/friends_la.h \
+ /opt/devkitpro/libnx/include/switch/applets/hid_la.h \
+ /opt/devkitpro/libnx/include/switch/applets/mii_la.h \
+ /opt/devkitpro/libnx/include/switch/applets/nfp_la.h \
+ /opt/devkitpro/libnx/include/switch/applets/nifm_la.h \
+ /opt/devkitpro/libnx/include/switch/applets/pctlauth.h \
+ /opt/devkitpro/libnx/include/switch/applets/psel.h \
+ /opt/devkitpro/libnx/include/switch/applets/swkbd.h \
+ /opt/devkitpro/libnx/include/switch/applets/web.h \
+ /opt/devkitpro/libnx/include/switch/runtime/env.h \
+ /opt/devkitpro/libnx/include/switch/runtime/hosversion.h \
+ /opt/devkitpro/libnx/include/switch/runtime/diag.h \
+ /opt/devkitpro/libnx/include/switch/runtime/nxlink.h \
+ /opt/devkitpro/libnx/include/switch/runtime/resolver.h \
+ /opt/devkitpro/libnx/include/switch/runtime/pad.h \
+ /opt/devkitpro/libnx/include/switch/runtime/ringcon.h \
+ /opt/devkitpro/libnx/include/switch/runtime/btdev.h \
+ /opt/devkitpro/libnx/include/switch/runtime/util/utf.h \
+ /opt/devkitpro/libnx/include/switch/runtime/devices/console.h \
+ /opt/devkitpro/libnx/include/switch/runtime/devices/usb_comms.h \
+ /opt/devkitpro/libnx/include/switch/runtime/devices/fs_dev.h \
+ /opt/devkitpro/libnx/include/switch/runtime/devices/romfs_dev.h \
+ /opt/devkitpro/libnx/include/switch/runtime/devices/socket.h \
+ /opt/devkitpro/libnx/include/switch/crypto/aes.h \
+ /opt/devkitpro/libnx/include/switch/crypto/aes_cbc.h \
+ /opt/devkitpro/libnx/include/switch/crypto/aes_ctr.h \
+ /opt/devkitpro/libnx/include/switch/crypto/aes_xts.h \
+ /opt/devkitpro/libnx/include/switch/crypto/cmac.h \
+ /opt/devkitpro/libnx/include/switch/crypto/sha1.h \
+ /opt/devkitpro/libnx/include/switch/crypto/hmac.h \
+ /opt/devkitpro/libnx/include/switch/crypto/crc.h
+/mnt/c/Users/carel/erp/sys-tune/sys-tune/nxExt/include/nxExt/ipc_server.h:
+/opt/devkitpro/libnx/include/switch.h:
+/opt/devkitpro/libnx/include/switch/types.h:
+/opt/devkitpro/libnx/include/switch/result.h:
+/opt/devkitpro/libnx/include/switch/nro.h:
+/opt/devkitpro/libnx/include/switch/nacp.h:
+/opt/devkitpro/libnx/include/switch/arm/tls.h:
+/opt/devkitpro/libnx/include/switch/arm/cache.h:
+/opt/devkitpro/libnx/include/switch/arm/counter.h:
+/opt/devkitpro/libnx/include/switch/kernel/svc.h:
+/opt/devkitpro/libnx/include/switch/kernel/../arm/thread_context.h:
+/opt/devkitpro/libnx/include/switch/kernel/wait.h:
+/opt/devkitpro/libnx/include/switch/kernel/mutex.h:
+/opt/devkitpro/libnx/include/switch/kernel/tmem.h:
+/opt/devkitpro/libnx/include/switch/kernel/shmem.h:
+/opt/devkitpro/libnx/include/switch/kernel/event.h:
+/opt/devkitpro/libnx/include/switch/kernel/levent.h:
+/opt/devkitpro/libnx/include/switch/kernel/uevent.h:
+/opt/devkitpro/libnx/include/switch/kernel/utimer.h:
+/opt/devkitpro/libnx/include/switch/kernel/rwlock.h:
+/opt/devkitpro/libnx/include/switch/kernel/../kernel/condvar.h:
+/opt/devkitpro/libnx/include/switch/kernel/thread.h:
+/opt/devkitpro/libnx/include/switch/kernel/semaphore.h:
+/opt/devkitpro/libnx/include/switch/kernel/virtmem.h:
+/opt/devkitpro/libnx/include/switch/kernel/detect.h:
+/opt/devkitpro/libnx/include/switch/kernel/random.h:
+/opt/devkitpro/libnx/include/switch/kernel/jit.h:
+/opt/devkitpro/libnx/include/switch/kernel/barrier.h:
+/opt/devkitpro/libnx/include/switch/sf/hipc.h:
+/opt/devkitpro/libnx/include/switch/sf/cmif.h:
+/opt/devkitpro/libnx/include/switch/sf/service.h:
+/opt/devkitpro/libnx/include/switch/sf/sessionmgr.h:
+/opt/devkitpro/libnx/include/switch/sf/tipc.h:
+/opt/devkitpro/libnx/include/switch/services/sm.h:
+/opt/devkitpro/libnx/include/switch/services/smm.h:
+/opt/devkitpro/libnx/include/switch/services/fs.h:
+/opt/devkitpro/libnx/include/switch/services/../services/ncm_types.h:
+/opt/devkitpro/libnx/include/switch/services/../services/../crypto/sha256.h:
+/opt/devkitpro/libnx/include/switch/services/../services/acc.h:
+/opt/devkitpro/libnx/include/switch/services/fsldr.h:
+/opt/devkitpro/libnx/include/switch/services/fspr.h:
+/opt/devkitpro/libnx/include/switch/services/apm.h:
+/opt/devkitpro/libnx/include/switch/services/applet.h:
+/opt/devkitpro/libnx/include/switch/services/../services/pdm.h:
+/opt/devkitpro/libnx/include/switch/services/../services/caps.h:
+/opt/devkitpro/libnx/include/switch/services/../services/pm.h:
+/opt/devkitpro/libnx/include/switch/services/../services/set.h:
+/opt/devkitpro/libnx/include/switch/services/../services/../services/time.h:
+/opt/devkitpro/libnx/include/switch/services/../services/../services/btdrv_types.h:
+/opt/devkitpro/libnx/include/switch/services/../services/../services/btdrv_ids.h:
+/opt/devkitpro/libnx/include/switch/services/../services/../services/btm_types.h:
+/opt/devkitpro/libnx/include/switch/services/async.h:
+/opt/devkitpro/libnx/include/switch/services/../applets/error.h:
+/opt/devkitpro/libnx/include/switch/services/audctl.h:
+/opt/devkitpro/libnx/include/switch/services/../audio/audio.h:
+/opt/devkitpro/libnx/include/switch/services/audin.h:
+/opt/devkitpro/libnx/include/switch/services/audout.h:
+/opt/devkitpro/libnx/include/switch/services/audrec.h:
+/opt/devkitpro/libnx/include/switch/services/audren.h:
+/opt/devkitpro/libnx/include/switch/services/auddev.h:
+/opt/devkitpro/libnx/include/switch/services/hwopus.h:
+/opt/devkitpro/libnx/include/switch/services/csrng.h:
+/opt/devkitpro/libnx/include/switch/services/lbl.h:
+/opt/devkitpro/libnx/include/switch/services/i2c.h:
+/opt/devkitpro/libnx/include/switch/services/gpio.h:
+/opt/devkitpro/libnx/include/switch/services/uart.h:
+/opt/devkitpro/libnx/include/switch/services/bpc.h:
+/opt/devkitpro/libnx/include/switch/services/pcv.h:
+/opt/devkitpro/libnx/include/switch/services/clkrst.h:
+/opt/devkitpro/libnx/include/switch/services/fan.h:
+/opt/devkitpro/libnx/include/switch/services/pgl.h:
+/opt/devkitpro/libnx/include/switch/services/psm.h:
+/opt/devkitpro/libnx/include/switch/services/spsm.h:
+/opt/devkitpro/libnx/include/switch/services/fatal.h:
+/opt/devkitpro/libnx/include/switch/services/usb.h:
+/opt/devkitpro/libnx/include/switch/services/usbds.h:
+/opt/devkitpro/libnx/include/switch/services/usbhs.h:
+/opt/devkitpro/libnx/include/switch/services/hid.h:
+/opt/devkitpro/libnx/include/switch/services/hidbus.h:
+/opt/devkitpro/libnx/include/switch/services/hiddbg.h:
+/opt/devkitpro/libnx/include/switch/services/../services/hidsys.h:
+/opt/devkitpro/libnx/include/switch/services/irs.h:
+/opt/devkitpro/libnx/include/switch/services/pl.h:
+/opt/devkitpro/libnx/include/switch/services/vi.h:
+/opt/devkitpro/libnx/include/switch/services/nv.h:
+/opt/devkitpro/libnx/include/switch/services/nifm.h:
+/opt/devkitpro/libnx/include/switch/services/nim.h:
+/opt/devkitpro/libnx/include/switch/services/ns.h:
+/opt/devkitpro/libnx/include/switch/services/ldr.h:
+/opt/devkitpro/libnx/include/switch/services/ro.h:
+/opt/devkitpro/libnx/include/switch/services/tc.h:
+/opt/devkitpro/libnx/include/switch/services/ts.h:
+/opt/devkitpro/libnx/include/switch/services/ssl.h:
+/opt/devkitpro/libnx/include/switch/services/lr.h:
+/opt/devkitpro/libnx/include/switch/services/bt.h:
+/opt/devkitpro/libnx/include/switch/services/../services/btdrv.h:
+/opt/devkitpro/libnx/include/switch/services/btm.h:
+/opt/devkitpro/libnx/include/switch/services/btmu.h:
+/opt/devkitpro/libnx/include/switch/services/btmsys.h:
+/opt/devkitpro/libnx/include/switch/services/spl.h:
+/opt/devkitpro/libnx/include/switch/services/ncm.h:
+/opt/devkitpro/libnx/include/switch/services/psc.h:
+/opt/devkitpro/libnx/include/switch/services/capsa.h:
+/opt/devkitpro/libnx/include/switch/services/capsc.h:
+/opt/devkitpro/libnx/include/switch/services/capsdc.h:
+/opt/devkitpro/libnx/include/switch/services/capsu.h:
+/opt/devkitpro/libnx/include/switch/services/capssc.h:
+/opt/devkitpro/libnx/include/switch/services/capssu.h:
+/opt/devkitpro/libnx/include/switch/services/capmtp.h:
+/opt/devkitpro/libnx/include/switch/services/nfc.h:
+/opt/devkitpro/libnx/include/switch/services/../services/mii.h:
+/opt/devkitpro/libnx/include/switch/services/wlaninf.h:
+/opt/devkitpro/libnx/include/switch/services/pctl.h:
+/opt/devkitpro/libnx/include/switch/services/grc.h:
+/opt/devkitpro/libnx/include/switch/services/../display/native_window.h:
+/opt/devkitpro/libnx/include/switch/services/../display/../nvidia/graphic_buffer.h:
+/opt/devkitpro/libnx/include/switch/services/../display/../nvidia/../display/types.h:
+/opt/devkitpro/libnx/include/switch/services/../display/../nvidia/types.h:
+/opt/devkitpro/libnx/include/switch/services/../display/binder.h:
+/opt/devkitpro/libnx/include/switch/services/../display/buffer_producer.h:
+/opt/devkitpro/libnx/include/switch/services/../display/../nvidia/fence.h:
+/opt/devkitpro/libnx/include/switch/services/../display/../nvidia/ioctl.h:
+/opt/devkitpro/libnx/include/switch/services/friends.h:
+/opt/devkitpro/libnx/include/switch/services/notif.h:
+/opt/devkitpro/libnx/include/switch/services/miiimg.h:
+/opt/devkitpro/libnx/include/switch/services/ldn.h:
+/opt/devkitpro/libnx/include/switch/services/lp2p.h:
+/opt/devkitpro/libnx/include/switch/services/news.h:
+/opt/devkitpro/libnx/include/switch/services/ins.h:
+/opt/devkitpro/libnx/include/switch/services/ectx.h:
+/opt/devkitpro/libnx/include/switch/services/avm.h:
+/opt/devkitpro/libnx/include/switch/services/mm.h:
+/opt/devkitpro/libnx/include/switch/display/parcel.h:
+/opt/devkitpro/libnx/include/switch/display/framebuffer.h:
+/opt/devkitpro/libnx/include/switch/display/../nvidia/map.h:
+/opt/devkitpro/libnx/include/switch/nvidia/address_space.h:
+/opt/devkitpro/libnx/include/switch/nvidia/channel.h:
+/opt/devkitpro/libnx/include/switch/nvidia/gpu.h:
+/opt/devkitpro/libnx/include/switch/nvidia/gpu_channel.h:
+/opt/devkitpro/libnx/include/switch/audio/driver.h:
+/opt/devkitpro/libnx/include/switch/applets/libapplet.h:
+/opt/devkitpro/libnx/include/switch/applets/album_la.h:
+/opt/devkitpro/libnx/include/switch/applets/friends_la.h:
+/opt/devkitpro/libnx/include/switch/applets/hid_la.h:
+/opt/devkitpro/libnx/include/switch/applets/mii_la.h:
+/opt/devkitpro/libnx/include/switch/applets/nfp_la.h:
+/opt/devkitpro/libnx/include/switch/applets/nifm_la.h:
+/opt/devkitpro/libnx/include/switch/applets/pctlauth.h:
+/opt/devkitpro/libnx/include/switch/applets/psel.h:
+/opt/devkitpro/libnx/include/switch/applets/swkbd.h:
+/opt/devkitpro/libnx/include/switch/applets/web.h:
+/opt/devkitpro/libnx/include/switch/runtime/env.h:
+/opt/devkitpro/libnx/include/switch/runtime/hosversion.h:
+/opt/devkitpro/libnx/include/switch/runtime/diag.h:
+/opt/devkitpro/libnx/include/switch/runtime/nxlink.h:
+/opt/devkitpro/libnx/include/switch/runtime/resolver.h:
+/opt/devkitpro/libnx/include/switch/runtime/pad.h:
+/opt/devkitpro/libnx/include/switch/runtime/ringcon.h:
+/opt/devkitpro/libnx/include/switch/runtime/btdev.h:
+/opt/devkitpro/libnx/include/switch/runtime/util/utf.h:
+/opt/devkitpro/libnx/include/switch/runtime/devices/console.h:
+/opt/devkitpro/libnx/include/switch/runtime/devices/usb_comms.h:
+/opt/devkitpro/libnx/include/switch/runtime/devices/fs_dev.h:
+/opt/devkitpro/libnx/include/switch/runtime/devices/romfs_dev.h:
+/opt/devkitpro/libnx/include/switch/runtime/devices/socket.h:
+/opt/devkitpro/libnx/include/switch/crypto/aes.h:
+/opt/devkitpro/libnx/include/switch/crypto/aes_cbc.h:
+/opt/devkitpro/libnx/include/switch/crypto/aes_ctr.h:
+/opt/devkitpro/libnx/include/switch/crypto/aes_xts.h:
+/opt/devkitpro/libnx/include/switch/crypto/cmac.h:
+/opt/devkitpro/libnx/include/switch/crypto/sha1.h:
+/opt/devkitpro/libnx/include/switch/crypto/hmac.h:
+/opt/devkitpro/libnx/include/switch/crypto/crc.h:
diff --git a/sys-tune/nxExt/release/ipc_server.o b/sys-tune/nxExt/release/ipc_server.o
new file mode 100644
index 0000000..1b0667f
Binary files /dev/null and b/sys-tune/nxExt/release/ipc_server.o differ
diff --git a/sys-tune/source/impl/aud.c b/sys-tune/source/impl/aud.c
index 609d801..5086971 100644
--- a/sys-tune/source/impl/aud.c
+++ b/sys-tune/source/impl/aud.c
@@ -1,113 +1,113 @@
-#define NX_SERVICE_ASSUME_NON_DOMAIN
-#include "aud.h"
-#include <switch.h>
-
-static Service g_audaSrv;
-static Service g_auddSrv;
-
-// IAudioSystemManagerForApplet
-Result audaInitialize(void) {
-    if (hosversionBefore(11,0,0))
-        return MAKERESULT(Module_Libnx, LibnxError_IncompatSysVer);
-
-    return smGetService(&g_audaSrv, "aud:a");
-}
-
-void audaExit(void) {
-    serviceClose(&g_audaSrv);
-}
-
-Service* audaGetServiceSession(void) {
-    return &g_audaSrv;
-}
-
-Result audaRequestSuspendAudio(u64 pid, u64 delay) {
-    const struct {
-        u64 pid;
-        u64 timespan;
-    } in = { pid, delay };
-
-    return serviceDispatchIn(&g_audaSrv, 2, in);
-}
-
-Result audaRequestResumeAudio(u64 pid, u64 delay) {
-    const struct {
-        u64 pid;
-        u64 timespan;
-    } in = { pid, delay };
-
-    return serviceDispatchIn(&g_audaSrv, 3, in);
-}
-
-Result audaGetAudioOutputProcessMasterVolume(u64 pid, double* volume_out) {
-    return serviceDispatchInOut(&g_audaSrv, 4, pid, *volume_out);
-}
-
-Result audaSetAudioOutputProcessMasterVolume(u64 pid, u64 delay, double volume) {
-    const struct {
-        double volume;
-        u64 pid;
-        u64 timespan;
-    } in = { volume, pid, delay };
-
-    return serviceDispatchIn(&g_audaSrv, 5, in);
-}
-
-Result audaGetAudioInputProcessMasterVolume(u64 pid, double* volume_out) {
-    return serviceDispatchInOut(&g_audaSrv, 6, pid, *volume_out);
-}
-
-Result audaSetAudioInputProcessMasterVolume(u64 pid, u64 delay, double volume) {
-    const struct {
-        double volume;
-        u64 pid;
-        u64 timespan;
-    } in = { volume, pid, delay };
-
-    return serviceDispatchIn(&g_audaSrv, 7, in);
-}
-
-Result audaGetAudioOutputProcessRecordVolume(u64 pid, double* volume_out) {
-    return serviceDispatchInOut(&g_audaSrv, 8, pid, *volume_out);
-}
-
-Result audaSetAudioOutputProcessRecordVolume(u64 pid, u64 delay, double volume) {
-    const struct {
-        double volume;
-        u64 pid;
-        u64 timespan;
-    } in = { volume, pid, delay };
-
-    return serviceDispatchIn(&g_audaSrv, 9, in);
-}
-
-// IAudioSystemManagerForDebugger
-Result auddInitialize(void) {
-    return smGetService(&g_auddSrv, "aud:d");
-}
-
-void auddExit(void) {
-    serviceClose(&g_auddSrv);
-}
-
-Service* auddGetServiceSession(void) {
-    return &g_auddSrv;
-}
-
-Result auddRequestSuspendAudioForDebug(u64 pid, u64 delay) {
-    const struct {
-        u64 pid;
-        u64 timespan;
-    } in = { pid, delay };
-
-    return serviceDispatchIn(&g_auddSrv, 0, in);
-}
-
-Result auddRequestResumeAudioForDebug(u64 pid, u64 delay) {
-    const struct {
-        u64 pid;
-        u64 timespan;
-    } in = { pid, delay };
-
-    return serviceDispatchIn(&g_auddSrv, 1, in);
-}
+#define NX_SERVICE_ASSUME_NON_DOMAIN
+#include "aud.h"
+#include <switch.h>
+
+static Service g_audaSrv;
+static Service g_auddSrv;
+
+// IAudioSystemManagerForApplet
+Result audaInitialize(void) {
+    if (hosversionBefore(11,0,0))
+        return MAKERESULT(Module_Libnx, LibnxError_IncompatSysVer);
+
+    return smGetService(&g_audaSrv, "aud:a");
+}
+
+void audaExit(void) {
+    serviceClose(&g_audaSrv);
+}
+
+Service* audaGetServiceSession(void) {
+    return &g_audaSrv;
+}
+
+Result audaRequestSuspendAudio(u64 pid, u64 delay) {
+    const struct {
+        u64 pid;
+        u64 timespan;
+    } in = { pid, delay };
+
+    return serviceDispatchIn(&g_audaSrv, 2, in);
+}
+
+Result audaRequestResumeAudio(u64 pid, u64 delay) {
+    const struct {
+        u64 pid;
+        u64 timespan;
+    } in = { pid, delay };
+
+    return serviceDispatchIn(&g_audaSrv, 3, in);
+}
+
+Result audaGetAudioOutputProcessMasterVolume(u64 pid, double* volume_out) {
+    return serviceDispatchInOut(&g_audaSrv, 4, pid, *volume_out);
+}
+
+Result audaSetAudioOutputProcessMasterVolume(u64 pid, u64 delay, double volume) {
+    const struct {
+        double volume;
+        u64 pid;
+        u64 timespan;
+    } in = { volume, pid, delay };
+
+    return serviceDispatchIn(&g_audaSrv, 5, in);
+}
+
+Result audaGetAudioInputProcessMasterVolume(u64 pid, double* volume_out) {
+    return serviceDispatchInOut(&g_audaSrv, 6, pid, *volume_out);
+}
+
+Result audaSetAudioInputProcessMasterVolume(u64 pid, u64 delay, double volume) {
+    const struct {
+        double volume;
+        u64 pid;
+        u64 timespan;
+    } in = { volume, pid, delay };
+
+    return serviceDispatchIn(&g_audaSrv, 7, in);
+}
+
+Result audaGetAudioOutputProcessRecordVolume(u64 pid, double* volume_out) {
+    return serviceDispatchInOut(&g_audaSrv, 8, pid, *volume_out);
+}
+
+Result audaSetAudioOutputProcessRecordVolume(u64 pid, u64 delay, double volume) {
+    const struct {
+        double volume;
+        u64 pid;
+        u64 timespan;
+    } in = { volume, pid, delay };
+
+    return serviceDispatchIn(&g_audaSrv, 9, in);
+}
+
+// IAudioSystemManagerForDebugger
+Result auddInitialize(void) {
+    return smGetService(&g_auddSrv, "aud:d");
+}
+
+void auddExit(void) {
+    serviceClose(&g_auddSrv);
+}
+
+Service* auddGetServiceSession(void) {
+    return &g_auddSrv;
+}
+
+Result auddRequestSuspendAudioForDebug(u64 pid, u64 delay) {
+    const struct {
+        u64 pid;
+        u64 timespan;
+    } in = { pid, delay };
+
+    return serviceDispatchIn(&g_auddSrv, 0, in);
+}
+
+Result auddRequestResumeAudioForDebug(u64 pid, u64 delay) {
+    const struct {
+        u64 pid;
+        u64 timespan;
+    } in = { pid, delay };
+
+    return serviceDispatchIn(&g_auddSrv, 1, in);
+}
diff --git a/sys-tune/source/impl/aud.c.bak b/sys-tune/source/impl/aud.c.bak
index 3f0f3f4..d883322 100644
--- a/sys-tune/source/impl/aud.c.bak
+++ b/sys-tune/source/impl/aud.c.bak
@@ -1,113 +1,113 @@
-#define NX_SERVICE_ASSUME_NON_DOMAIN
-#include "aud.h"
-#include <switch.h>
-
-static Service g_audaSrv;
-static Service g_auddSrv;
-
-// IAudioSystemManagerForApplet
-Result audaInitialize(void) {
-    if (hosversionBefore(11,0,0))
-        return MAKERESULT(Module_Libnx, LibnxError_IncompatSysVer);
-
-    return smGetService(&g_audaSrv, "aud:a");
-}
-
-void audaExit(void) {
-    serviceClose(&g_audaSrv);
-}
-
-Service* audaGetServiceSession(void) {
-    return &g_audaSrv;
-}
-
-Result audaRequestSuspendAudio(u64 pid, u64 delay) {
-    const struct {
-        u64 pid;
-        u64 timespan;
-    } in = { pid, delay };
-
-    return serviceDispatchIn(&g_audaSrv, 2, in);
-}
-
-Result audaRequestResumeAudio(u64 pid, u64 delay) {
-    const struct {
-        u64 pid;
-        u64 timespan;
-    } in = { pid, delay };
-
-    return serviceDispatchIn(&g_audaSrv, 3, in);
-}
-
-Result audaGetAudioOutputProcessMasterVolume(u64 pid, float* volume_out) {
-    return serviceDispatchInOut(&g_audaSrv, 4, pid, *volume_out);
-}
-
-Result audaSetAudioOutputProcessMasterVolume(u64 pid, u64 delay, float volume) {
-    const struct {
-        float volume;
-        u64 pid;
-        u64 timespan;
-    } in = { volume, pid, delay };
-
-    return serviceDispatchIn(&g_audaSrv, 5, in);
-}
-
-Result audaGetAudioInputProcessMasterVolume(u64 pid, float* volume_out) {
-    return serviceDispatchInOut(&g_audaSrv, 6, pid, *volume_out);
-}
-
-Result audaSetAudioInputProcessMasterVolume(u64 pid, u64 delay, float volume) {
-    const struct {
-        float volume;
-        u64 pid;
-        u64 timespan;
-    } in = { volume, pid, delay };
-
-    return serviceDispatchIn(&g_audaSrv, 7, in);
-}
-
-Result audaGetAudioOutputProcessRecordVolume(u64 pid, float* volume_out) {
-    return serviceDispatchInOut(&g_audaSrv, 8, pid, *volume_out);
-}
-
-Result audaSetAudioOutputProcessRecordVolume(u64 pid, u64 delay, float volume) {
-    const struct {
-        float volume;
-        u64 pid;
-        u64 timespan;
-    } in = { volume, pid, delay };
-
-    return serviceDispatchIn(&g_audaSrv, 9, in);
-}
-
-// IAudioSystemManagerForDebugger
-Result auddInitialize(void) {
-    return smGetService(&g_auddSrv, "aud:d");
-}
-
-void auddExit(void) {
-    serviceClose(&g_auddSrv);
-}
-
-Service* auddGetServiceSession(void) {
-    return &g_auddSrv;
-}
-
-Result auddRequestSuspendAudioForDebug(u64 pid, u64 delay) {
-    const struct {
-        u64 pid;
-        u64 timespan;
-    } in = { pid, delay };
-
-    return serviceDispatchIn(&g_auddSrv, 0, in);
-}
-
-Result auddRequestResumeAudioForDebug(u64 pid, u64 delay) {
-    const struct {
-        u64 pid;
-        u64 timespan;
-    } in = { pid, delay };
-
-    return serviceDispatchIn(&g_auddSrv, 1, in);
-}
+#define NX_SERVICE_ASSUME_NON_DOMAIN
+#include "aud.h"
+#include <switch.h>
+
+static Service g_audaSrv;
+static Service g_auddSrv;
+
+// IAudioSystemManagerForApplet
+Result audaInitialize(void) {
+    if (hosversionBefore(11,0,0))
+        return MAKERESULT(Module_Libnx, LibnxError_IncompatSysVer);
+
+    return smGetService(&g_audaSrv, "aud:a");
+}
+
+void audaExit(void) {
+    serviceClose(&g_audaSrv);
+}
+
+Service* audaGetServiceSession(void) {
+    return &g_audaSrv;
+}
+
+Result audaRequestSuspendAudio(u64 pid, u64 delay) {
+    const struct {
+        u64 pid;
+        u64 timespan;
+    } in = { pid, delay };
+
+    return serviceDispatchIn(&g_audaSrv, 2, in);
+}
+
+Result audaRequestResumeAudio(u64 pid, u64 delay) {
+    const struct {
+        u64 pid;
+        u64 timespan;
+    } in = { pid, delay };
+
+    return serviceDispatchIn(&g_audaSrv, 3, in);
+}
+
+Result audaGetAudioOutputProcessMasterVolume(u64 pid, float* volume_out) {
+    return serviceDispatchInOut(&g_audaSrv, 4, pid, *volume_out);
+}
+
+Result audaSetAudioOutputProcessMasterVolume(u64 pid, u64 delay, float volume) {
+    const struct {
+        float volume;
+        u64 pid;
+        u64 timespan;
+    } in = { volume, pid, delay };
+
+    return serviceDispatchIn(&g_audaSrv, 5, in);
+}
+
+Result audaGetAudioInputProcessMasterVolume(u64 pid, float* volume_out) {
+    return serviceDispatchInOut(&g_audaSrv, 6, pid, *volume_out);
+}
+
+Result audaSetAudioInputProcessMasterVolume(u64 pid, u64 delay, float volume) {
+    const struct {
+        float volume;
+        u64 pid;
+        u64 timespan;
+    } in = { volume, pid, delay };
+
+    return serviceDispatchIn(&g_audaSrv, 7, in);
+}
+
+Result audaGetAudioOutputProcessRecordVolume(u64 pid, float* volume_out) {
+    return serviceDispatchInOut(&g_audaSrv, 8, pid, *volume_out);
+}
+
+Result audaSetAudioOutputProcessRecordVolume(u64 pid, u64 delay, float volume) {
+    const struct {
+        float volume;
+        u64 pid;
+        u64 timespan;
+    } in = { volume, pid, delay };
+
+    return serviceDispatchIn(&g_audaSrv, 9, in);
+}
+
+// IAudioSystemManagerForDebugger
+Result auddInitialize(void) {
+    return smGetService(&g_auddSrv, "aud:d");
+}
+
+void auddExit(void) {
+    serviceClose(&g_auddSrv);
+}
+
+Service* auddGetServiceSession(void) {
+    return &g_auddSrv;
+}
+
+Result auddRequestSuspendAudioForDebug(u64 pid, u64 delay) {
+    const struct {
+        u64 pid;
+        u64 timespan;
+    } in = { pid, delay };
+
+    return serviceDispatchIn(&g_auddSrv, 0, in);
+}
+
+Result auddRequestResumeAudioForDebug(u64 pid, u64 delay) {
+    const struct {
+        u64 pid;
+        u64 timespan;
+    } in = { pid, delay };
+
+    return serviceDispatchIn(&g_auddSrv, 1, in);
+}
diff --git a/sys-tune/source/impl/aud.h b/sys-tune/source/impl/aud.h
index 4d97cb1..1dd29c5 100644
--- a/sys-tune/source/impl/aud.h
+++ b/sys-tune/source/impl/aud.h
@@ -1,51 +1,51 @@
-/**
- * @file aud.h
- * @brief Only available on [11.0.0+].
- * @note Only one session may be open at once.
- * @author TotalJustice
- * @copyright libnx Authors
- */
-
-#pragma once
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <switch.h>
-
-#define AUD_MAX_DELAY (1000000000ULL)
-
-/// Initialize aud:a. Only available on [11.0.0+].
-Result audaInitialize(void);
-/// Exit aud:a.
-void audaExit(void);
-/// Gets the Service for aud:a.
-Service* audaGetServiceSession(void);
-
-/// Initialize aud:d. Only available on [11.0.0+].
-Result auddInitialize(void);
-/// Exit aud:d.
-void auddExit(void);
-/// Gets the Service for aud:d.
-Service* auddGetServiceSession(void);
-
-// All tested (and works)
-Result audaRequestSuspendAudio(u64 pid, u64 delay);
-Result audaRequestResumeAudio(u64 pid, u64 delay);
-Result audaGetAudioOutputProcessMasterVolume(u64 pid, double* volume_out);
-// Doesn't seem to apply... Maybe Input is overriding this?
-Result audaSetAudioOutputProcessMasterVolume(u64 pid, u64 delay, double volume);
-Result audaGetAudioInputProcessMasterVolume(u64 pid, double* volume_out);
-// Sets both Output and Input volume
-Result audaSetAudioInputProcessMasterVolume(u64 pid, u64 delay, double volume);
-Result audaGetAudioOutputProcessRecordVolume(u64 pid, double* volume_out);
-Result audaSetAudioOutputProcessRecordVolume(u64 pid, u64 delay, double volume);
-
-// All tested (and works)
-Result auddRequestSuspendAudioForDebug(u64 pid, u64 delay);
-Result auddRequestResumeAudioForDebug(u64 pid, u64 delay);
-
-#ifdef __cplusplus
-}
-#endif
+/**
+ * @file aud.h
+ * @brief Only available on [11.0.0+].
+ * @note Only one session may be open at once.
+ * @author TotalJustice
+ * @copyright libnx Authors
+ */
+
+#pragma once
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <switch.h>
+
+#define AUD_MAX_DELAY (1000000000ULL)
+
+/// Initialize aud:a. Only available on [11.0.0+].
+Result audaInitialize(void);
+/// Exit aud:a.
+void audaExit(void);
+/// Gets the Service for aud:a.
+Service* audaGetServiceSession(void);
+
+/// Initialize aud:d. Only available on [11.0.0+].
+Result auddInitialize(void);
+/// Exit aud:d.
+void auddExit(void);
+/// Gets the Service for aud:d.
+Service* auddGetServiceSession(void);
+
+// All tested (and works)
+Result audaRequestSuspendAudio(u64 pid, u64 delay);
+Result audaRequestResumeAudio(u64 pid, u64 delay);
+Result audaGetAudioOutputProcessMasterVolume(u64 pid, double* volume_out);
+// Doesn't seem to apply... Maybe Input is overriding this?
+Result audaSetAudioOutputProcessMasterVolume(u64 pid, u64 delay, double volume);
+Result audaGetAudioInputProcessMasterVolume(u64 pid, double* volume_out);
+// Sets both Output and Input volume
+Result audaSetAudioInputProcessMasterVolume(u64 pid, u64 delay, double volume);
+Result audaGetAudioOutputProcessRecordVolume(u64 pid, double* volume_out);
+Result audaSetAudioOutputProcessRecordVolume(u64 pid, u64 delay, double volume);
+
+// All tested (and works)
+Result auddRequestSuspendAudioForDebug(u64 pid, u64 delay);
+Result auddRequestResumeAudioForDebug(u64 pid, u64 delay);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/sys-tune/source/impl/aud.h.bak b/sys-tune/source/impl/aud.h.bak
index f6d1832..dc1912b 100644
--- a/sys-tune/source/impl/aud.h.bak
+++ b/sys-tune/source/impl/aud.h.bak
@@ -1,51 +1,51 @@
-/**
- * @file aud.h
- * @brief Only available on [11.0.0+].
- * @note Only one session may be open at once.
- * @author TotalJustice
- * @copyright libnx Authors
- */
-
-#pragma once
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <switch.h>
-
-#define AUD_MAX_DELAY (1000000000ULL)
-
-/// Initialize aud:a. Only available on [11.0.0+].
-Result audaInitialize(void);
-/// Exit aud:a.
-void audaExit(void);
-/// Gets the Service for aud:a.
-Service* audaGetServiceSession(void);
-
-/// Initialize aud:d. Only available on [11.0.0+].
-Result auddInitialize(void);
-/// Exit aud:d.
-void auddExit(void);
-/// Gets the Service for aud:d.
-Service* auddGetServiceSession(void);
-
-// All tested (and works)
-Result audaRequestSuspendAudio(u64 pid, u64 delay);
-Result audaRequestResumeAudio(u64 pid, u64 delay);
-Result audaGetAudioOutputProcessMasterVolume(u64 pid, float* volume_out);
-// Doesn't seem to apply... Maybe Input is overriding this?
-Result audaSetAudioOutputProcessMasterVolume(u64 pid, u64 delay, float volume);
-Result audaGetAudioInputProcessMasterVolume(u64 pid, float* volume_out);
-// Sets both Output and Input volume
-Result audaSetAudioInputProcessMasterVolume(u64 pid, u64 delay, float volume);
-Result audaGetAudioOutputProcessRecordVolume(u64 pid, float* volume_out);
-Result audaSetAudioOutputProcessRecordVolume(u64 pid, u64 delay, float volume);
-
-// All tested (and works)
-Result auddRequestSuspendAudioForDebug(u64 pid, u64 delay);
-Result auddRequestResumeAudioForDebug(u64 pid, u64 delay);
-
-#ifdef __cplusplus
-}
-#endif
+/**
+ * @file aud.h
+ * @brief Only available on [11.0.0+].
+ * @note Only one session may be open at once.
+ * @author TotalJustice
+ * @copyright libnx Authors
+ */
+
+#pragma once
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <switch.h>
+
+#define AUD_MAX_DELAY (1000000000ULL)
+
+/// Initialize aud:a. Only available on [11.0.0+].
+Result audaInitialize(void);
+/// Exit aud:a.
+void audaExit(void);
+/// Gets the Service for aud:a.
+Service* audaGetServiceSession(void);
+
+/// Initialize aud:d. Only available on [11.0.0+].
+Result auddInitialize(void);
+/// Exit aud:d.
+void auddExit(void);
+/// Gets the Service for aud:d.
+Service* auddGetServiceSession(void);
+
+// All tested (and works)
+Result audaRequestSuspendAudio(u64 pid, u64 delay);
+Result audaRequestResumeAudio(u64 pid, u64 delay);
+Result audaGetAudioOutputProcessMasterVolume(u64 pid, float* volume_out);
+// Doesn't seem to apply... Maybe Input is overriding this?
+Result audaSetAudioOutputProcessMasterVolume(u64 pid, u64 delay, float volume);
+Result audaGetAudioInputProcessMasterVolume(u64 pid, float* volume_out);
+// Sets both Output and Input volume
+Result audaSetAudioInputProcessMasterVolume(u64 pid, u64 delay, float volume);
+Result audaGetAudioOutputProcessRecordVolume(u64 pid, float* volume_out);
+Result audaSetAudioOutputProcessRecordVolume(u64 pid, u64 delay, float volume);
+
+// All tested (and works)
+Result auddRequestSuspendAudioForDebug(u64 pid, u64 delay);
+Result auddRequestResumeAudioForDebug(u64 pid, u64 delay);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/sys-tune/source/impl/aud_wrapper.c b/sys-tune/source/impl/aud_wrapper.c
index d93d906..41ee2fa 100644
--- a/sys-tune/source/impl/aud_wrapper.c
+++ b/sys-tune/source/impl/aud_wrapper.c
@@ -1,78 +1,78 @@
-#include "aud.h"
-#include "audout.h"
-#include "aud_wrapper.h"
-#include <switch.h>
-
-// IAudioSystemManagerForApplet
-Result audWrapperInitialize(void) {
-    if (hosversionBefore(11,0,0))
-        return audoutaInitialize();
-    else
-        return audaInitialize();
-}
-
-void audWrapperExit(void) {
-    if (hosversionBefore(11,0,0))
-        audoutaInitialize();
-    else
-        audaInitialize();
-}
-
-Result audWrapperRequestSuspend(u64 pid, u64 delay) {
-    Handle h;
-    if (hosversionBefore(4,0,0))
-        return audoutaRequestSuspendOld(pid, delay, &h);
-    else if (hosversionBefore(11,0,0))
-        return audoutaRequestSuspend(pid, delay);
-    else
-        return audaRequestSuspendAudio(pid, delay);
-}
-
-Result audWrapperRequestResume(u64 pid, u64 delay) {
-    Handle h;
-    if (hosversionBefore(4,0,0))
-        return audoutaRequestResumeOld(pid, delay, &h);
-    else if (hosversionBefore(11,0,0))
-        return audoutaRequestResume(pid, delay);
-    else
-        return audaRequestResumeAudio(pid, delay);
-}
-
-Result audWrapperGetProcessMasterVolume(u64 pid, double* volume_out) {
-    if (hosversionBefore(11,0,0))
-        return audoutaGetProcessMasterVolume(pid, volume_out);
-    else
-        return audaGetAudioOutputProcessMasterVolume(pid, volume_out);
-}
-
-Result audWrapperSetProcessMasterVolume(u64 pid, u64 delay, double volume) {
-    if (hosversionBefore(11,0,0))
-        return audoutaSetProcessMasterVolume(pid, delay, volume);
-    else {
-        Result rc = audaSetAudioOutputProcessMasterVolume(pid, delay, volume);
-        if (R_FAILED(rc)) {
-            return rc;
-        }
-        return audaSetAudioInputProcessMasterVolume(pid, delay, volume);
-    }
-}
-
-Result audWrapperGetProcessRecordVolume(u64 pid, double* volume_out) {
-    if (hosversionBefore(4,0,0))
-        return MAKERESULT(Module_Libnx, LibnxError_IncompatSysVer);
-
-    if (hosversionBefore(11,0,0))
-        return audoutaGetProcessRecordVolume(pid, volume_out);
-    else
-        return audaGetAudioOutputProcessRecordVolume(pid, volume_out);
-}
-
-Result audWrapperSetProcessRecordVolume(u64 pid, u64 delay, double volume) {
-    if (hosversionBefore(4,0,0))
-        return MAKERESULT(Module_Libnx, LibnxError_IncompatSysVer);
-
-    if (hosversionBefore(11,0,0))
-        return audoutaSetProcessRecordVolume(pid, delay, volume);
-    else
-        return audaSetAudioOutputProcessRecordVolume(pid, delay, volume);
-}
+#include "aud.h"
+#include "audout.h"
+#include "aud_wrapper.h"
+#include <switch.h>
+
+// IAudioSystemManagerForApplet
+Result audWrapperInitialize(void) {
+    if (hosversionBefore(11,0,0))
+        return audoutaInitialize();
+    else
+        return audaInitialize();
+}
+
+void audWrapperExit(void) {
+    if (hosversionBefore(11,0,0))
+        audoutaInitialize();
+    else
+        audaInitialize();
+}
+
+Result audWrapperRequestSuspend(u64 pid, u64 delay) {
+    Handle h;
+    if (hosversionBefore(4,0,0))
+        return audoutaRequestSuspendOld(pid, delay, &h);
+    else if (hosversionBefore(11,0,0))
+        return audoutaRequestSuspend(pid, delay);
+    else
+        return audaRequestSuspendAudio(pid, delay);
+}
+
+Result audWrapperRequestResume(u64 pid, u64 delay) {
+    Handle h;
+    if (hosversionBefore(4,0,0))
+        return audoutaRequestResumeOld(pid, delay, &h);
+    else if (hosversionBefore(11,0,0))
+        return audoutaRequestResume(pid, delay);
+    else
+        return audaRequestResumeAudio(pid, delay);
+}
+
+Result audWrapperGetProcessMasterVolume(u64 pid, double* volume_out) {
+    if (hosversionBefore(11,0,0))
+        return audoutaGetProcessMasterVolume(pid, volume_out);
+    else
+        return audaGetAudioOutputProcessMasterVolume(pid, volume_out);
+}
+
+Result audWrapperSetProcessMasterVolume(u64 pid, u64 delay, double volume) {
+    if (hosversionBefore(11,0,0))
+        return audoutaSetProcessMasterVolume(pid, delay, volume);
+    else {
+        Result rc = audaSetAudioOutputProcessMasterVolume(pid, delay, volume);
+        if (R_FAILED(rc)) {
+            return rc;
+        }
+        return audaSetAudioInputProcessMasterVolume(pid, delay, volume);
+    }
+}
+
+Result audWrapperGetProcessRecordVolume(u64 pid, double* volume_out) {
+    if (hosversionBefore(4,0,0))
+        return MAKERESULT(Module_Libnx, LibnxError_IncompatSysVer);
+
+    if (hosversionBefore(11,0,0))
+        return audoutaGetProcessRecordVolume(pid, volume_out);
+    else
+        return audaGetAudioOutputProcessRecordVolume(pid, volume_out);
+}
+
+Result audWrapperSetProcessRecordVolume(u64 pid, u64 delay, double volume) {
+    if (hosversionBefore(4,0,0))
+        return MAKERESULT(Module_Libnx, LibnxError_IncompatSysVer);
+
+    if (hosversionBefore(11,0,0))
+        return audoutaSetProcessRecordVolume(pid, delay, volume);
+    else
+        return audaSetAudioOutputProcessRecordVolume(pid, delay, volume);
+}
diff --git a/sys-tune/source/impl/aud_wrapper.c.bak b/sys-tune/source/impl/aud_wrapper.c.bak
index a1758df..3e0c995 100644
--- a/sys-tune/source/impl/aud_wrapper.c.bak
+++ b/sys-tune/source/impl/aud_wrapper.c.bak
@@ -1,78 +1,78 @@
-#include "aud.h"
-#include "audout.h"
-#include "aud_wrapper.h"
-#include <switch.h>
-
-// IAudioSystemManagerForApplet
-Result audWrapperInitialize(void) {
-    if (hosversionBefore(11,0,0))
-        return audoutaInitialize();
-    else
-        return audaInitialize();
-}
-
-void audWrapperExit(void) {
-    if (hosversionBefore(11,0,0))
-        audoutaInitialize();
-    else
-        audaInitialize();
-}
-
-Result audWrapperRequestSuspend(u64 pid, u64 delay) {
-    Handle h;
-    if (hosversionBefore(4,0,0))
-        return audoutaRequestSuspendOld(pid, delay, &h);
-    else if (hosversionBefore(11,0,0))
-        return audoutaRequestSuspend(pid, delay);
-    else
-        return audaRequestSuspendAudio(pid, delay);
-}
-
-Result audWrapperRequestResume(u64 pid, u64 delay) {
-    Handle h;
-    if (hosversionBefore(4,0,0))
-        return audoutaRequestResumeOld(pid, delay, &h);
-    else if (hosversionBefore(11,0,0))
-        return audoutaRequestResume(pid, delay);
-    else
-        return audaRequestResumeAudio(pid, delay);
-}
-
-Result audWrapperGetProcessMasterVolume(u64 pid, float* volume_out) {
-    if (hosversionBefore(11,0,0))
-        return audoutaGetProcessMasterVolume(pid, volume_out);
-    else
-        return audaGetAudioOutputProcessMasterVolume(pid, volume_out);
-}
-
-Result audWrapperSetProcessMasterVolume(u64 pid, u64 delay, float volume) {
-    if (hosversionBefore(11,0,0))
-        return audoutaSetProcessMasterVolume(pid, delay, volume);
-    else {
-        Result rc = audaSetAudioOutputProcessMasterVolume(pid, delay, volume);
-        if (R_FAILED(rc)) {
-            return rc;
-        }
-        return audaSetAudioInputProcessMasterVolume(pid, delay, volume);
-    }
-}
-
-Result audWrapperGetProcessRecordVolume(u64 pid, float* volume_out) {
-    if (hosversionBefore(4,0,0))
-        return MAKERESULT(Module_Libnx, LibnxError_IncompatSysVer);
-
-    if (hosversionBefore(11,0,0))
-        return audoutaGetProcessRecordVolume(pid, volume_out);
-    else
-        return audaGetAudioOutputProcessRecordVolume(pid, volume_out);
-}
-
-Result audWrapperSetProcessRecordVolume(u64 pid, u64 delay, float volume) {
-    if (hosversionBefore(4,0,0))
-        return MAKERESULT(Module_Libnx, LibnxError_IncompatSysVer);
-
-    if (hosversionBefore(11,0,0))
-        return audoutaSetProcessRecordVolume(pid, delay, volume);
-    else
-        return audaSetAudioOutputProcessRecordVolume(pid, delay, volume);
-}
+#include "aud.h"
+#include "audout.h"
+#include "aud_wrapper.h"
+#include <switch.h>
+
+// IAudioSystemManagerForApplet
+Result audWrapperInitialize(void) {
+    if (hosversionBefore(11,0,0))
+        return audoutaInitialize();
+    else
+        return audaInitialize();
+}
+
+void audWrapperExit(void) {
+    if (hosversionBefore(11,0,0))
+        audoutaInitialize();
+    else
+        audaInitialize();
+}
+
+Result audWrapperRequestSuspend(u64 pid, u64 delay) {
+    Handle h;
+    if (hosversionBefore(4,0,0))
+        return audoutaRequestSuspendOld(pid, delay, &h);
+    else if (hosversionBefore(11,0,0))
+        return audoutaRequestSuspend(pid, delay);
+    else
+        return audaRequestSuspendAudio(pid, delay);
+}
+
+Result audWrapperRequestResume(u64 pid, u64 delay) {
+    Handle h;
+    if (hosversionBefore(4,0,0))
+        return audoutaRequestResumeOld(pid, delay, &h);
+    else if (hosversionBefore(11,0,0))
+        return audoutaRequestResume(pid, delay);
+    else
+        return audaRequestResumeAudio(pid, delay);
+}
+
+Result audWrapperGetProcessMasterVolume(u64 pid, float* volume_out) {
+    if (hosversionBefore(11,0,0))
+        return audoutaGetProcessMasterVolume(pid, volume_out);
+    else
+        return audaGetAudioOutputProcessMasterVolume(pid, volume_out);
+}
+
+Result audWrapperSetProcessMasterVolume(u64 pid, u64 delay, float volume) {
+    if (hosversionBefore(11,0,0))
+        return audoutaSetProcessMasterVolume(pid, delay, volume);
+    else {
+        Result rc = audaSetAudioOutputProcessMasterVolume(pid, delay, volume);
+        if (R_FAILED(rc)) {
+            return rc;
+        }
+        return audaSetAudioInputProcessMasterVolume(pid, delay, volume);
+    }
+}
+
+Result audWrapperGetProcessRecordVolume(u64 pid, float* volume_out) {
+    if (hosversionBefore(4,0,0))
+        return MAKERESULT(Module_Libnx, LibnxError_IncompatSysVer);
+
+    if (hosversionBefore(11,0,0))
+        return audoutaGetProcessRecordVolume(pid, volume_out);
+    else
+        return audaGetAudioOutputProcessRecordVolume(pid, volume_out);
+}
+
+Result audWrapperSetProcessRecordVolume(u64 pid, u64 delay, float volume) {
+    if (hosversionBefore(4,0,0))
+        return MAKERESULT(Module_Libnx, LibnxError_IncompatSysVer);
+
+    if (hosversionBefore(11,0,0))
+        return audoutaSetProcessRecordVolume(pid, delay, volume);
+    else
+        return audaSetAudioOutputProcessRecordVolume(pid, delay, volume);
+}
diff --git a/sys-tune/source/impl/aud_wrapper.h b/sys-tune/source/impl/aud_wrapper.h
index 76261f8..5ba0410 100644
--- a/sys-tune/source/impl/aud_wrapper.h
+++ b/sys-tune/source/impl/aud_wrapper.h
@@ -1,25 +1,25 @@
-#pragma once
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <switch.h>
-
-#define AUD_WRAPPER_MAX_DELAY (1000000000ULL)
-
-/// Initialize audout:a until [11.0.0], otherwise aud:a.
-Result audWrapperInitialize(void);
-/// Exit audout:a until [11.0.0], otherwise aud:a.
-void audWrapperExit(void);
-
-Result audWrapperRequestSuspend(u64 pid, u64 delay);
-Result audWrapperRequestResume(u64 pid, u64 delay);
-Result audWrapperGetProcessMasterVolume(u64 pid, double* volume_out);
-Result audWrapperSetProcessMasterVolume(u64 pid, u64 delay, double volume);
-Result audWrapperGetProcessRecordVolume(u64 pid, double* volume_out);
-Result audWrapperSetProcessRecordVolume(u64 pid, u64 delay, double volume);
-
-#ifdef __cplusplus
-}
-#endif
+#pragma once
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <switch.h>
+
+#define AUD_WRAPPER_MAX_DELAY (1000000000ULL)
+
+/// Initialize audout:a until [11.0.0], otherwise aud:a.
+Result audWrapperInitialize(void);
+/// Exit audout:a until [11.0.0], otherwise aud:a.
+void audWrapperExit(void);
+
+Result audWrapperRequestSuspend(u64 pid, u64 delay);
+Result audWrapperRequestResume(u64 pid, u64 delay);
+Result audWrapperGetProcessMasterVolume(u64 pid, double* volume_out);
+Result audWrapperSetProcessMasterVolume(u64 pid, u64 delay, double volume);
+Result audWrapperGetProcessRecordVolume(u64 pid, double* volume_out);
+Result audWrapperSetProcessRecordVolume(u64 pid, u64 delay, double volume);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/sys-tune/source/impl/aud_wrapper.h.bak b/sys-tune/source/impl/aud_wrapper.h.bak
index d772814..989cc03 100644
--- a/sys-tune/source/impl/aud_wrapper.h.bak
+++ b/sys-tune/source/impl/aud_wrapper.h.bak
@@ -1,25 +1,25 @@
-#pragma once
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <switch.h>
-
-#define AUD_WRAPPER_MAX_DELAY (1000000000ULL)
-
-/// Initialize audout:a until [11.0.0], otherwise aud:a.
-Result audWrapperInitialize(void);
-/// Exit audout:a until [11.0.0], otherwise aud:a.
-void audWrapperExit(void);
-
-Result audWrapperRequestSuspend(u64 pid, u64 delay);
-Result audWrapperRequestResume(u64 pid, u64 delay);
-Result audWrapperGetProcessMasterVolume(u64 pid, float* volume_out);
-Result audWrapperSetProcessMasterVolume(u64 pid, u64 delay, float volume);
-Result audWrapperGetProcessRecordVolume(u64 pid, float* volume_out);
-Result audWrapperSetProcessRecordVolume(u64 pid, u64 delay, float volume);
-
-#ifdef __cplusplus
-}
-#endif
+#pragma once
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <switch.h>
+
+#define AUD_WRAPPER_MAX_DELAY (1000000000ULL)
+
+/// Initialize audout:a until [11.0.0], otherwise aud:a.
+Result audWrapperInitialize(void);
+/// Exit audout:a until [11.0.0], otherwise aud:a.
+void audWrapperExit(void);
+
+Result audWrapperRequestSuspend(u64 pid, u64 delay);
+Result audWrapperRequestResume(u64 pid, u64 delay);
+Result audWrapperGetProcessMasterVolume(u64 pid, float* volume_out);
+Result audWrapperSetProcessMasterVolume(u64 pid, u64 delay, float volume);
+Result audWrapperGetProcessRecordVolume(u64 pid, float* volume_out);
+Result audWrapperSetProcessRecordVolume(u64 pid, u64 delay, float volume);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/sys-tune/source/impl/audout.c b/sys-tune/source/impl/audout.c
index 37b73d8..f66936b 100644
--- a/sys-tune/source/impl/audout.c
+++ b/sys-tune/source/impl/audout.c
@@ -1,138 +1,138 @@
-#define NX_SERVICE_ASSUME_NON_DOMAIN
-#include "audout.h"
-#include <switch.h>
-
-static Service g_audoutaSrv;
-static Service g_audoutdSrv;
-
-// AudioOutManagerForApplet
-Result audoutaInitialize(void) {
-    if (hosversionAtLeast(11,0,0))
-        return MAKERESULT(Module_Libnx, LibnxError_IncompatSysVer);
-
-    return smGetService(&g_audoutaSrv, "audout:a");
-}
-
-void audoutaExit(void) {
-    serviceClose(&g_audoutaSrv);
-}
-
-Service* audoutaGetServiceSession(void) {
-    return &g_audoutaSrv;
-}
-
-Result audoutaRequestSuspendOld(u64 pid, u64 delay, Handle* handle_out) {
-    if (hosversionAtLeast(4,0,0))
-        return MAKERESULT(Module_Libnx, LibnxError_IncompatSysVer);
-
-    const struct {
-        u64 pid;
-        u64 timespan;
-    } in = { pid, delay };
-
-    return serviceDispatchInOut(&g_audoutaSrv, 0, in, *handle_out);
-}
-
-Result audoutaRequestResumeOld(u64 pid, u64 delay, Handle* handle_out) {
-    if (hosversionAtLeast(4,0,0))
-        return MAKERESULT(Module_Libnx, LibnxError_IncompatSysVer);
-
-    const struct {
-        u64 pid;
-        u64 timespan;
-    } in = { pid, delay };
-
-    return serviceDispatchInOut(&g_audoutaSrv, 1, in, *handle_out);
-}
-
-Result audoutaRequestSuspend(u64 pid, u64 delay) {
-    if (hosversionBefore(4,0,0))
-        return MAKERESULT(Module_Libnx, LibnxError_IncompatSysVer);
-
-    const struct {
-        u64 pid;
-        u64 timespan;
-    } in = { pid, delay };
-
-    return serviceDispatchIn(&g_audoutaSrv, 0, in);
-}
-
-Result audoutaRequestResume(u64 pid, u64 delay) {
-    if (hosversionBefore(4,0,0))
-        return MAKERESULT(Module_Libnx, LibnxError_IncompatSysVer);
-
-    const struct {
-        u64 pid;
-        u64 timespan;
-    } in = { pid, delay };
-
-    return serviceDispatchIn(&g_audoutaSrv, 1, in);
-}
-
-Result audoutaGetProcessMasterVolume(u64 pid, double* volume_out) {
-    return serviceDispatchInOut(&g_audoutaSrv, 2, pid, *volume_out);
-}
-
-Result audoutaSetProcessMasterVolume(u64 pid, u64 delay, double volume) {
-    const struct {
-        double volume;
-        u64 pid;
-        u64 timespan;
-    } in = { volume, pid, 0 };
-
-    return serviceDispatchIn(&g_audoutaSrv, 3, in);
-}
-
-Result audoutaGetProcessRecordVolume(u64 pid, double* volume_out) {
-    if (hosversionBefore(4,0,0))
-        return MAKERESULT(Module_Libnx, LibnxError_IncompatSysVer);
-
-    return serviceDispatchInOut(&g_audoutaSrv, 4, pid, *volume_out);
-}
-
-Result audoutaSetProcessRecordVolume(u64 pid, u64 delay, double volume) {
-    if (hosversionBefore(4,0,0))
-        return MAKERESULT(Module_Libnx, LibnxError_IncompatSysVer);
-
-    const struct {
-        double volume;
-        u64 pid;
-        u64 timespan;
-    } in = { volume, pid, delay };
-
-    return serviceDispatchIn(&g_audoutaSrv, 5, in);
-}
-
-// IAudioOutManagerForDebugger
-Result audoutdInitialize(void) {
-    if (hosversionAtLeast(11,0,0))
-        return MAKERESULT(Module_Libnx, LibnxError_IncompatSysVer);
-
-    return smGetService(&g_audoutdSrv, "audout:d");
-}
-
-void audoutdExit(void) {
-    serviceClose(&g_audoutdSrv);
-}
-
-Service* audoutdGetServiceSession(void) {
-    return &g_audoutdSrv;
-}
-
-Result audoutdRequestSuspendForDebug(u64 pid, u64 delay) {
-    const struct {
-        u64 pid;
-        u64 timespan;
-    } in = { pid, delay };
-
-    return serviceDispatchIn(&g_audoutdSrv, 0, in);
-}
-
-Result audoutdRequestResumeForDebug(u64 pid, u64 delay) {
-    const struct {
-        u64 pid;
-        u64 timespan;
-    } in = { pid, delay };
-
-    return serviceDispatchIn(&g_audoutdSrv, 1, in);
-}
+#define NX_SERVICE_ASSUME_NON_DOMAIN
+#include "audout.h"
+#include <switch.h>
+
+static Service g_audoutaSrv;
+static Service g_audoutdSrv;
+
+// AudioOutManagerForApplet
+Result audoutaInitialize(void) {
+    if (hosversionAtLeast(11,0,0))
+        return MAKERESULT(Module_Libnx, LibnxError_IncompatSysVer);
+
+    return smGetService(&g_audoutaSrv, "audout:a");
+}
+
+void audoutaExit(void) {
+    serviceClose(&g_audoutaSrv);
+}
+
+Service* audoutaGetServiceSession(void) {
+    return &g_audoutaSrv;
+}
+
+Result audoutaRequestSuspendOld(u64 pid, u64 delay, Handle* handle_out) {
+    if (hosversionAtLeast(4,0,0))
+        return MAKERESULT(Module_Libnx, LibnxError_IncompatSysVer);
+
+    const struct {
+        u64 pid;
+        u64 timespan;
+    } in = { pid, delay };
+
+    return serviceDispatchInOut(&g_audoutaSrv, 0, in, *handle_out);
+}
+
+Result audoutaRequestResumeOld(u64 pid, u64 delay, Handle* handle_out) {
+    if (hosversionAtLeast(4,0,0))
+        return MAKERESULT(Module_Libnx, LibnxError_IncompatSysVer);
+
+    const struct {
+        u64 pid;
+        u64 timespan;
+    } in = { pid, delay };
+
+    return serviceDispatchInOut(&g_audoutaSrv, 1, in, *handle_out);
+}
+
+Result audoutaRequestSuspend(u64 pid, u64 delay) {
+    if (hosversionBefore(4,0,0))
+        return MAKERESULT(Module_Libnx, LibnxError_IncompatSysVer);
+
+    const struct {
+        u64 pid;
+        u64 timespan;
+    } in = { pid, delay };
+
+    return serviceDispatchIn(&g_audoutaSrv, 0, in);
+}
+
+Result audoutaRequestResume(u64 pid, u64 delay) {
+    if (hosversionBefore(4,0,0))
+        return MAKERESULT(Module_Libnx, LibnxError_IncompatSysVer);
+
+    const struct {
+        u64 pid;
+        u64 timespan;
+    } in = { pid, delay };
+
+    return serviceDispatchIn(&g_audoutaSrv, 1, in);
+}
+
+Result audoutaGetProcessMasterVolume(u64 pid, double* volume_out) {
+    return serviceDispatchInOut(&g_audoutaSrv, 2, pid, *volume_out);
+}
+
+Result audoutaSetProcessMasterVolume(u64 pid, u64 delay, double volume) {
+    const struct {
+        double volume;
+        u64 pid;
+        u64 timespan;
+    } in = { volume, pid, 0 };
+
+    return serviceDispatchIn(&g_audoutaSrv, 3, in);
+}
+
+Result audoutaGetProcessRecordVolume(u64 pid, double* volume_out) {
+    if (hosversionBefore(4,0,0))
+        return MAKERESULT(Module_Libnx, LibnxError_IncompatSysVer);
+
+    return serviceDispatchInOut(&g_audoutaSrv, 4, pid, *volume_out);
+}
+
+Result audoutaSetProcessRecordVolume(u64 pid, u64 delay, double volume) {
+    if (hosversionBefore(4,0,0))
+        return MAKERESULT(Module_Libnx, LibnxError_IncompatSysVer);
+
+    const struct {
+        double volume;
+        u64 pid;
+        u64 timespan;
+    } in = { volume, pid, delay };
+
+    return serviceDispatchIn(&g_audoutaSrv, 5, in);
+}
+
+// IAudioOutManagerForDebugger
+Result audoutdInitialize(void) {
+    if (hosversionAtLeast(11,0,0))
+        return MAKERESULT(Module_Libnx, LibnxError_IncompatSysVer);
+
+    return smGetService(&g_audoutdSrv, "audout:d");
+}
+
+void audoutdExit(void) {
+    serviceClose(&g_audoutdSrv);
+}
+
+Service* audoutdGetServiceSession(void) {
+    return &g_audoutdSrv;
+}
+
+Result audoutdRequestSuspendForDebug(u64 pid, u64 delay) {
+    const struct {
+        u64 pid;
+        u64 timespan;
+    } in = { pid, delay };
+
+    return serviceDispatchIn(&g_audoutdSrv, 0, in);
+}
+
+Result audoutdRequestResumeForDebug(u64 pid, u64 delay) {
+    const struct {
+        u64 pid;
+        u64 timespan;
+    } in = { pid, delay };
+
+    return serviceDispatchIn(&g_audoutdSrv, 1, in);
+}
diff --git a/sys-tune/source/impl/audout.c.bak b/sys-tune/source/impl/audout.c.bak
index 595f271..490a190 100644
--- a/sys-tune/source/impl/audout.c.bak
+++ b/sys-tune/source/impl/audout.c.bak
@@ -1,138 +1,138 @@
-#define NX_SERVICE_ASSUME_NON_DOMAIN
-#include "audout.h"
-#include <switch.h>
-
-static Service g_audoutaSrv;
-static Service g_audoutdSrv;
-
-// AudioOutManagerForApplet
-Result audoutaInitialize(void) {
-    if (hosversionAtLeast(11,0,0))
-        return MAKERESULT(Module_Libnx, LibnxError_IncompatSysVer);
-
-    return smGetService(&g_audoutaSrv, "audout:a");
-}
-
-void audoutaExit(void) {
-    serviceClose(&g_audoutaSrv);
-}
-
-Service* audoutaGetServiceSession(void) {
-    return &g_audoutaSrv;
-}
-
-Result audoutaRequestSuspendOld(u64 pid, u64 delay, Handle* handle_out) {
-    if (hosversionAtLeast(4,0,0))
-        return MAKERESULT(Module_Libnx, LibnxError_IncompatSysVer);
-
-    const struct {
-        u64 pid;
-        u64 timespan;
-    } in = { pid, delay };
-
-    return serviceDispatchInOut(&g_audoutaSrv, 0, in, *handle_out);
-}
-
-Result audoutaRequestResumeOld(u64 pid, u64 delay, Handle* handle_out) {
-    if (hosversionAtLeast(4,0,0))
-        return MAKERESULT(Module_Libnx, LibnxError_IncompatSysVer);
-
-    const struct {
-        u64 pid;
-        u64 timespan;
-    } in = { pid, delay };
-
-    return serviceDispatchInOut(&g_audoutaSrv, 1, in, *handle_out);
-}
-
-Result audoutaRequestSuspend(u64 pid, u64 delay) {
-    if (hosversionBefore(4,0,0))
-        return MAKERESULT(Module_Libnx, LibnxError_IncompatSysVer);
-
-    const struct {
-        u64 pid;
-        u64 timespan;
-    } in = { pid, delay };
-
-    return serviceDispatchIn(&g_audoutaSrv, 0, in);
-}
-
-Result audoutaRequestResume(u64 pid, u64 delay) {
-    if (hosversionBefore(4,0,0))
-        return MAKERESULT(Module_Libnx, LibnxError_IncompatSysVer);
-
-    const struct {
-        u64 pid;
-        u64 timespan;
-    } in = { pid, delay };
-
-    return serviceDispatchIn(&g_audoutaSrv, 1, in);
-}
-
-Result audoutaGetProcessMasterVolume(u64 pid, float* volume_out) {
-    return serviceDispatchInOut(&g_audoutaSrv, 2, pid, *volume_out);
-}
-
-Result audoutaSetProcessMasterVolume(u64 pid, u64 delay, float volume) {
-    const struct {
-        float volume;
-        u64 pid;
-        u64 timespan;
-    } in = { volume, pid, 0 };
-
-    return serviceDispatchIn(&g_audoutaSrv, 3, in);
-}
-
-Result audoutaGetProcessRecordVolume(u64 pid, float* volume_out) {
-    if (hosversionBefore(4,0,0))
-        return MAKERESULT(Module_Libnx, LibnxError_IncompatSysVer);
-
-    return serviceDispatchInOut(&g_audoutaSrv, 4, pid, *volume_out);
-}
-
-Result audoutaSetProcessRecordVolume(u64 pid, u64 delay, float volume) {
-    if (hosversionBefore(4,0,0))
-        return MAKERESULT(Module_Libnx, LibnxError_IncompatSysVer);
-
-    const struct {
-        float volume;
-        u64 pid;
-        u64 timespan;
-    } in = { volume, pid, delay };
-
-    return serviceDispatchIn(&g_audoutaSrv, 5, in);
-}
-
-// IAudioOutManagerForDebugger
-Result audoutdInitialize(void) {
-    if (hosversionAtLeast(11,0,0))
-        return MAKERESULT(Module_Libnx, LibnxError_IncompatSysVer);
-
-    return smGetService(&g_audoutdSrv, "audout:d");
-}
-
-void audoutdExit(void) {
-    serviceClose(&g_audoutdSrv);
-}
-
-Service* audoutdGetServiceSession(void) {
-    return &g_audoutdSrv;
-}
-
-Result audoutdRequestSuspendForDebug(u64 pid, u64 delay) {
-    const struct {
-        u64 pid;
-        u64 timespan;
-    } in = { pid, delay };
-
-    return serviceDispatchIn(&g_audoutdSrv, 0, in);
-}
-
-Result audoutdRequestResumeForDebug(u64 pid, u64 delay) {
-    const struct {
-        u64 pid;
-        u64 timespan;
-    } in = { pid, delay };
-
-    return serviceDispatchIn(&g_audoutdSrv, 1, in);
-}
+#define NX_SERVICE_ASSUME_NON_DOMAIN
+#include "audout.h"
+#include <switch.h>
+
+static Service g_audoutaSrv;
+static Service g_audoutdSrv;
+
+// AudioOutManagerForApplet
+Result audoutaInitialize(void) {
+    if (hosversionAtLeast(11,0,0))
+        return MAKERESULT(Module_Libnx, LibnxError_IncompatSysVer);
+
+    return smGetService(&g_audoutaSrv, "audout:a");
+}
+
+void audoutaExit(void) {
+    serviceClose(&g_audoutaSrv);
+}
+
+Service* audoutaGetServiceSession(void) {
+    return &g_audoutaSrv;
+}
+
+Result audoutaRequestSuspendOld(u64 pid, u64 delay, Handle* handle_out) {
+    if (hosversionAtLeast(4,0,0))
+        return MAKERESULT(Module_Libnx, LibnxError_IncompatSysVer);
+
+    const struct {
+        u64 pid;
+        u64 timespan;
+    } in = { pid, delay };
+
+    return serviceDispatchInOut(&g_audoutaSrv, 0, in, *handle_out);
+}
+
+Result audoutaRequestResumeOld(u64 pid, u64 delay, Handle* handle_out) {
+    if (hosversionAtLeast(4,0,0))
+        return MAKERESULT(Module_Libnx, LibnxError_IncompatSysVer);
+
+    const struct {
+        u64 pid;
+        u64 timespan;
+    } in = { pid, delay };
+
+    return serviceDispatchInOut(&g_audoutaSrv, 1, in, *handle_out);
+}
+
+Result audoutaRequestSuspend(u64 pid, u64 delay) {
+    if (hosversionBefore(4,0,0))
+        return MAKERESULT(Module_Libnx, LibnxError_IncompatSysVer);
+
+    const struct {
+        u64 pid;
+        u64 timespan;
+    } in = { pid, delay };
+
+    return serviceDispatchIn(&g_audoutaSrv, 0, in);
+}
+
+Result audoutaRequestResume(u64 pid, u64 delay) {
+    if (hosversionBefore(4,0,0))
+        return MAKERESULT(Module_Libnx, LibnxError_IncompatSysVer);
+
+    const struct {
+        u64 pid;
+        u64 timespan;
+    } in = { pid, delay };
+
+    return serviceDispatchIn(&g_audoutaSrv, 1, in);
+}
+
+Result audoutaGetProcessMasterVolume(u64 pid, float* volume_out) {
+    return serviceDispatchInOut(&g_audoutaSrv, 2, pid, *volume_out);
+}
+
+Result audoutaSetProcessMasterVolume(u64 pid, u64 delay, float volume) {
+    const struct {
+        float volume;
+        u64 pid;
+        u64 timespan;
+    } in = { volume, pid, 0 };
+
+    return serviceDispatchIn(&g_audoutaSrv, 3, in);
+}
+
+Result audoutaGetProcessRecordVolume(u64 pid, float* volume_out) {
+    if (hosversionBefore(4,0,0))
+        return MAKERESULT(Module_Libnx, LibnxError_IncompatSysVer);
+
+    return serviceDispatchInOut(&g_audoutaSrv, 4, pid, *volume_out);
+}
+
+Result audoutaSetProcessRecordVolume(u64 pid, u64 delay, float volume) {
+    if (hosversionBefore(4,0,0))
+        return MAKERESULT(Module_Libnx, LibnxError_IncompatSysVer);
+
+    const struct {
+        float volume;
+        u64 pid;
+        u64 timespan;
+    } in = { volume, pid, delay };
+
+    return serviceDispatchIn(&g_audoutaSrv, 5, in);
+}
+
+// IAudioOutManagerForDebugger
+Result audoutdInitialize(void) {
+    if (hosversionAtLeast(11,0,0))
+        return MAKERESULT(Module_Libnx, LibnxError_IncompatSysVer);
+
+    return smGetService(&g_audoutdSrv, "audout:d");
+}
+
+void audoutdExit(void) {
+    serviceClose(&g_audoutdSrv);
+}
+
+Service* audoutdGetServiceSession(void) {
+    return &g_audoutdSrv;
+}
+
+Result audoutdRequestSuspendForDebug(u64 pid, u64 delay) {
+    const struct {
+        u64 pid;
+        u64 timespan;
+    } in = { pid, delay };
+
+    return serviceDispatchIn(&g_audoutdSrv, 0, in);
+}
+
+Result audoutdRequestResumeForDebug(u64 pid, u64 delay) {
+    const struct {
+        u64 pid;
+        u64 timespan;
+    } in = { pid, delay };
+
+    return serviceDispatchIn(&g_audoutdSrv, 1, in);
+}
diff --git a/sys-tune/source/impl/dr_flac.h b/sys-tune/source/impl/dr_flac.h
index 542ff6b..a79a768 100644
--- a/sys-tune/source/impl/dr_flac.h
+++ b/sys-tune/source/impl/dr_flac.h
@@ -1,12099 +1,12099 @@
-/*
-FLAC audio decoder. Choice of public domain or MIT-0. See license statements at the end of this file.
-dr_flac - v0.12.13 - 2020-05-16
-
-David Reid - mackron@gmail.com
-
-GitHub: https://github.com/mackron/dr_libs
-*/
-
-/*
-RELEASE NOTES - v0.12.0
-=======================
-Version 0.12.0 has breaking API changes including changes to the existing API and the removal of deprecated APIs.
-
-
-Improved Client-Defined Memory Allocation
------------------------------------------
-The main change with this release is the addition of a more flexible way of implementing custom memory allocation routines. The
-existing system of DRFLAC_MALLOC, DRFLAC_REALLOC and DRFLAC_FREE are still in place and will be used by default when no custom
-allocation callbacks are specified.
-
-To use the new system, you pass in a pointer to a drflac_allocation_callbacks object to drflac_open() and family, like this:
-
-    void* my_malloc(size_t sz, void* pUserData)
-    {
-        return malloc(sz);
-    }
-    void* my_realloc(void* p, size_t sz, void* pUserData)
-    {
-        return realloc(p, sz);
-    }
-    void my_free(void* p, void* pUserData)
-    {
-        free(p);
-    }
-
-    ...
-
-    drflac_allocation_callbacks allocationCallbacks;
-    allocationCallbacks.pUserData = &myData;
-    allocationCallbacks.onMalloc  = my_malloc;
-    allocationCallbacks.onRealloc = my_realloc;
-    allocationCallbacks.onFree    = my_free;
-    drflac* pFlac = drflac_open_file("my_file.flac", &allocationCallbacks);
-
-The advantage of this new system is that it allows you to specify user data which will be passed in to the allocation routines.
-
-Passing in null for the allocation callbacks object will cause dr_flac to use defaults which is the same as DRFLAC_MALLOC,
-DRFLAC_REALLOC and DRFLAC_FREE and the equivalent of how it worked in previous versions.
-
-Every API that opens a drflac object now takes this extra parameter. These include the following:
-
-    drflac_open()
-    drflac_open_relaxed()
-    drflac_open_with_metadata()
-    drflac_open_with_metadata_relaxed()
-    drflac_open_file()
-    drflac_open_file_with_metadata()
-    drflac_open_memory()
-    drflac_open_memory_with_metadata()
-    drflac_open_and_read_pcm_frames_s32()
-    drflac_open_and_read_pcm_frames_s16()
-    drflac_open_and_read_pcm_frames_f32()
-    drflac_open_file_and_read_pcm_frames_s32()
-    drflac_open_file_and_read_pcm_frames_s16()
-    drflac_open_file_and_read_pcm_frames_f32()
-    drflac_open_memory_and_read_pcm_frames_s32()
-    drflac_open_memory_and_read_pcm_frames_s16()
-    drflac_open_memory_and_read_pcm_frames_f32()
-
-
-
-Optimizations
--------------
-Seeking performance has been greatly improved. A new binary search based seeking algorithm has been introduced which significantly
-improves performance over the brute force method which was used when no seek table was present. Seek table based seeking also takes
-advantage of the new binary search seeking system to further improve performance there as well. Note that this depends on CRC which
-means it will be disabled when DR_FLAC_NO_CRC is used.
-
-The SSE4.1 pipeline has been cleaned up and optimized. You should see some improvements with decoding speed of 24-bit files in
-particular. 16-bit streams should also see some improvement.
-
-drflac_read_pcm_frames_s16() has been optimized. Previously this sat on top of drflac_read_pcm_frames_s32() and performed it's s32
-to s16 conversion in a second pass. This is now all done in a single pass. This includes SSE2 and ARM NEON optimized paths.
-
-A minor optimization has been implemented for drflac_read_pcm_frames_s32(). This will now use an SSE2 optimized pipeline for stereo
-channel reconstruction which is the last part of the decoding process.
-
-The ARM build has seen a few improvements. The CLZ (count leading zeroes) and REV (byte swap) instructions are now used when
-compiling with GCC and Clang which is achieved using inline assembly. The CLZ instruction requires ARM architecture version 5 at
-compile time and the REV instruction requires ARM architecture version 6.
-
-An ARM NEON optimized pipeline has been implemented. To enable this you'll need to add -mfpu=neon to the command line when compiling.
-
-
-Removed APIs
-------------
-The following APIs were deprecated in version 0.11.0 and have been completely removed in version 0.12.0:
-
-    drflac_read_s32()                   -> drflac_read_pcm_frames_s32()
-    drflac_read_s16()                   -> drflac_read_pcm_frames_s16()
-    drflac_read_f32()                   -> drflac_read_pcm_frames_f32()
-    drflac_seek_to_sample()             -> drflac_seek_to_pcm_frame()
-    drflac_open_and_decode_s32()        -> drflac_open_and_read_pcm_frames_s32()
-    drflac_open_and_decode_s16()        -> drflac_open_and_read_pcm_frames_s16()
-    drflac_open_and_decode_f32()        -> drflac_open_and_read_pcm_frames_f32()
-    drflac_open_and_decode_file_s32()   -> drflac_open_file_and_read_pcm_frames_s32()
-    drflac_open_and_decode_file_s16()   -> drflac_open_file_and_read_pcm_frames_s16()
-    drflac_open_and_decode_file_f32()   -> drflac_open_file_and_read_pcm_frames_f32()
-    drflac_open_and_decode_memory_s32() -> drflac_open_memory_and_read_pcm_frames_s32()
-    drflac_open_and_decode_memory_s16() -> drflac_open_memory_and_read_pcm_frames_s16()
-    drflac_open_and_decode_memory_f32() -> drflac_open_memroy_and_read_pcm_frames_f32()
-
-Prior versions of dr_flac operated on a per-sample basis whereas now it operates on PCM frames. The removed APIs all relate
-to the old per-sample APIs. You now need to use the "pcm_frame" versions.
-*/
-
-
-/*
-Introduction
-============
-dr_flac is a single file library. To use it, do something like the following in one .c file.
-
-    ```c
-    #define DR_FLAC_IMPLEMENTATION
-    #include "dr_flac.h"
-    ```
-
-You can then #include this file in other parts of the program as you would with any other header file. To decode audio data, do something like the following:
-
-    ```c
-    drflac* pFlac = drflac_open_file("MySong.flac", NULL);
-    if (pFlac == NULL) {
-        // Failed to open FLAC file
-    }
-
-    drflac_int32* pSamples = malloc(pFlac->totalPCMFrameCount * pFlac->channels * sizeof(drflac_int32));
-    drflac_uint64 numberOfInterleavedSamplesActuallyRead = drflac_read_pcm_frames_s32(pFlac, pFlac->totalPCMFrameCount, pSamples);
-    ```
-
-The drflac object represents the decoder. It is a transparent type so all the information you need, such as the number of channels and the bits per sample,
-should be directly accessible - just make sure you don't change their values. Samples are always output as interleaved signed 32-bit PCM. In the example above
-a native FLAC stream was opened, however dr_flac has seamless support for Ogg encapsulated FLAC streams as well.
-
-You do not need to decode the entire stream in one go - you just specify how many samples you'd like at any given time and the decoder will give you as many
-samples as it can, up to the amount requested. Later on when you need the next batch of samples, just call it again. Example:
-
-    ```c
-    while (drflac_read_pcm_frames_s32(pFlac, chunkSizeInPCMFrames, pChunkSamples) > 0) {
-        do_something();
-    }
-    ```
-
-You can seek to a specific PCM frame with `drflac_seek_to_pcm_frame()`.
-
-If you just want to quickly decode an entire FLAC file in one go you can do something like this:
-
-    ```c
-    unsigned int channels;
-    unsigned int sampleRate;
-    drflac_uint64 totalPCMFrameCount;
-    drflac_int32* pSampleData = drflac_open_file_and_read_pcm_frames_s32("MySong.flac", &channels, &sampleRate, &totalPCMFrameCount, NULL);
-    if (pSampleData == NULL) {
-        // Failed to open and decode FLAC file.
-    }
-
-    ...
-
-    drflac_free(pSampleData);
-    ```
-
-You can read samples as signed 16-bit integer and 32-bit floating-point PCM with the *_s16() and *_f32() family of APIs respectively, but note that these
-should be considered lossy.
-
-
-If you need access to metadata (album art, etc.), use `drflac_open_with_metadata()`, `drflac_open_file_with_metdata()` or `drflac_open_memory_with_metadata()`.
-The rationale for keeping these APIs separate is that they're slightly slower than the normal versions and also just a little bit harder to use. dr_flac
-reports metadata to the application through the use of a callback, and every metadata block is reported before `drflac_open_with_metdata()` returns.
-
-The main opening APIs (`drflac_open()`, etc.) will fail if the header is not present. The presents a problem in certain scenarios such as broadcast style
-streams or internet radio where the header may not be present because the user has started playback mid-stream. To handle this, use the relaxed APIs:
-    
-    `drflac_open_relaxed()`
-    `drflac_open_with_metadata_relaxed()`
-
-It is not recommended to use these APIs for file based streams because a missing header would usually indicate a corrupt or perverse file. In addition, these
-APIs can take a long time to initialize because they may need to spend a lot of time finding the first frame.
-
-
-
-Build Options
-=============
-#define these options before including this file.
-
-#define DR_FLAC_NO_STDIO
-  Disable `drflac_open_file()` and family.
-
-#define DR_FLAC_NO_OGG
-  Disables support for Ogg/FLAC streams.
-
-#define DR_FLAC_BUFFER_SIZE <number>
-  Defines the size of the internal buffer to store data from onRead(). This buffer is used to reduce the number of calls back to the client for more data.
-  Larger values means more memory, but better performance. My tests show diminishing returns after about 4KB (which is the default). Consider reducing this if
-  you have a very efficient implementation of onRead(), or increase it if it's very inefficient. Must be a multiple of 8.
-
-#define DR_FLAC_NO_CRC
-  Disables CRC checks. This will offer a performance boost when CRC is unnecessary. This will disable binary search seeking. When seeking, the seek table will
-  be used if available. Otherwise the seek will be performed using brute force.
-
-#define DR_FLAC_NO_SIMD
-  Disables SIMD optimizations (SSE on x86/x64 architectures, NEON on ARM architectures). Use this if you are having compatibility issues with your compiler.
-
-
-
-Notes
-=====
-- dr_flac does not support changing the sample rate nor channel count mid stream.
-- dr_flac is not thread-safe, but its APIs can be called from any thread so long as you do your own synchronization.
-- When using Ogg encapsulation, a corrupted metadata block will result in `drflac_open_with_metadata()` and `drflac_open()` returning inconsistent samples due
-  to differences in corrupted stream recorvery logic between the two APIs.
-*/
-
-#ifndef dr_flac_h
-#define dr_flac_h
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define DRFLAC_STRINGIFY(x)      #x
-#define DRFLAC_XSTRINGIFY(x)     DRFLAC_STRINGIFY(x)
-
-#define DRFLAC_VERSION_MAJOR     0
-#define DRFLAC_VERSION_MINOR     12
-#define DRFLAC_VERSION_REVISION  13
-#define DRFLAC_VERSION_STRING    DRFLAC_XSTRINGIFY(DRFLAC_VERSION_MAJOR) "." DRFLAC_XSTRINGIFY(DRFLAC_VERSION_MINOR) "." DRFLAC_XSTRINGIFY(DRFLAC_VERSION_REVISION)
-
-#include <stddef.h> /* For size_t. */
-
-/* Sized types. Prefer built-in types. Fall back to stdint. */
-#ifdef _MSC_VER
-    #if defined(__clang__)
-        #pragma GCC diagnostic push
-        #pragma GCC diagnostic ignored "-Wlanguage-extension-token"
-        #pragma GCC diagnostic ignored "-Wlong-long"        
-        #pragma GCC diagnostic ignored "-Wc++11-long-long"
-    #endif
-    typedef   signed __int8  drflac_int8;
-    typedef unsigned __int8  drflac_uint8;
-    typedef   signed __int16 drflac_int16;
-    typedef unsigned __int16 drflac_uint16;
-    typedef   signed __int32 drflac_int32;
-    typedef unsigned __int32 drflac_uint32;
-    typedef   signed __int64 drflac_int64;
-    typedef unsigned __int64 drflac_uint64;
-    #if defined(__clang__)
-        #pragma GCC diagnostic pop
-    #endif
-#else
-    #include <stdint.h>
-    typedef int8_t           drflac_int8;
-    typedef uint8_t          drflac_uint8;
-    typedef int16_t          drflac_int16;
-    typedef uint16_t         drflac_uint16;
-    typedef int32_t          drflac_int32;
-    typedef uint32_t         drflac_uint32;
-    typedef int64_t          drflac_int64;
-    typedef uint64_t         drflac_uint64;
-#endif
-typedef drflac_uint8         drflac_bool8;
-typedef drflac_uint32        drflac_bool32;
-#define DRFLAC_TRUE          1
-#define DRFLAC_FALSE         0
-
-#if !defined(DRFLAC_API)
-    #if defined(DRFLAC_DLL)
-        #if defined(_WIN32)
-            #define DRFLAC_DLL_IMPORT  __declspec(dllimport)
-            #define DRFLAC_DLL_EXPORT  __declspec(dllexport)
-            #define DRFLAC_DLL_PRIVATE static
-        #else
-            #if defined(__GNUC__) && __GNUC__ >= 4
-                #define DRFLAC_DLL_IMPORT  __attribute__((visibility("default")))
-                #define DRFLAC_DLL_EXPORT  __attribute__((visibility("default")))
-                #define DRFLAC_DLL_PRIVATE __attribute__((visibility("hidden")))
-            #else
-                #define DRFLAC_DLL_IMPORT
-                #define DRFLAC_DLL_EXPORT
-                #define DRFLAC_DLL_PRIVATE static
-            #endif
-        #endif
-
-        #if defined(DR_FLAC_IMPLEMENTATION) || defined(DRFLAC_IMPLEMENTATION)
-            #define DRFLAC_API  DRFLAC_DLL_EXPORT
-        #else
-            #define DRFLAC_API  DRFLAC_DLL_IMPORT
-        #endif
-        #define DRFLAC_PRIVATE DRFLAC_DLL_PRIVATE
-    #else
-        #define DRFLAC_API extern
-        #define DRFLAC_PRIVATE static
-    #endif
-#endif
-
-#if defined(_MSC_VER) && _MSC_VER >= 1700   /* Visual Studio 2012 */
-    #define DRFLAC_DEPRECATED       __declspec(deprecated)
-#elif (defined(__GNUC__) && __GNUC__ >= 4)  /* GCC 4 */
-    #define DRFLAC_DEPRECATED       __attribute__((deprecated))
-#elif defined(__has_feature)                /* Clang */
-    #if __has_feature(attribute_deprecated)
-        #define DRFLAC_DEPRECATED   __attribute__((deprecated))
-    #else
-        #define DRFLAC_DEPRECATED
-    #endif
-#else
-    #define DRFLAC_DEPRECATED
-#endif
-
-DRFLAC_API void drflac_version(drflac_uint32* pMajor, drflac_uint32* pMinor, drflac_uint32* pRevision);
-DRFLAC_API const char* drflac_version_string();
-
-/*
-As data is read from the client it is placed into an internal buffer for fast access. This controls the size of that buffer. Larger values means more speed,
-but also more memory. In my testing there is diminishing returns after about 4KB, but you can fiddle with this to suit your own needs. Must be a multiple of 8.
-*/
-#ifndef DR_FLAC_BUFFER_SIZE
-#define DR_FLAC_BUFFER_SIZE   4096
-#endif
-
-/* Check if we can enable 64-bit optimizations. */
-#if defined(_WIN64) || defined(_LP64) || defined(__LP64__)
-#define DRFLAC_64BIT
-#endif
-
-#ifdef DRFLAC_64BIT
-typedef drflac_uint64 drflac_cache_t;
-#else
-typedef drflac_uint32 drflac_cache_t;
-#endif
-
-/* The various metadata block types. */
-#define DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO       0
-#define DRFLAC_METADATA_BLOCK_TYPE_PADDING          1
-#define DRFLAC_METADATA_BLOCK_TYPE_APPLICATION      2
-#define DRFLAC_METADATA_BLOCK_TYPE_SEEKTABLE        3
-#define DRFLAC_METADATA_BLOCK_TYPE_VORBIS_COMMENT   4
-#define DRFLAC_METADATA_BLOCK_TYPE_CUESHEET         5
-#define DRFLAC_METADATA_BLOCK_TYPE_PICTURE          6
-#define DRFLAC_METADATA_BLOCK_TYPE_INVALID          127
-
-/* The various picture types specified in the PICTURE block. */
-#define DRFLAC_PICTURE_TYPE_OTHER                   0
-#define DRFLAC_PICTURE_TYPE_FILE_ICON               1
-#define DRFLAC_PICTURE_TYPE_OTHER_FILE_ICON         2
-#define DRFLAC_PICTURE_TYPE_COVER_FRONT             3
-#define DRFLAC_PICTURE_TYPE_COVER_BACK              4
-#define DRFLAC_PICTURE_TYPE_LEAFLET_PAGE            5
-#define DRFLAC_PICTURE_TYPE_MEDIA                   6
-#define DRFLAC_PICTURE_TYPE_LEAD_ARTIST             7
-#define DRFLAC_PICTURE_TYPE_ARTIST                  8
-#define DRFLAC_PICTURE_TYPE_CONDUCTOR               9
-#define DRFLAC_PICTURE_TYPE_BAND                    10
-#define DRFLAC_PICTURE_TYPE_COMPOSER                11
-#define DRFLAC_PICTURE_TYPE_LYRICIST                12
-#define DRFLAC_PICTURE_TYPE_RECORDING_LOCATION      13
-#define DRFLAC_PICTURE_TYPE_DURING_RECORDING        14
-#define DRFLAC_PICTURE_TYPE_DURING_PERFORMANCE      15
-#define DRFLAC_PICTURE_TYPE_SCREEN_CAPTURE          16
-#define DRFLAC_PICTURE_TYPE_BRIGHT_COLORED_FISH     17
-#define DRFLAC_PICTURE_TYPE_ILLUSTRATION            18
-#define DRFLAC_PICTURE_TYPE_BAND_LOGOTYPE           19
-#define DRFLAC_PICTURE_TYPE_PUBLISHER_LOGOTYPE      20
-
-typedef enum
-{
-    drflac_container_native,
-    drflac_container_ogg,
-    drflac_container_unknown
-} drflac_container;
-
-typedef enum
-{
-    drflac_seek_origin_start,
-    drflac_seek_origin_current
-} drflac_seek_origin;
-
-/* Packing is important on this structure because we map this directly to the raw data within the SEEKTABLE metadata block. */
-#pragma pack(2)
-typedef struct
-{
-    drflac_uint64 firstPCMFrame;
-    drflac_uint64 flacFrameOffset;   /* The offset from the first byte of the header of the first frame. */
-    drflac_uint16 pcmFrameCount;
-} drflac_seekpoint;
-#pragma pack()
-
-typedef struct
-{
-    drflac_uint16 minBlockSizeInPCMFrames;
-    drflac_uint16 maxBlockSizeInPCMFrames;
-    drflac_uint32 minFrameSizeInPCMFrames;
-    drflac_uint32 maxFrameSizeInPCMFrames;
-    drflac_uint32 sampleRate;
-    drflac_uint8  channels;
-    drflac_uint8  bitsPerSample;
-    drflac_uint64 totalPCMFrameCount;
-    drflac_uint8  md5[16];
-} drflac_streaminfo;
-
-typedef struct
-{
-    /* The metadata type. Use this to know how to interpret the data below. */
-    drflac_uint32 type;
-
-    /*
-    A pointer to the raw data. This points to a temporary buffer so don't hold on to it. It's best to
-    not modify the contents of this buffer. Use the structures below for more meaningful and structured
-    information about the metadata. It's possible for this to be null.
-    */
-    const void* pRawData;
-
-    /* The size in bytes of the block and the buffer pointed to by pRawData if it's non-NULL. */
-    drflac_uint32 rawDataSize;
-
-    union
-    {
-        drflac_streaminfo streaminfo;
-
-        struct
-        {
-            int unused;
-        } padding;
-
-        struct
-        {
-            drflac_uint32 id;
-            const void* pData;
-            drflac_uint32 dataSize;
-        } application;
-
-        struct
-        {
-            drflac_uint32 seekpointCount;
-            const drflac_seekpoint* pSeekpoints;
-        } seektable;
-
-        struct
-        {
-            drflac_uint32 vendorLength;
-            const char* vendor;
-            drflac_uint32 commentCount;
-            const void* pComments;
-        } vorbis_comment;
-
-        struct
-        {
-            char catalog[128];
-            drflac_uint64 leadInSampleCount;
-            drflac_bool32 isCD;
-            drflac_uint8 trackCount;
-            const void* pTrackData;
-        } cuesheet;
-
-        struct
-        {
-            drflac_uint32 type;
-            drflac_uint32 mimeLength;
-            const char* mime;
-            drflac_uint32 descriptionLength;
-            const char* description;
-            drflac_uint32 width;
-            drflac_uint32 height;
-            drflac_uint32 colorDepth;
-            drflac_uint32 indexColorCount;
-            drflac_uint32 pictureDataSize;
-            const drflac_uint8* pPictureData;
-        } picture;
-    } data;
-} drflac_metadata;
-
-
-/*
-Callback for when data needs to be read from the client.
-
-
-Parameters
-----------
-pUserData (in)
-    The user data that was passed to drflac_open() and family.
-
-pBufferOut (out)
-    The output buffer.
-
-bytesToRead (in)
-    The number of bytes to read.
-
-
-Return Value
-------------
-The number of bytes actually read.
-
-
-Remarks
--------
-A return value of less than bytesToRead indicates the end of the stream. Do _not_ return from this callback until either the entire bytesToRead is filled or
-you have reached the end of the stream.
-*/
-typedef size_t (* drflac_read_proc)(void* pUserData, void* pBufferOut, size_t bytesToRead);
-
-/*
-Callback for when data needs to be seeked.
-
-
-Parameters
-----------
-pUserData (in)
-    The user data that was passed to drflac_open() and family.
-
-offset (in)
-    The number of bytes to move, relative to the origin. Will never be negative.
-
-origin (in)
-    The origin of the seek - the current position or the start of the stream.
-
-
-Return Value
-------------
-Whether or not the seek was successful.
-
-
-Remarks
--------
-The offset will never be negative. Whether or not it is relative to the beginning or current position is determined by the "origin" parameter which will be
-either drflac_seek_origin_start or drflac_seek_origin_current.
-
-When seeking to a PCM frame using drflac_seek_to_pcm_frame(), dr_flac may call this with an offset beyond the end of the FLAC stream. This needs to be detected
-and handled by returning DRFLAC_FALSE.
-*/
-typedef drflac_bool32 (* drflac_seek_proc)(void* pUserData, int offset, drflac_seek_origin origin);
-
-/*
-Callback for when a metadata block is read.
-
-
-Parameters
-----------
-pUserData (in)
-    The user data that was passed to drflac_open() and family.
-
-pMetadata (in)
-    A pointer to a structure containing the data of the metadata block.
-
-
-Remarks
--------
-Use pMetadata->type to determine which metadata block is being handled and how to read the data.
-*/
-typedef void (* drflac_meta_proc)(void* pUserData, drflac_metadata* pMetadata);
-
-
-typedef struct
-{
-    void* pUserData;
-    void* (* onMalloc)(size_t sz, void* pUserData);
-    void* (* onRealloc)(void* p, size_t sz, void* pUserData);
-    void  (* onFree)(void* p, void* pUserData);
-} drflac_allocation_callbacks;
-
-/* Structure for internal use. Only used for decoders opened with drflac_open_memory. */
-typedef struct
-{
-    const drflac_uint8* data;
-    size_t dataSize;
-    size_t currentReadPos;
-} drflac__memory_stream;
-
-/* Structure for internal use. Used for bit streaming. */
-typedef struct
-{
-    /* The function to call when more data needs to be read. */
-    drflac_read_proc onRead;
-
-    /* The function to call when the current read position needs to be moved. */
-    drflac_seek_proc onSeek;
-
-    /* The user data to pass around to onRead and onSeek. */
-    void* pUserData;
-
-
-    /*
-    The number of unaligned bytes in the L2 cache. This will always be 0 until the end of the stream is hit. At the end of the
-    stream there will be a number of bytes that don't cleanly fit in an L1 cache line, so we use this variable to know whether
-    or not the bistreamer needs to run on a slower path to read those last bytes. This will never be more than sizeof(drflac_cache_t).
-    */
-    size_t unalignedByteCount;
-
-    /* The content of the unaligned bytes. */
-    drflac_cache_t unalignedCache;
-
-    /* The index of the next valid cache line in the "L2" cache. */
-    drflac_uint32 nextL2Line;
-
-    /* The number of bits that have been consumed by the cache. This is used to determine how many valid bits are remaining. */
-    drflac_uint32 consumedBits;
-
-    /*
-    The cached data which was most recently read from the client. There are two levels of cache. Data flows as such:
-    Client -> L2 -> L1. The L2 -> L1 movement is aligned and runs on a fast path in just a few instructions.
-    */
-    drflac_cache_t cacheL2[DR_FLAC_BUFFER_SIZE/sizeof(drflac_cache_t)];
-    drflac_cache_t cache;
-
-    /*
-    CRC-16. This is updated whenever bits are read from the bit stream. Manually set this to 0 to reset the CRC. For FLAC, this
-    is reset to 0 at the beginning of each frame.
-    */
-    drflac_uint16 crc16;
-    drflac_cache_t crc16Cache;              /* A cache for optimizing CRC calculations. This is filled when when the L1 cache is reloaded. */
-    drflac_uint32 crc16CacheIgnoredBytes;   /* The number of bytes to ignore when updating the CRC-16 from the CRC-16 cache. */
-} drflac_bs;
-
-typedef struct
-{
-    /* The type of the subframe: SUBFRAME_CONSTANT, SUBFRAME_VERBATIM, SUBFRAME_FIXED or SUBFRAME_LPC. */
-    drflac_uint8 subframeType;
-
-    /* The number of wasted bits per sample as specified by the sub-frame header. */
-    drflac_uint8 wastedBitsPerSample;
-
-    /* The order to use for the prediction stage for SUBFRAME_FIXED and SUBFRAME_LPC. */
-    drflac_uint8 lpcOrder;
-
-    /* A pointer to the buffer containing the decoded samples in the subframe. This pointer is an offset from drflac::pExtraData. */
-    drflac_int32* pSamplesS32;
-} drflac_subframe;
-
-typedef struct
-{
-    /*
-    If the stream uses variable block sizes, this will be set to the index of the first PCM frame. If fixed block sizes are used, this will
-    always be set to 0. This is 64-bit because the decoded PCM frame number will be 36 bits.
-    */
-    drflac_uint64 pcmFrameNumber;
-
-    /*
-    If the stream uses fixed block sizes, this will be set to the frame number. If variable block sizes are used, this will always be 0. This
-    is 32-bit because in fixed block sizes, the maximum frame number will be 31 bits.
-    */
-    drflac_uint32 flacFrameNumber;
-
-    /* The sample rate of this frame. */
-    drflac_uint32 sampleRate;
-
-    /* The number of PCM frames in each sub-frame within this frame. */
-    drflac_uint16 blockSizeInPCMFrames;
-
-    /*
-    The channel assignment of this frame. This is not always set to the channel count. If interchannel decorrelation is being used this
-    will be set to DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE, DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE or DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE.
-    */
-    drflac_uint8 channelAssignment;
-
-    /* The number of bits per sample within this frame. */
-    drflac_uint8 bitsPerSample;
-
-    /* The frame's CRC. */
-    drflac_uint8 crc8;
-} drflac_frame_header;
-
-typedef struct
-{
-    /* The header. */
-    drflac_frame_header header;
-
-    /*
-    The number of PCM frames left to be read in this FLAC frame. This is initially set to the block size. As PCM frames are read,
-    this will be decremented. When it reaches 0, the decoder will see this frame as fully consumed and load the next frame.
-    */
-    drflac_uint32 pcmFramesRemaining;
-
-    /* The list of sub-frames within the frame. There is one sub-frame for each channel, and there's a maximum of 8 channels. */
-    drflac_subframe subframes[8];
-} drflac_frame;
-
-typedef struct
-{
-    /* The function to call when a metadata block is read. */
-    drflac_meta_proc onMeta;
-
-    /* The user data posted to the metadata callback function. */
-    void* pUserDataMD;
-
-    /* Memory allocation callbacks. */
-    drflac_allocation_callbacks allocationCallbacks;
-
-
-    /* The sample rate. Will be set to something like 44100. */
-    drflac_uint32 sampleRate;
-
-    /*
-    The number of channels. This will be set to 1 for monaural streams, 2 for stereo, etc. Maximum 8. This is set based on the
-    value specified in the STREAMINFO block.
-    */
-    drflac_uint8 channels;
-
-    /* The bits per sample. Will be set to something like 16, 24, etc. */
-    drflac_uint8 bitsPerSample;
-
-    /* The maximum block size, in samples. This number represents the number of samples in each channel (not combined). */
-    drflac_uint16 maxBlockSizeInPCMFrames;
-
-    /*
-    The total number of PCM Frames making up the stream. Can be 0 in which case it's still a valid stream, but just means
-    the total PCM frame count is unknown. Likely the case with streams like internet radio.
-    */
-    drflac_uint64 totalPCMFrameCount;
-
-
-    /* The container type. This is set based on whether or not the decoder was opened from a native or Ogg stream. */
-    drflac_container container;
-
-    /* The number of seekpoints in the seektable. */
-    drflac_uint32 seekpointCount;
-
-
-    /* Information about the frame the decoder is currently sitting on. */
-    drflac_frame currentFLACFrame;
-
-
-    /* The index of the PCM frame the decoder is currently sitting on. This is only used for seeking. */
-    drflac_uint64 currentPCMFrame;
-
-    /* The position of the first FLAC frame in the stream. This is only ever used for seeking. */
-    drflac_uint64 firstFLACFramePosInBytes;
-
-
-    /* A hack to avoid a malloc() when opening a decoder with drflac_open_memory(). */
-    drflac__memory_stream memoryStream;
-
-
-    /* A pointer to the decoded sample data. This is an offset of pExtraData. */
-    drflac_int32* pDecodedSamples;
-
-    /* A pointer to the seek table. This is an offset of pExtraData, or NULL if there is no seek table. */
-    drflac_seekpoint* pSeekpoints;
-
-    /* Internal use only. Only used with Ogg containers. Points to a drflac_oggbs object. This is an offset of pExtraData. */
-    void* _oggbs;
-
-    /* Internal use only. Used for profiling and testing different seeking modes. */
-    drflac_bool32 _noSeekTableSeek    : 1;
-    drflac_bool32 _noBinarySearchSeek : 1;
-    drflac_bool32 _noBruteForceSeek   : 1;
-
-    /* The bit streamer. The raw FLAC data is fed through this object. */
-    drflac_bs bs;
-
-    /* Variable length extra data. We attach this to the end of the object so we can avoid unnecessary mallocs. */
-    drflac_uint8 pExtraData[1];
-} drflac;
-
-
-/*
-Opens a FLAC decoder.
-
-
-Parameters
-----------
-onRead (in)
-    The function to call when data needs to be read from the client.
-
-onSeek (in)
-    The function to call when the read position of the client data needs to move.
-
-pUserData (in, optional)
-    A pointer to application defined data that will be passed to onRead and onSeek.
-
-pAllocationCallbacks (in, optional)
-    A pointer to application defined callbacks for managing memory allocations.
-
-
-Return Value
-------------
-Returns a pointer to an object representing the decoder.
-
-
-Remarks
--------
-Close the decoder with `drflac_close()`.
-
-`pAllocationCallbacks` can be NULL in which case it will use `DRFLAC_MALLOC`, `DRFLAC_REALLOC` and `DRFLAC_FREE`.
-
-This function will automatically detect whether or not you are attempting to open a native or Ogg encapsulated FLAC, both of which should work seamlessly
-without any manual intervention. Ogg encapsulation also works with multiplexed streams which basically means it can play FLAC encoded audio tracks in videos.
-
-This is the lowest level function for opening a FLAC stream. You can also use `drflac_open_file()` and `drflac_open_memory()` to open the stream from a file or
-from a block of memory respectively.
-
-The STREAMINFO block must be present for this to succeed. Use `drflac_open_relaxed()` to open a FLAC stream where the header may not be present.
-
-
-Seek Also
----------
-drflac_open_file()
-drflac_open_memory()
-drflac_open_with_metadata()
-drflac_close()
-*/
-DRFLAC_API drflac* drflac_open(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks);
-
-/*
-Opens a FLAC stream with relaxed validation of the header block.
-
-
-Parameters
-----------
-onRead (in)
-    The function to call when data needs to be read from the client.
-
-onSeek (in)
-    The function to call when the read position of the client data needs to move.
-
-container (in)
-    Whether or not the FLAC stream is encapsulated using standard FLAC encapsulation or Ogg encapsulation.
-
-pUserData (in, optional)
-    A pointer to application defined data that will be passed to onRead and onSeek.
-
-pAllocationCallbacks (in, optional)
-    A pointer to application defined callbacks for managing memory allocations.
-
-
-Return Value
-------------
-A pointer to an object representing the decoder.
-
-
-Remarks
--------
-The same as drflac_open(), except attempts to open the stream even when a header block is not present.
-
-Because the header is not necessarily available, the caller must explicitly define the container (Native or Ogg). Do not set this to `drflac_container_unknown`
-as that is for internal use only.
-
-Opening in relaxed mode will continue reading data from onRead until it finds a valid frame. If a frame is never found it will continue forever. To abort,
-force your `onRead` callback to return 0, which dr_flac will use as an indicator that the end of the stream was found.
-*/
-DRFLAC_API drflac* drflac_open_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_container container, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks);
-
-/*
-Opens a FLAC decoder and notifies the caller of the metadata chunks (album art, etc.).
-
-
-Parameters
-----------
-onRead (in)
-    The function to call when data needs to be read from the client.
-
-onSeek (in)
-    The function to call when the read position of the client data needs to move.
-
-onMeta (in)
-    The function to call for every metadata block.
-
-pUserData (in, optional)
-    A pointer to application defined data that will be passed to onRead, onSeek and onMeta.
-
-pAllocationCallbacks (in, optional)
-    A pointer to application defined callbacks for managing memory allocations.
-
-
-Return Value
-------------
-A pointer to an object representing the decoder.
-
-
-Remarks
--------
-Close the decoder with `drflac_close()`.
-
-`pAllocationCallbacks` can be NULL in which case it will use `DRFLAC_MALLOC`, `DRFLAC_REALLOC` and `DRFLAC_FREE`.
-
-This is slower than `drflac_open()`, so avoid this one if you don't need metadata. Internally, this will allocate and free memory on the heap for every
-metadata block except for STREAMINFO and PADDING blocks.
-
-The caller is notified of the metadata via the `onMeta` callback. All metadata blocks will be handled before the function returns.
-
-The STREAMINFO block must be present for this to succeed. Use `drflac_open_with_metadata_relaxed()` to open a FLAC stream where the header may not be present.
-
-Note that this will behave inconsistently with `drflac_open()` if the stream is an Ogg encapsulated stream and a metadata block is corrupted. This is due to
-the way the Ogg stream recovers from corrupted pages. When `drflac_open_with_metadata()` is being used, the open routine will try to read the contents of the
-metadata block, whereas `drflac_open()` will simply seek past it (for the sake of efficiency). This inconsistency can result in different samples being
-returned depending on whether or not the stream is being opened with metadata.
-
-
-Seek Also
----------
-drflac_open_file_with_metadata()
-drflac_open_memory_with_metadata()
-drflac_open()
-drflac_close()
-*/
-DRFLAC_API drflac* drflac_open_with_metadata(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks);
-
-/*
-The same as drflac_open_with_metadata(), except attempts to open the stream even when a header block is not present.
-
-See Also
---------
-drflac_open_with_metadata()
-drflac_open_relaxed()
-*/
-DRFLAC_API drflac* drflac_open_with_metadata_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks);
-
-/*
-Closes the given FLAC decoder.
-
-
-Parameters
-----------
-pFlac (in)
-    The decoder to close.
-
-
-Remarks
--------
-This will destroy the decoder object.
-
-
-See Also
---------
-drflac_open()
-drflac_open_with_metadata()
-drflac_open_file()
-drflac_open_file_w()
-drflac_open_file_with_metadata()
-drflac_open_file_with_metadata_w()
-drflac_open_memory()
-drflac_open_memory_with_metadata()
-*/
-DRFLAC_API void drflac_close(drflac* pFlac);
-
-
-/*
-Reads sample data from the given FLAC decoder, output as interleaved signed 32-bit PCM.
-
-
-Parameters
-----------
-pFlac (in)
-    The decoder.
-
-framesToRead (in)
-    The number of PCM frames to read.
-
-pBufferOut (out, optional)
-    A pointer to the buffer that will receive the decoded samples.
-
-
-Return Value
-------------
-Returns the number of PCM frames actually read. If the return value is less than `framesToRead` it has reached the end.
-
-
-Remarks
--------
-pBufferOut can be null, in which case the call will act as a seek, and the return value will be the number of frames seeked.
-*/
-DRFLAC_API drflac_uint64 drflac_read_pcm_frames_s32(drflac* pFlac, drflac_uint64 framesToRead, drflac_int32* pBufferOut);
-
-
-/*
-Reads sample data from the given FLAC decoder, output as interleaved signed 16-bit PCM.
-
-
-Parameters
-----------
-pFlac (in)
-    The decoder.
-
-framesToRead (in)
-    The number of PCM frames to read.
-
-pBufferOut (out, optional)
-    A pointer to the buffer that will receive the decoded samples.
-
-
-Return Value
-------------
-Returns the number of PCM frames actually read. If the return value is less than `framesToRead` it has reached the end.
-
-
-Remarks
--------
-pBufferOut can be null, in which case the call will act as a seek, and the return value will be the number of frames seeked.
-
-Note that this is lossy for streams where the bits per sample is larger than 16.
-*/
-DRFLAC_API drflac_uint64 drflac_read_pcm_frames_s16(drflac* pFlac, drflac_uint64 framesToRead, drflac_int16* pBufferOut);
-
-/*
-Reads sample data from the given FLAC decoder, output as interleaved 32-bit floating point PCM.
-
-
-Parameters
-----------
-pFlac (in)
-    The decoder.
-
-framesToRead (in)
-    The number of PCM frames to read.
-
-pBufferOut (out, optional)
-    A pointer to the buffer that will receive the decoded samples.
-
-
-Return Value
-------------
-Returns the number of PCM frames actually read. If the return value is less than `framesToRead` it has reached the end.
-
-
-Remarks
--------
-pBufferOut can be null, in which case the call will act as a seek, and the return value will be the number of frames seeked.
-
-Note that this should be considered lossy due to the nature of floating point numbers not being able to exactly represent every possible number.
-*/
-DRFLAC_API drflac_uint64 drflac_read_pcm_frames_f32(drflac* pFlac, drflac_uint64 framesToRead, double* pBufferOut);
-
-/*
-Seeks to the PCM frame at the given index.
-
-
-Parameters
-----------
-pFlac (in)
-    The decoder.
-
-pcmFrameIndex (in)
-    The index of the PCM frame to seek to. See notes below.
-
-
-Return Value
--------------
-`DRFLAC_TRUE` if successful; `DRFLAC_FALSE` otherwise.
-*/
-DRFLAC_API drflac_bool32 drflac_seek_to_pcm_frame(drflac* pFlac, drflac_uint64 pcmFrameIndex);
-
-
-
-#ifndef DR_FLAC_NO_STDIO
-/*
-Opens a FLAC decoder from the file at the given path.
-
-
-Parameters
-----------
-pFileName (in)
-    The path of the file to open, either absolute or relative to the current directory.
-
-pAllocationCallbacks (in, optional)
-    A pointer to application defined callbacks for managing memory allocations.
-
-
-Return Value
-------------
-A pointer to an object representing the decoder.
-
-
-Remarks
--------
-Close the decoder with drflac_close().
-
-
-Remarks
--------
-This will hold a handle to the file until the decoder is closed with drflac_close(). Some platforms will restrict the number of files a process can have open
-at any given time, so keep this mind if you have many decoders open at the same time.
-
-
-See Also
---------
-drflac_open_file_with_metadata()
-drflac_open()
-drflac_close()
-*/
-DRFLAC_API drflac* drflac_open_file(const char* pFileName, const drflac_allocation_callbacks* pAllocationCallbacks);
-DRFLAC_API drflac* drflac_open_file_w(const wchar_t* pFileName, const drflac_allocation_callbacks* pAllocationCallbacks);
-
-/*
-Opens a FLAC decoder from the file at the given path and notifies the caller of the metadata chunks (album art, etc.)
-
-
-Parameters
-----------
-pFileName (in)
-    The path of the file to open, either absolute or relative to the current directory.
-
-pAllocationCallbacks (in, optional)
-    A pointer to application defined callbacks for managing memory allocations.
-
-onMeta (in)
-    The callback to fire for each metadata block.
-
-pUserData (in)
-    A pointer to the user data to pass to the metadata callback.
-
-pAllocationCallbacks (in)
-    A pointer to application defined callbacks for managing memory allocations.
-
-
-Remarks
--------
-Look at the documentation for drflac_open_with_metadata() for more information on how metadata is handled.
-
-
-See Also
---------
-drflac_open_with_metadata()
-drflac_open()
-drflac_close()
-*/
-DRFLAC_API drflac* drflac_open_file_with_metadata(const char* pFileName, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks);
-DRFLAC_API drflac* drflac_open_file_with_metadata_w(const wchar_t* pFileName, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks);
-#endif
-
-/*
-Opens a FLAC decoder from a pre-allocated block of memory
-
-
-Parameters
-----------
-pData (in)
-    A pointer to the raw encoded FLAC data.
-
-dataSize (in)
-    The size in bytes of `data`.
-
-pAllocationCallbacks (in)
-    A pointer to application defined callbacks for managing memory allocations.
-
-
-Return Value
-------------
-A pointer to an object representing the decoder.
-
-
-Remarks
--------
-This does not create a copy of the data. It is up to the application to ensure the buffer remains valid for the lifetime of the decoder.
-
-
-See Also
---------
-drflac_open()
-drflac_close()
-*/
-DRFLAC_API drflac* drflac_open_memory(const void* pData, size_t dataSize, const drflac_allocation_callbacks* pAllocationCallbacks);
-
-/*
-Opens a FLAC decoder from a pre-allocated block of memory and notifies the caller of the metadata chunks (album art, etc.)
-
-
-Parameters
-----------
-pData (in)
-    A pointer to the raw encoded FLAC data.
-
-dataSize (in)
-    The size in bytes of `data`.
-
-onMeta (in)
-    The callback to fire for each metadata block.
-
-pUserData (in)
-    A pointer to the user data to pass to the metadata callback.
-
-pAllocationCallbacks (in)
-    A pointer to application defined callbacks for managing memory allocations.
-
-
-Remarks
--------
-Look at the documentation for drflac_open_with_metadata() for more information on how metadata is handled.
-
-
-See Also
--------
-drflac_open_with_metadata()
-drflac_open()
-drflac_close()
-*/
-DRFLAC_API drflac* drflac_open_memory_with_metadata(const void* pData, size_t dataSize, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks);
-
-
-
-/* High Level APIs */
-
-/*
-Opens a FLAC stream from the given callbacks and fully decodes it in a single operation. The return value is a
-pointer to the sample data as interleaved signed 32-bit PCM. The returned data must be freed with drflac_free().
-
-You can pass in custom memory allocation callbacks via the pAllocationCallbacks parameter. This can be NULL in which
-case it will use DRFLAC_MALLOC, DRFLAC_REALLOC and DRFLAC_FREE.
-
-Sometimes a FLAC file won't keep track of the total sample count. In this situation the function will continuously
-read samples into a dynamically sized buffer on the heap until no samples are left.
-
-Do not call this function on a broadcast type of stream (like internet radio streams and whatnot).
-*/
-DRFLAC_API drflac_int32* drflac_open_and_read_pcm_frames_s32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
-
-/* Same as drflac_open_and_read_pcm_frames_s32(), except returns signed 16-bit integer samples. */
-DRFLAC_API drflac_int16* drflac_open_and_read_pcm_frames_s16(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
-
-/* Same as drflac_open_and_read_pcm_frames_s32(), except returns 32-bit floating-point samples. */
-DRFLAC_API double* drflac_open_and_read_pcm_frames_f32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
-
-#ifndef DR_FLAC_NO_STDIO
-/* Same as drflac_open_and_read_pcm_frames_s32() except opens the decoder from a file. */
-DRFLAC_API drflac_int32* drflac_open_file_and_read_pcm_frames_s32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
-
-/* Same as drflac_open_file_and_read_pcm_frames_s32(), except returns signed 16-bit integer samples. */
-DRFLAC_API drflac_int16* drflac_open_file_and_read_pcm_frames_s16(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
-
-/* Same as drflac_open_file_and_read_pcm_frames_s32(), except returns 32-bit floating-point samples. */
-DRFLAC_API double* drflac_open_file_and_read_pcm_frames_f32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
-#endif
-
-/* Same as drflac_open_and_read_pcm_frames_s32() except opens the decoder from a block of memory. */
-DRFLAC_API drflac_int32* drflac_open_memory_and_read_pcm_frames_s32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
-
-/* Same as drflac_open_memory_and_read_pcm_frames_s32(), except returns signed 16-bit integer samples. */
-DRFLAC_API drflac_int16* drflac_open_memory_and_read_pcm_frames_s16(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
-
-/* Same as drflac_open_memory_and_read_pcm_frames_s32(), except returns 32-bit floating-point samples. */
-DRFLAC_API double* drflac_open_memory_and_read_pcm_frames_f32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
-
-/*
-Frees memory that was allocated internally by dr_flac.
-
-Set pAllocationCallbacks to the same object that was passed to drflac_open_*_and_read_pcm_frames_*(). If you originally passed in NULL, pass in NULL for this.
-*/
-DRFLAC_API void drflac_free(void* p, const drflac_allocation_callbacks* pAllocationCallbacks);
-
-
-/* Structure representing an iterator for vorbis comments in a VORBIS_COMMENT metadata block. */
-typedef struct
-{
-    drflac_uint32 countRemaining;
-    const char* pRunningData;
-} drflac_vorbis_comment_iterator;
-
-/*
-Initializes a vorbis comment iterator. This can be used for iterating over the vorbis comments in a VORBIS_COMMENT
-metadata block.
-*/
-DRFLAC_API void drflac_init_vorbis_comment_iterator(drflac_vorbis_comment_iterator* pIter, drflac_uint32 commentCount, const void* pComments);
-
-/*
-Goes to the next vorbis comment in the given iterator. If null is returned it means there are no more comments. The
-returned string is NOT null terminated.
-*/
-DRFLAC_API const char* drflac_next_vorbis_comment(drflac_vorbis_comment_iterator* pIter, drflac_uint32* pCommentLengthOut);
-
-
-/* Structure representing an iterator for cuesheet tracks in a CUESHEET metadata block. */
-typedef struct
-{
-    drflac_uint32 countRemaining;
-    const char* pRunningData;
-} drflac_cuesheet_track_iterator;
-
-/* Packing is important on this structure because we map this directly to the raw data within the CUESHEET metadata block. */
-#pragma pack(4)
-typedef struct
-{
-    drflac_uint64 offset;
-    drflac_uint8 index;
-    drflac_uint8 reserved[3];
-} drflac_cuesheet_track_index;
-#pragma pack()
-
-typedef struct
-{
-    drflac_uint64 offset;
-    drflac_uint8 trackNumber;
-    char ISRC[12];
-    drflac_bool8 isAudio;
-    drflac_bool8 preEmphasis;
-    drflac_uint8 indexCount;
-    const drflac_cuesheet_track_index* pIndexPoints;
-} drflac_cuesheet_track;
-
-/*
-Initializes a cuesheet track iterator. This can be used for iterating over the cuesheet tracks in a CUESHEET metadata
-block.
-*/
-DRFLAC_API void drflac_init_cuesheet_track_iterator(drflac_cuesheet_track_iterator* pIter, drflac_uint32 trackCount, const void* pTrackData);
-
-/* Goes to the next cuesheet track in the given iterator. If DRFLAC_FALSE is returned it means there are no more comments. */
-DRFLAC_API drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterator* pIter, drflac_cuesheet_track* pCuesheetTrack);
-
-
-#ifdef __cplusplus
-}
-#endif
-#endif  /* dr_flac_h */
-
-
-/************************************************************************************************************************************************************
- ************************************************************************************************************************************************************
-
- IMPLEMENTATION
-
- ************************************************************************************************************************************************************
- ************************************************************************************************************************************************************/
-#if defined(DR_FLAC_IMPLEMENTATION) || defined(DRFLAC_IMPLEMENTATION)
-
-/* Disable some annoying warnings. */
-#if defined(__GNUC__)
-    #pragma GCC diagnostic push
-    #if __GNUC__ >= 7
-    #pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
-    #endif
-#endif
-
-#ifdef __linux__
-    #ifndef _BSD_SOURCE
-        #define _BSD_SOURCE
-    #endif
-    #ifndef __USE_BSD
-        #define __USE_BSD
-    #endif
-    #include <endian.h>
-#endif
-
-#include <stdlib.h>
-#include <string.h>
-
-#ifdef _MSC_VER
-    #define DRFLAC_INLINE __forceinline
-#elif defined(__GNUC__)
-    /*
-    I've had a bug report where GCC is emitting warnings about functions possibly not being inlineable. This warning happens when
-    the __attribute__((always_inline)) attribute is defined without an "inline" statement. I think therefore there must be some
-    case where "__inline__" is not always defined, thus the compiler emitting these warnings. When using -std=c89 or -ansi on the
-    command line, we cannot use the "inline" keyword and instead need to use "__inline__". In an attempt to work around this issue
-    I am using "__inline__" only when we're compiling in strict ANSI mode.
-    */
-    #if defined(__STRICT_ANSI__)
-        #define DRFLAC_INLINE __inline__ __attribute__((always_inline))
-    #else
-        #define DRFLAC_INLINE inline __attribute__((always_inline))
-    #endif
-#else
-    #define DRFLAC_INLINE
-#endif
-
-/* CPU architecture. */
-#if defined(__x86_64__) || defined(_M_X64)
-    #define DRFLAC_X64
-#elif defined(__i386) || defined(_M_IX86)
-    #define DRFLAC_X86
-#elif defined(__arm__) || defined(_M_ARM)
-    #define DRFLAC_ARM
-#endif
-
-/* Intrinsics Support */
-#if !defined(DR_FLAC_NO_SIMD)
-    #if defined(DRFLAC_X64) || defined(DRFLAC_X86)
-        #if defined(_MSC_VER) && !defined(__clang__)
-            /* MSVC. */
-            #if _MSC_VER >= 1400 && !defined(DRFLAC_NO_SSE2)    /* 2005 */
-                #define DRFLAC_SUPPORT_SSE2
-            #endif
-            #if _MSC_VER >= 1600 && !defined(DRFLAC_NO_SSE41)   /* 2010 */
-                #define DRFLAC_SUPPORT_SSE41
-            #endif
-        #else
-            /* Assume GNUC-style. */
-            #if defined(__SSE2__) && !defined(DRFLAC_NO_SSE2)
-                #define DRFLAC_SUPPORT_SSE2
-            #endif
-            #if defined(__SSE4_1__) && !defined(DRFLAC_NO_SSE41)
-                #define DRFLAC_SUPPORT_SSE41
-            #endif
-        #endif
-
-        /* If at this point we still haven't determined compiler support for the intrinsics just fall back to __has_include. */
-        #if !defined(__GNUC__) && !defined(__clang__) && defined(__has_include)
-            #if !defined(DRFLAC_SUPPORT_SSE2) && !defined(DRFLAC_NO_SSE2) && __has_include(<emmintrin.h>)
-                #define DRFLAC_SUPPORT_SSE2
-            #endif
-            #if !defined(DRFLAC_SUPPORT_SSE41) && !defined(DRFLAC_NO_SSE41) && __has_include(<smmintrin.h>)
-                #define DRFLAC_SUPPORT_SSE41
-            #endif
-        #endif
-
-        #if defined(DRFLAC_SUPPORT_SSE41)
-            #include <smmintrin.h>
-        #elif defined(DRFLAC_SUPPORT_SSE2)
-            #include <emmintrin.h>
-        #endif
-    #endif
-
-    #if defined(DRFLAC_ARM)
-        #if !defined(DRFLAC_NO_NEON) && (defined(__ARM_NEON) || defined(__aarch64__) || defined(_M_ARM64))
-            #define DRFLAC_SUPPORT_NEON
-        #endif
-
-        /* Fall back to looking for the #include file. */
-        #if !defined(__GNUC__) && !defined(__clang__) && defined(__has_include)
-            #if !defined(DRFLAC_SUPPORT_NEON) && !defined(DRFLAC_NO_NEON) && __has_include(<arm_neon.h>)
-                #define DRFLAC_SUPPORT_NEON
-            #endif
-        #endif
-
-        #if defined(DRFLAC_SUPPORT_NEON)
-            #include <arm_neon.h>
-        #endif
-    #endif
-#endif
-
-/* Compile-time CPU feature support. */
-#if !defined(DR_FLAC_NO_SIMD) && (defined(DRFLAC_X86) || defined(DRFLAC_X64))
-    #if defined(_MSC_VER) && !defined(__clang__)
-        #if _MSC_VER >= 1400
-            #include <intrin.h>
-            static void drflac__cpuid(int info[4], int fid)
-            {
-                __cpuid(info, fid);
-            }
-        #else
-            #define DRFLAC_NO_CPUID
-        #endif
-    #else
-        #if defined(__GNUC__) || defined(__clang__)
-            static void drflac__cpuid(int info[4], int fid)
-            {
-                /*
-                It looks like the -fPIC option uses the ebx register which GCC complains about. We can work around this by just using a different register, the
-                specific register of which I'm letting the compiler decide on. The "k" prefix is used to specify a 32-bit register. The {...} syntax is for
-                supporting different assembly dialects.
-
-                What's basically happening is that we're saving and restoring the ebx register manually.
-                */
-                #if defined(DRFLAC_X86) && defined(__PIC__)
-                    __asm__ __volatile__ (
-                        "xchg{l} {%%}ebx, %k1;"
-                        "cpuid;"
-                        "xchg{l} {%%}ebx, %k1;"
-                        : "=a"(info[0]), "=&r"(info[1]), "=c"(info[2]), "=d"(info[3]) : "a"(fid), "c"(0)
-                    );
-                #else
-                    __asm__ __volatile__ (
-                        "cpuid" : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3]) : "a"(fid), "c"(0)
-                    );
-                #endif
-            }
-        #else
-            #define DRFLAC_NO_CPUID
-        #endif
-    #endif
-#else
-    #define DRFLAC_NO_CPUID
-#endif
-
-static DRFLAC_INLINE drflac_bool32 drflac_has_sse2(void)
-{
-#if defined(DRFLAC_SUPPORT_SSE2)
-    #if (defined(DRFLAC_X64) || defined(DRFLAC_X86)) && !defined(DRFLAC_NO_SSE2)
-        #if defined(DRFLAC_X64)
-            return DRFLAC_TRUE;    /* 64-bit targets always support SSE2. */
-        #elif (defined(_M_IX86_FP) && _M_IX86_FP == 2) || defined(__SSE2__)
-            return DRFLAC_TRUE;    /* If the compiler is allowed to freely generate SSE2 code we can assume support. */
-        #else
-            #if defined(DRFLAC_NO_CPUID)
-                return DRFLAC_FALSE;
-            #else
-                int info[4];
-                drflac__cpuid(info, 1);
-                return (info[3] & (1 << 26)) != 0;
-            #endif
-        #endif
-    #else
-        return DRFLAC_FALSE;       /* SSE2 is only supported on x86 and x64 architectures. */
-    #endif
-#else
-    return DRFLAC_FALSE;           /* No compiler support. */
-#endif
-}
-
-static DRFLAC_INLINE drflac_bool32 drflac_has_sse41(void)
-{
-#if defined(DRFLAC_SUPPORT_SSE41)
-    #if (defined(DRFLAC_X64) || defined(DRFLAC_X86)) && !defined(DRFLAC_NO_SSE41)
-        #if defined(DRFLAC_X64)
-            return DRFLAC_TRUE;    /* 64-bit targets always support SSE4.1. */
-        #elif (defined(_M_IX86_FP) && _M_IX86_FP == 2) || defined(__SSE4_1__)
-            return DRFLAC_TRUE;    /* If the compiler is allowed to freely generate SSE41 code we can assume support. */
-        #else
-            #if defined(DRFLAC_NO_CPUID)
-                return DRFLAC_FALSE;
-            #else
-                int info[4];
-                drflac__cpuid(info, 1);
-                return (info[2] & (1 << 19)) != 0;
-            #endif
-        #endif
-    #else
-        return DRFLAC_FALSE;       /* SSE41 is only supported on x86 and x64 architectures. */
-    #endif
-#else
-    return DRFLAC_FALSE;           /* No compiler support. */
-#endif
-}
-
-
-#if defined(_MSC_VER) && _MSC_VER >= 1500 && (defined(DRFLAC_X86) || defined(DRFLAC_X64))
-    #define DRFLAC_HAS_LZCNT_INTRINSIC
-#elif (defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)))
-    #define DRFLAC_HAS_LZCNT_INTRINSIC
-#elif defined(__clang__)
-    #if defined(__has_builtin)
-        #if __has_builtin(__builtin_clzll) || __has_builtin(__builtin_clzl)
-            #define DRFLAC_HAS_LZCNT_INTRINSIC
-        #endif
-    #endif
-#endif
-
-#if defined(_MSC_VER) && _MSC_VER >= 1400
-    #define DRFLAC_HAS_BYTESWAP16_INTRINSIC
-    #define DRFLAC_HAS_BYTESWAP32_INTRINSIC
-    #define DRFLAC_HAS_BYTESWAP64_INTRINSIC
-#elif defined(__clang__)
-    #if defined(__has_builtin)
-        #if __has_builtin(__builtin_bswap16)
-            #define DRFLAC_HAS_BYTESWAP16_INTRINSIC
-        #endif
-        #if __has_builtin(__builtin_bswap32)
-            #define DRFLAC_HAS_BYTESWAP32_INTRINSIC
-        #endif
-        #if __has_builtin(__builtin_bswap64)
-            #define DRFLAC_HAS_BYTESWAP64_INTRINSIC
-        #endif
-    #endif
-#elif defined(__GNUC__)
-    #if ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
-        #define DRFLAC_HAS_BYTESWAP32_INTRINSIC
-        #define DRFLAC_HAS_BYTESWAP64_INTRINSIC
-    #endif
-    #if ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))
-        #define DRFLAC_HAS_BYTESWAP16_INTRINSIC
-    #endif
-#endif
-
-
-/* Standard library stuff. */
-#ifndef DRFLAC_ASSERT
-#include <assert.h>
-#define DRFLAC_ASSERT(expression)           assert(expression)
-#endif
-#ifndef DRFLAC_MALLOC
-#define DRFLAC_MALLOC(sz)                   malloc((sz))
-#endif
-#ifndef DRFLAC_REALLOC
-#define DRFLAC_REALLOC(p, sz)               realloc((p), (sz))
-#endif
-#ifndef DRFLAC_FREE
-#define DRFLAC_FREE(p)                      free((p))
-#endif
-#ifndef DRFLAC_COPY_MEMORY
-#define DRFLAC_COPY_MEMORY(dst, src, sz)    memcpy((dst), (src), (sz))
-#endif
-#ifndef DRFLAC_ZERO_MEMORY
-#define DRFLAC_ZERO_MEMORY(p, sz)           memset((p), 0, (sz))
-#endif
-#ifndef DRFLAC_ZERO_OBJECT
-#define DRFLAC_ZERO_OBJECT(p)               DRFLAC_ZERO_MEMORY((p), sizeof(*(p)))
-#endif
-
-#define DRFLAC_MAX_SIMD_VECTOR_SIZE                     64  /* 64 for AVX-512 in the future. */
-
-typedef drflac_int32 drflac_result;
-#define DRFLAC_SUCCESS                                   0
-#define DRFLAC_ERROR                                    -1   /* A generic error. */
-#define DRFLAC_INVALID_ARGS                             -2
-#define DRFLAC_INVALID_OPERATION                        -3
-#define DRFLAC_OUT_OF_MEMORY                            -4
-#define DRFLAC_OUT_OF_RANGE                             -5
-#define DRFLAC_ACCESS_DENIED                            -6
-#define DRFLAC_DOES_NOT_EXIST                           -7
-#define DRFLAC_ALREADY_EXISTS                           -8
-#define DRFLAC_TOO_MANY_OPEN_FILES                      -9
-#define DRFLAC_INVALID_FILE                             -10
-#define DRFLAC_TOO_BIG                                  -11
-#define DRFLAC_PATH_TOO_LONG                            -12
-#define DRFLAC_NAME_TOO_LONG                            -13
-#define DRFLAC_NOT_DIRECTORY                            -14
-#define DRFLAC_IS_DIRECTORY                             -15
-#define DRFLAC_DIRECTORY_NOT_EMPTY                      -16
-#define DRFLAC_END_OF_FILE                              -17
-#define DRFLAC_NO_SPACE                                 -18
-#define DRFLAC_BUSY                                     -19
-#define DRFLAC_IO_ERROR                                 -20
-#define DRFLAC_INTERRUPT                                -21
-#define DRFLAC_UNAVAILABLE                              -22
-#define DRFLAC_ALREADY_IN_USE                           -23
-#define DRFLAC_BAD_ADDRESS                              -24
-#define DRFLAC_BAD_SEEK                                 -25
-#define DRFLAC_BAD_PIPE                                 -26
-#define DRFLAC_DEADLOCK                                 -27
-#define DRFLAC_TOO_MANY_LINKS                           -28
-#define DRFLAC_NOT_IMPLEMENTED                          -29
-#define DRFLAC_NO_MESSAGE                               -30
-#define DRFLAC_BAD_MESSAGE                              -31
-#define DRFLAC_NO_DATA_AVAILABLE                        -32
-#define DRFLAC_INVALID_DATA                             -33
-#define DRFLAC_TIMEOUT                                  -34
-#define DRFLAC_NO_NETWORK                               -35
-#define DRFLAC_NOT_UNIQUE                               -36
-#define DRFLAC_NOT_SOCKET                               -37
-#define DRFLAC_NO_ADDRESS                               -38
-#define DRFLAC_BAD_PROTOCOL                             -39
-#define DRFLAC_PROTOCOL_UNAVAILABLE                     -40
-#define DRFLAC_PROTOCOL_NOT_SUPPORTED                   -41
-#define DRFLAC_PROTOCOL_FAMILY_NOT_SUPPORTED            -42
-#define DRFLAC_ADDRESS_FAMILY_NOT_SUPPORTED             -43
-#define DRFLAC_SOCKET_NOT_SUPPORTED                     -44
-#define DRFLAC_CONNECTION_RESET                         -45
-#define DRFLAC_ALREADY_CONNECTED                        -46
-#define DRFLAC_NOT_CONNECTED                            -47
-#define DRFLAC_CONNECTION_REFUSED                       -48
-#define DRFLAC_NO_HOST                                  -49
-#define DRFLAC_IN_PROGRESS                              -50
-#define DRFLAC_CANCELLED                                -51
-#define DRFLAC_MEMORY_ALREADY_MAPPED                    -52
-#define DRFLAC_AT_END                                   -53
-#define DRFLAC_CRC_MISMATCH                             -128
-
-#define DRFLAC_SUBFRAME_CONSTANT                        0
-#define DRFLAC_SUBFRAME_VERBATIM                        1
-#define DRFLAC_SUBFRAME_FIXED                           8
-#define DRFLAC_SUBFRAME_LPC                             32
-#define DRFLAC_SUBFRAME_RESERVED                        255
-
-#define DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE  0
-#define DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2 1
-
-#define DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT           0
-#define DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE             8
-#define DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE            9
-#define DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE              10
-
-#define drflac_align(x, a)                              ((((x) + (a) - 1) / (a)) * (a))
-
-
-DRFLAC_API void drflac_version(drflac_uint32* pMajor, drflac_uint32* pMinor, drflac_uint32* pRevision)
-{
-    if (pMajor) {
-        *pMajor = DRFLAC_VERSION_MAJOR;
-    }
-
-    if (pMinor) {
-        *pMinor = DRFLAC_VERSION_MINOR;
-    }
-
-    if (pRevision) {
-        *pRevision = DRFLAC_VERSION_REVISION;
-    }
-}
-
-DRFLAC_API const char* drflac_version_string()
-{
-    return DRFLAC_VERSION_STRING;
-}
-
-
-/* CPU caps. */
-#if defined(__has_feature)
-    #if __has_feature(thread_sanitizer)
-        #define DRFLAC_NO_THREAD_SANITIZE __attribute__((no_sanitize("thread")))
-    #else
-        #define DRFLAC_NO_THREAD_SANITIZE
-    #endif
-#else
-    #define DRFLAC_NO_THREAD_SANITIZE
-#endif
-
-#if defined(DRFLAC_HAS_LZCNT_INTRINSIC)
-static drflac_bool32 drflac__gIsLZCNTSupported = DRFLAC_FALSE;
-#endif
-
-#ifndef DRFLAC_NO_CPUID
-static drflac_bool32 drflac__gIsSSE2Supported  = DRFLAC_FALSE;
-static drflac_bool32 drflac__gIsSSE41Supported = DRFLAC_FALSE;
-
-/*
-I've had a bug report that Clang's ThreadSanitizer presents a warning in this function. Having reviewed this, this does
-actually make sense. However, since CPU caps should never differ for a running process, I don't think the trade off of
-complicating internal API's by passing around CPU caps versus just disabling the warnings is worthwhile. I'm therefore
-just going to disable these warnings. This is disabled via the DRFLAC_NO_THREAD_SANITIZE attribute.
-*/
-DRFLAC_NO_THREAD_SANITIZE static void drflac__init_cpu_caps(void)
-{
-    static drflac_bool32 isCPUCapsInitialized = DRFLAC_FALSE;
-
-    if (!isCPUCapsInitialized) {
-        /* LZCNT */
-#if defined(DRFLAC_HAS_LZCNT_INTRINSIC)
-        int info[4] = {0};
-        drflac__cpuid(info, 0x80000001);
-        drflac__gIsLZCNTSupported = (info[2] & (1 << 5)) != 0;
-#endif
-
-        /* SSE2 */
-        drflac__gIsSSE2Supported = drflac_has_sse2();
-
-        /* SSE4.1 */
-        drflac__gIsSSE41Supported = drflac_has_sse41();
-
-        /* Initialized. */
-        isCPUCapsInitialized = DRFLAC_TRUE;
-    }
-}
-#else
-static drflac_bool32 drflac__gIsNEONSupported  = DRFLAC_FALSE;
-
-static DRFLAC_INLINE drflac_bool32 drflac__has_neon(void)
-{
-#if defined(DRFLAC_SUPPORT_NEON)
-    #if defined(DRFLAC_ARM) && !defined(DRFLAC_NO_NEON)
-        #if (defined(__ARM_NEON) || defined(__aarch64__) || defined(_M_ARM64))
-            return DRFLAC_TRUE;    /* If the compiler is allowed to freely generate NEON code we can assume support. */
-        #else
-            /* TODO: Runtime check. */
-            return DRFLAC_FALSE;
-        #endif
-    #else
-        return DRFLAC_FALSE;       /* NEON is only supported on ARM architectures. */
-    #endif
-#else
-    return DRFLAC_FALSE;           /* No compiler support. */
-#endif
-}
-
-DRFLAC_NO_THREAD_SANITIZE static void drflac__init_cpu_caps(void)
-{
-    drflac__gIsNEONSupported = drflac__has_neon();
-
-#if defined(DRFLAC_HAS_LZCNT_INTRINSIC) && defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 5)
-    drflac__gIsLZCNTSupported = DRFLAC_TRUE;
-#endif
-}
-#endif
-
-
-/* Endian Management */
-static DRFLAC_INLINE drflac_bool32 drflac__is_little_endian(void)
-{
-#if defined(DRFLAC_X86) || defined(DRFLAC_X64)
-    return DRFLAC_TRUE;
-#elif defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && __BYTE_ORDER == __LITTLE_ENDIAN
-    return DRFLAC_TRUE;
-#else
-    int n = 1;
-    return (*(char*)&n) == 1;
-#endif
-}
-
-static DRFLAC_INLINE drflac_uint16 drflac__swap_endian_uint16(drflac_uint16 n)
-{
-#ifdef DRFLAC_HAS_BYTESWAP16_INTRINSIC
-    #if defined(_MSC_VER)
-        return _byteswap_ushort(n);
-    #elif defined(__GNUC__) || defined(__clang__)
-        return __builtin_bswap16(n);
-    #else
-        #error "This compiler does not support the byte swap intrinsic."
-    #endif
-#else
-    return ((n & 0xFF00) >> 8) |
-           ((n & 0x00FF) << 8);
-#endif
-}
-
-static DRFLAC_INLINE drflac_uint32 drflac__swap_endian_uint32(drflac_uint32 n)
-{
-#ifdef DRFLAC_HAS_BYTESWAP32_INTRINSIC
-    #if defined(_MSC_VER)
-        return _byteswap_ulong(n);
-    #elif defined(__GNUC__) || defined(__clang__)
-        #if defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 6) && !defined(DRFLAC_64BIT)   /* <-- 64-bit inline assembly has not been tested, so disabling for now. */
-            /* Inline assembly optimized implementation for ARM. In my testing, GCC does not generate optimized code with __builtin_bswap32(). */
-            drflac_uint32 r;
-            __asm__ __volatile__ (
-            #if defined(DRFLAC_64BIT)
-                "rev %w[out], %w[in]" : [out]"=r"(r) : [in]"r"(n)   /* <-- This is untested. If someone in the community could test this, that would be appreciated! */
-            #else
-                "rev %[out], %[in]" : [out]"=r"(r) : [in]"r"(n)
-            #endif
-            );
-            return r;
-        #else
-            return __builtin_bswap32(n);
-        #endif
-    #else
-        #error "This compiler does not support the byte swap intrinsic."
-    #endif
-#else
-    return ((n & 0xFF000000) >> 24) |
-           ((n & 0x00FF0000) >>  8) |
-           ((n & 0x0000FF00) <<  8) |
-           ((n & 0x000000FF) << 24);
-#endif
-}
-
-static DRFLAC_INLINE drflac_uint64 drflac__swap_endian_uint64(drflac_uint64 n)
-{
-#ifdef DRFLAC_HAS_BYTESWAP64_INTRINSIC
-    #if defined(_MSC_VER)
-        return _byteswap_uint64(n);
-    #elif defined(__GNUC__) || defined(__clang__)
-        return __builtin_bswap64(n);
-    #else
-        #error "This compiler does not support the byte swap intrinsic."
-    #endif
-#else
-    return ((n & (drflac_uint64)0xFF00000000000000) >> 56) |
-           ((n & (drflac_uint64)0x00FF000000000000) >> 40) |
-           ((n & (drflac_uint64)0x0000FF0000000000) >> 24) |
-           ((n & (drflac_uint64)0x000000FF00000000) >>  8) |
-           ((n & (drflac_uint64)0x00000000FF000000) <<  8) |
-           ((n & (drflac_uint64)0x0000000000FF0000) << 24) |
-           ((n & (drflac_uint64)0x000000000000FF00) << 40) |
-           ((n & (drflac_uint64)0x00000000000000FF) << 56);
-#endif
-}
-
-
-static DRFLAC_INLINE drflac_uint16 drflac__be2host_16(drflac_uint16 n)
-{
-    if (drflac__is_little_endian()) {
-        return drflac__swap_endian_uint16(n);
-    }
-
-    return n;
-}
-
-static DRFLAC_INLINE drflac_uint32 drflac__be2host_32(drflac_uint32 n)
-{
-    if (drflac__is_little_endian()) {
-        return drflac__swap_endian_uint32(n);
-    }
-
-    return n;
-}
-
-static DRFLAC_INLINE drflac_uint64 drflac__be2host_64(drflac_uint64 n)
-{
-    if (drflac__is_little_endian()) {
-        return drflac__swap_endian_uint64(n);
-    }
-
-    return n;
-}
-
-
-static DRFLAC_INLINE drflac_uint32 drflac__le2host_32(drflac_uint32 n)
-{
-    if (!drflac__is_little_endian()) {
-        return drflac__swap_endian_uint32(n);
-    }
-
-    return n;
-}
-
-
-static DRFLAC_INLINE drflac_uint32 drflac__unsynchsafe_32(drflac_uint32 n)
-{
-    drflac_uint32 result = 0;
-    result |= (n & 0x7F000000) >> 3;
-    result |= (n & 0x007F0000) >> 2;
-    result |= (n & 0x00007F00) >> 1;
-    result |= (n & 0x0000007F) >> 0;
-
-    return result;
-}
-
-
-
-/* The CRC code below is based on this document: http://zlib.net/crc_v3.txt */
-static drflac_uint8 drflac__crc8_table[] = {
-    0x00, 0x07, 0x0E, 0x09, 0x1C, 0x1B, 0x12, 0x15, 0x38, 0x3F, 0x36, 0x31, 0x24, 0x23, 0x2A, 0x2D,
-    0x70, 0x77, 0x7E, 0x79, 0x6C, 0x6B, 0x62, 0x65, 0x48, 0x4F, 0x46, 0x41, 0x54, 0x53, 0x5A, 0x5D,
-    0xE0, 0xE7, 0xEE, 0xE9, 0xFC, 0xFB, 0xF2, 0xF5, 0xD8, 0xDF, 0xD6, 0xD1, 0xC4, 0xC3, 0xCA, 0xCD,
-    0x90, 0x97, 0x9E, 0x99, 0x8C, 0x8B, 0x82, 0x85, 0xA8, 0xAF, 0xA6, 0xA1, 0xB4, 0xB3, 0xBA, 0xBD,
-    0xC7, 0xC0, 0xC9, 0xCE, 0xDB, 0xDC, 0xD5, 0xD2, 0xFF, 0xF8, 0xF1, 0xF6, 0xE3, 0xE4, 0xED, 0xEA,
-    0xB7, 0xB0, 0xB9, 0xBE, 0xAB, 0xAC, 0xA5, 0xA2, 0x8F, 0x88, 0x81, 0x86, 0x93, 0x94, 0x9D, 0x9A,
-    0x27, 0x20, 0x29, 0x2E, 0x3B, 0x3C, 0x35, 0x32, 0x1F, 0x18, 0x11, 0x16, 0x03, 0x04, 0x0D, 0x0A,
-    0x57, 0x50, 0x59, 0x5E, 0x4B, 0x4C, 0x45, 0x42, 0x6F, 0x68, 0x61, 0x66, 0x73, 0x74, 0x7D, 0x7A,
-    0x89, 0x8E, 0x87, 0x80, 0x95, 0x92, 0x9B, 0x9C, 0xB1, 0xB6, 0xBF, 0xB8, 0xAD, 0xAA, 0xA3, 0xA4,
-    0xF9, 0xFE, 0xF7, 0xF0, 0xE5, 0xE2, 0xEB, 0xEC, 0xC1, 0xC6, 0xCF, 0xC8, 0xDD, 0xDA, 0xD3, 0xD4,
-    0x69, 0x6E, 0x67, 0x60, 0x75, 0x72, 0x7B, 0x7C, 0x51, 0x56, 0x5F, 0x58, 0x4D, 0x4A, 0x43, 0x44,
-    0x19, 0x1E, 0x17, 0x10, 0x05, 0x02, 0x0B, 0x0C, 0x21, 0x26, 0x2F, 0x28, 0x3D, 0x3A, 0x33, 0x34,
-    0x4E, 0x49, 0x40, 0x47, 0x52, 0x55, 0x5C, 0x5B, 0x76, 0x71, 0x78, 0x7F, 0x6A, 0x6D, 0x64, 0x63,
-    0x3E, 0x39, 0x30, 0x37, 0x22, 0x25, 0x2C, 0x2B, 0x06, 0x01, 0x08, 0x0F, 0x1A, 0x1D, 0x14, 0x13,
-    0xAE, 0xA9, 0xA0, 0xA7, 0xB2, 0xB5, 0xBC, 0xBB, 0x96, 0x91, 0x98, 0x9F, 0x8A, 0x8D, 0x84, 0x83,
-    0xDE, 0xD9, 0xD0, 0xD7, 0xC2, 0xC5, 0xCC, 0xCB, 0xE6, 0xE1, 0xE8, 0xEF, 0xFA, 0xFD, 0xF4, 0xF3
-};
-
-static drflac_uint16 drflac__crc16_table[] = {
-    0x0000, 0x8005, 0x800F, 0x000A, 0x801B, 0x001E, 0x0014, 0x8011,
-    0x8033, 0x0036, 0x003C, 0x8039, 0x0028, 0x802D, 0x8027, 0x0022,
-    0x8063, 0x0066, 0x006C, 0x8069, 0x0078, 0x807D, 0x8077, 0x0072,
-    0x0050, 0x8055, 0x805F, 0x005A, 0x804B, 0x004E, 0x0044, 0x8041,
-    0x80C3, 0x00C6, 0x00CC, 0x80C9, 0x00D8, 0x80DD, 0x80D7, 0x00D2,
-    0x00F0, 0x80F5, 0x80FF, 0x00FA, 0x80EB, 0x00EE, 0x00E4, 0x80E1,
-    0x00A0, 0x80A5, 0x80AF, 0x00AA, 0x80BB, 0x00BE, 0x00B4, 0x80B1,
-    0x8093, 0x0096, 0x009C, 0x8099, 0x0088, 0x808D, 0x8087, 0x0082,
-    0x8183, 0x0186, 0x018C, 0x8189, 0x0198, 0x819D, 0x8197, 0x0192,
-    0x01B0, 0x81B5, 0x81BF, 0x01BA, 0x81AB, 0x01AE, 0x01A4, 0x81A1,
-    0x01E0, 0x81E5, 0x81EF, 0x01EA, 0x81FB, 0x01FE, 0x01F4, 0x81F1,
-    0x81D3, 0x01D6, 0x01DC, 0x81D9, 0x01C8, 0x81CD, 0x81C7, 0x01C2,
-    0x0140, 0x8145, 0x814F, 0x014A, 0x815B, 0x015E, 0x0154, 0x8151,
-    0x8173, 0x0176, 0x017C, 0x8179, 0x0168, 0x816D, 0x8167, 0x0162,
-    0x8123, 0x0126, 0x012C, 0x8129, 0x0138, 0x813D, 0x8137, 0x0132,
-    0x0110, 0x8115, 0x811F, 0x011A, 0x810B, 0x010E, 0x0104, 0x8101,
-    0x8303, 0x0306, 0x030C, 0x8309, 0x0318, 0x831D, 0x8317, 0x0312,
-    0x0330, 0x8335, 0x833F, 0x033A, 0x832B, 0x032E, 0x0324, 0x8321,
-    0x0360, 0x8365, 0x836F, 0x036A, 0x837B, 0x037E, 0x0374, 0x8371,
-    0x8353, 0x0356, 0x035C, 0x8359, 0x0348, 0x834D, 0x8347, 0x0342,
-    0x03C0, 0x83C5, 0x83CF, 0x03CA, 0x83DB, 0x03DE, 0x03D4, 0x83D1,
-    0x83F3, 0x03F6, 0x03FC, 0x83F9, 0x03E8, 0x83ED, 0x83E7, 0x03E2,
-    0x83A3, 0x03A6, 0x03AC, 0x83A9, 0x03B8, 0x83BD, 0x83B7, 0x03B2,
-    0x0390, 0x8395, 0x839F, 0x039A, 0x838B, 0x038E, 0x0384, 0x8381,
-    0x0280, 0x8285, 0x828F, 0x028A, 0x829B, 0x029E, 0x0294, 0x8291,
-    0x82B3, 0x02B6, 0x02BC, 0x82B9, 0x02A8, 0x82AD, 0x82A7, 0x02A2,
-    0x82E3, 0x02E6, 0x02EC, 0x82E9, 0x02F8, 0x82FD, 0x82F7, 0x02F2,
-    0x02D0, 0x82D5, 0x82DF, 0x02DA, 0x82CB, 0x02CE, 0x02C4, 0x82C1,
-    0x8243, 0x0246, 0x024C, 0x8249, 0x0258, 0x825D, 0x8257, 0x0252,
-    0x0270, 0x8275, 0x827F, 0x027A, 0x826B, 0x026E, 0x0264, 0x8261,
-    0x0220, 0x8225, 0x822F, 0x022A, 0x823B, 0x023E, 0x0234, 0x8231,
-    0x8213, 0x0216, 0x021C, 0x8219, 0x0208, 0x820D, 0x8207, 0x0202
-};
-
-static DRFLAC_INLINE drflac_uint8 drflac_crc8_byte(drflac_uint8 crc, drflac_uint8 data)
-{
-    return drflac__crc8_table[crc ^ data];
-}
-
-static DRFLAC_INLINE drflac_uint8 drflac_crc8(drflac_uint8 crc, drflac_uint32 data, drflac_uint32 count)
-{
-#ifdef DR_FLAC_NO_CRC
-    (void)crc;
-    (void)data;
-    (void)count;
-    return 0;
-#else
-#if 0
-    /* REFERENCE (use of this implementation requires an explicit flush by doing "drflac_crc8(crc, 0, 8);") */
-    drflac_uint8 p = 0x07;
-    for (int i = count-1; i >= 0; --i) {
-        drflac_uint8 bit = (data & (1 << i)) >> i;
-        if (crc & 0x80) {
-            crc = ((crc << 1) | bit) ^ p;
-        } else {
-            crc = ((crc << 1) | bit);
-        }
-    }
-    return crc;
-#else
-    drflac_uint32 wholeBytes;
-    drflac_uint32 leftoverBits;
-    drflac_uint64 leftoverDataMask;
-
-    static drflac_uint64 leftoverDataMaskTable[8] = {
-        0x00, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F
-    };
-
-    DRFLAC_ASSERT(count <= 32);
-
-    wholeBytes = count >> 3;
-    leftoverBits = count - (wholeBytes*8);
-    leftoverDataMask = leftoverDataMaskTable[leftoverBits];
-
-    switch (wholeBytes) {
-        case 4: crc = drflac_crc8_byte(crc, (drflac_uint8)((data & (0xFF000000UL << leftoverBits)) >> (24 + leftoverBits)));
-        case 3: crc = drflac_crc8_byte(crc, (drflac_uint8)((data & (0x00FF0000UL << leftoverBits)) >> (16 + leftoverBits)));
-        case 2: crc = drflac_crc8_byte(crc, (drflac_uint8)((data & (0x0000FF00UL << leftoverBits)) >> ( 8 + leftoverBits)));
-        case 1: crc = drflac_crc8_byte(crc, (drflac_uint8)((data & (0x000000FFUL << leftoverBits)) >> ( 0 + leftoverBits)));
-        case 0: if (leftoverBits > 0) crc = (drflac_uint8)((crc << leftoverBits) ^ drflac__crc8_table[(crc >> (8 - leftoverBits)) ^ (data & leftoverDataMask)]);
-    }
-    return crc;
-#endif
-#endif
-}
-
-static DRFLAC_INLINE drflac_uint16 drflac_crc16_byte(drflac_uint16 crc, drflac_uint8 data)
-{
-    return (crc << 8) ^ drflac__crc16_table[(drflac_uint8)(crc >> 8) ^ data];
-}
-
-static DRFLAC_INLINE drflac_uint16 drflac_crc16_cache(drflac_uint16 crc, drflac_cache_t data)
-{
-#ifdef DRFLAC_64BIT
-    crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 56) & 0xFF));
-    crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 48) & 0xFF));
-    crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 40) & 0xFF));
-    crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 32) & 0xFF));
-#endif
-    crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 24) & 0xFF));
-    crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 16) & 0xFF));
-    crc = drflac_crc16_byte(crc, (drflac_uint8)((data >>  8) & 0xFF));
-    crc = drflac_crc16_byte(crc, (drflac_uint8)((data >>  0) & 0xFF));
-
-    return crc;
-}
-
-static DRFLAC_INLINE drflac_uint16 drflac_crc16_bytes(drflac_uint16 crc, drflac_cache_t data, drflac_uint32 byteCount)
-{
-    switch (byteCount)
-    {
-#ifdef DRFLAC_64BIT
-    case 8: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 56) & 0xFF));
-    case 7: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 48) & 0xFF));
-    case 6: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 40) & 0xFF));
-    case 5: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 32) & 0xFF));
-#endif
-    case 4: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 24) & 0xFF));
-    case 3: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 16) & 0xFF));
-    case 2: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >>  8) & 0xFF));
-    case 1: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >>  0) & 0xFF));
-    }
-
-    return crc;
-}
-
-#if 0
-static DRFLAC_INLINE drflac_uint16 drflac_crc16__32bit(drflac_uint16 crc, drflac_uint32 data, drflac_uint32 count)
-{
-#ifdef DR_FLAC_NO_CRC
-    (void)crc;
-    (void)data;
-    (void)count;
-    return 0;
-#else
-#if 0
-    /* REFERENCE (use of this implementation requires an explicit flush by doing "drflac_crc16(crc, 0, 16);") */
-    drflac_uint16 p = 0x8005;
-    for (int i = count-1; i >= 0; --i) {
-        drflac_uint16 bit = (data & (1ULL << i)) >> i;
-        if (r & 0x8000) {
-            r = ((r << 1) | bit) ^ p;
-        } else {
-            r = ((r << 1) | bit);
-        }
-    }
-
-    return crc;
-#else
-    drflac_uint32 wholeBytes;
-    drflac_uint32 leftoverBits;
-    drflac_uint64 leftoverDataMask;
-
-    static drflac_uint64 leftoverDataMaskTable[8] = {
-        0x00, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F
-    };
-
-    DRFLAC_ASSERT(count <= 64);
-
-    wholeBytes = count >> 3;
-    leftoverBits = count & 7;
-    leftoverDataMask = leftoverDataMaskTable[leftoverBits];
-
-    switch (wholeBytes) {
-        default:
-        case 4: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (0xFF000000UL << leftoverBits)) >> (24 + leftoverBits)));
-        case 3: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (0x00FF0000UL << leftoverBits)) >> (16 + leftoverBits)));
-        case 2: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (0x0000FF00UL << leftoverBits)) >> ( 8 + leftoverBits)));
-        case 1: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (0x000000FFUL << leftoverBits)) >> ( 0 + leftoverBits)));
-        case 0: if (leftoverBits > 0) crc = (crc << leftoverBits) ^ drflac__crc16_table[(crc >> (16 - leftoverBits)) ^ (data & leftoverDataMask)];
-    }
-    return crc;
-#endif
-#endif
-}
-
-static DRFLAC_INLINE drflac_uint16 drflac_crc16__64bit(drflac_uint16 crc, drflac_uint64 data, drflac_uint32 count)
-{
-#ifdef DR_FLAC_NO_CRC
-    (void)crc;
-    (void)data;
-    (void)count;
-    return 0;
-#else
-    drflac_uint32 wholeBytes;
-    drflac_uint32 leftoverBits;
-    drflac_uint64 leftoverDataMask;
-
-    static drflac_uint64 leftoverDataMaskTable[8] = {
-        0x00, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F
-    };
-
-    DRFLAC_ASSERT(count <= 64);
-
-    wholeBytes = count >> 3;
-    leftoverBits = count & 7;
-    leftoverDataMask = leftoverDataMaskTable[leftoverBits];
-
-    switch (wholeBytes) {
-        default:
-        case 8: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0xFF000000 << 32) << leftoverBits)) >> (56 + leftoverBits)));    /* Weird "<< 32" bitshift is required for C89 because it doesn't support 64-bit constants. Should be optimized out by a good compiler. */
-        case 7: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x00FF0000 << 32) << leftoverBits)) >> (48 + leftoverBits)));
-        case 6: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x0000FF00 << 32) << leftoverBits)) >> (40 + leftoverBits)));
-        case 5: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x000000FF << 32) << leftoverBits)) >> (32 + leftoverBits)));
-        case 4: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0xFF000000      ) << leftoverBits)) >> (24 + leftoverBits)));
-        case 3: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x00FF0000      ) << leftoverBits)) >> (16 + leftoverBits)));
-        case 2: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x0000FF00      ) << leftoverBits)) >> ( 8 + leftoverBits)));
-        case 1: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x000000FF      ) << leftoverBits)) >> ( 0 + leftoverBits)));
-        case 0: if (leftoverBits > 0) crc = (crc << leftoverBits) ^ drflac__crc16_table[(crc >> (16 - leftoverBits)) ^ (data & leftoverDataMask)];
-    }
-    return crc;
-#endif
-}
-
-
-static DRFLAC_INLINE drflac_uint16 drflac_crc16(drflac_uint16 crc, drflac_cache_t data, drflac_uint32 count)
-{
-#ifdef DRFLAC_64BIT
-    return drflac_crc16__64bit(crc, data, count);
-#else
-    return drflac_crc16__32bit(crc, data, count);
-#endif
-}
-#endif
-
-
-#ifdef DRFLAC_64BIT
-#define drflac__be2host__cache_line drflac__be2host_64
-#else
-#define drflac__be2host__cache_line drflac__be2host_32
-#endif
-
-/*
-BIT READING ATTEMPT #2
-
-This uses a 32- or 64-bit bit-shifted cache - as bits are read, the cache is shifted such that the first valid bit is sitting
-on the most significant bit. It uses the notion of an L1 and L2 cache (borrowed from CPU architecture), where the L1 cache
-is a 32- or 64-bit unsigned integer (depending on whether or not a 32- or 64-bit build is being compiled) and the L2 is an
-array of "cache lines", with each cache line being the same size as the L1. The L2 is a buffer of about 4KB and is where data
-from onRead() is read into.
-*/
-#define DRFLAC_CACHE_L1_SIZE_BYTES(bs)                      (sizeof((bs)->cache))
-#define DRFLAC_CACHE_L1_SIZE_BITS(bs)                       (sizeof((bs)->cache)*8)
-#define DRFLAC_CACHE_L1_BITS_REMAINING(bs)                  (DRFLAC_CACHE_L1_SIZE_BITS(bs) - (bs)->consumedBits)
-#define DRFLAC_CACHE_L1_SELECTION_MASK(_bitCount)           (~((~(drflac_cache_t)0) >> (_bitCount)))
-#define DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, _bitCount)      (DRFLAC_CACHE_L1_SIZE_BITS(bs) - (_bitCount))
-#define DRFLAC_CACHE_L1_SELECT(bs, _bitCount)               (((bs)->cache) & DRFLAC_CACHE_L1_SELECTION_MASK(_bitCount))
-#define DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, _bitCount)     (DRFLAC_CACHE_L1_SELECT((bs), (_bitCount)) >>  DRFLAC_CACHE_L1_SELECTION_SHIFT((bs), (_bitCount)))
-#define DRFLAC_CACHE_L1_SELECT_AND_SHIFT_SAFE(bs, _bitCount)(DRFLAC_CACHE_L1_SELECT((bs), (_bitCount)) >> (DRFLAC_CACHE_L1_SELECTION_SHIFT((bs), (_bitCount)) & (DRFLAC_CACHE_L1_SIZE_BITS(bs)-1)))
-#define DRFLAC_CACHE_L2_SIZE_BYTES(bs)                      (sizeof((bs)->cacheL2))
-#define DRFLAC_CACHE_L2_LINE_COUNT(bs)                      (DRFLAC_CACHE_L2_SIZE_BYTES(bs) / sizeof((bs)->cacheL2[0]))
-#define DRFLAC_CACHE_L2_LINES_REMAINING(bs)                 (DRFLAC_CACHE_L2_LINE_COUNT(bs) - (bs)->nextL2Line)
-
-
-#ifndef DR_FLAC_NO_CRC
-static DRFLAC_INLINE void drflac__reset_crc16(drflac_bs* bs)
-{
-    bs->crc16 = 0;
-    bs->crc16CacheIgnoredBytes = bs->consumedBits >> 3;
-}
-
-static DRFLAC_INLINE void drflac__update_crc16(drflac_bs* bs)
-{
-    if (bs->crc16CacheIgnoredBytes == 0) {
-        bs->crc16 = drflac_crc16_cache(bs->crc16, bs->crc16Cache);
-    } else {
-        bs->crc16 = drflac_crc16_bytes(bs->crc16, bs->crc16Cache, DRFLAC_CACHE_L1_SIZE_BYTES(bs) - bs->crc16CacheIgnoredBytes);
-        bs->crc16CacheIgnoredBytes = 0;
-    }
-}
-
-static DRFLAC_INLINE drflac_uint16 drflac__flush_crc16(drflac_bs* bs)
-{
-    /* We should never be flushing in a situation where we are not aligned on a byte boundary. */
-    DRFLAC_ASSERT((DRFLAC_CACHE_L1_BITS_REMAINING(bs) & 7) == 0);
-
-    /*
-    The bits that were read from the L1 cache need to be accumulated. The number of bytes needing to be accumulated is determined
-    by the number of bits that have been consumed.
-    */
-    if (DRFLAC_CACHE_L1_BITS_REMAINING(bs) == 0) {
-        drflac__update_crc16(bs);
-    } else {
-        /* We only accumulate the consumed bits. */
-        bs->crc16 = drflac_crc16_bytes(bs->crc16, bs->crc16Cache >> DRFLAC_CACHE_L1_BITS_REMAINING(bs), (bs->consumedBits >> 3) - bs->crc16CacheIgnoredBytes);
-
-        /*
-        The bits that we just accumulated should never be accumulated again. We need to keep track of how many bytes were accumulated
-        so we can handle that later.
-        */
-        bs->crc16CacheIgnoredBytes = bs->consumedBits >> 3;
-    }
-
-    return bs->crc16;
-}
-#endif
-
-static DRFLAC_INLINE drflac_bool32 drflac__reload_l1_cache_from_l2(drflac_bs* bs)
-{
-    size_t bytesRead;
-    size_t alignedL1LineCount;
-
-    /* Fast path. Try loading straight from L2. */
-    if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {
-        bs->cache = bs->cacheL2[bs->nextL2Line++];
-        return DRFLAC_TRUE;
-    }
-
-    /*
-    If we get here it means we've run out of data in the L2 cache. We'll need to fetch more from the client, if there's
-    any left.
-    */
-    if (bs->unalignedByteCount > 0) {
-        return DRFLAC_FALSE;   /* If we have any unaligned bytes it means there's no more aligned bytes left in the client. */
-    }
-
-    bytesRead = bs->onRead(bs->pUserData, bs->cacheL2, DRFLAC_CACHE_L2_SIZE_BYTES(bs));
-
-    bs->nextL2Line = 0;
-    if (bytesRead == DRFLAC_CACHE_L2_SIZE_BYTES(bs)) {
-        bs->cache = bs->cacheL2[bs->nextL2Line++];
-        return DRFLAC_TRUE;
-    }
-
-
-    /*
-    If we get here it means we were unable to retrieve enough data to fill the entire L2 cache. It probably
-    means we've just reached the end of the file. We need to move the valid data down to the end of the buffer
-    and adjust the index of the next line accordingly. Also keep in mind that the L2 cache must be aligned to
-    the size of the L1 so we'll need to seek backwards by any misaligned bytes.
-    */
-    alignedL1LineCount = bytesRead / DRFLAC_CACHE_L1_SIZE_BYTES(bs);
-
-    /* We need to keep track of any unaligned bytes for later use. */
-    bs->unalignedByteCount = bytesRead - (alignedL1LineCount * DRFLAC_CACHE_L1_SIZE_BYTES(bs));
-    if (bs->unalignedByteCount > 0) {
-        bs->unalignedCache = bs->cacheL2[alignedL1LineCount];
-    }
-
-    if (alignedL1LineCount > 0) {
-        size_t offset = DRFLAC_CACHE_L2_LINE_COUNT(bs) - alignedL1LineCount;
-        size_t i;
-        for (i = alignedL1LineCount; i > 0; --i) {
-            bs->cacheL2[i-1 + offset] = bs->cacheL2[i-1];
-        }
-
-        bs->nextL2Line = (drflac_uint32)offset;
-        bs->cache = bs->cacheL2[bs->nextL2Line++];
-        return DRFLAC_TRUE;
-    } else {
-        /* If we get into this branch it means we weren't able to load any L1-aligned data. */
-        bs->nextL2Line = DRFLAC_CACHE_L2_LINE_COUNT(bs);
-        return DRFLAC_FALSE;
-    }
-}
-
-static drflac_bool32 drflac__reload_cache(drflac_bs* bs)
-{
-    size_t bytesRead;
-
-#ifndef DR_FLAC_NO_CRC
-    drflac__update_crc16(bs);
-#endif
-
-    /* Fast path. Try just moving the next value in the L2 cache to the L1 cache. */
-    if (drflac__reload_l1_cache_from_l2(bs)) {
-        bs->cache = drflac__be2host__cache_line(bs->cache);
-        bs->consumedBits = 0;
-#ifndef DR_FLAC_NO_CRC
-        bs->crc16Cache = bs->cache;
-#endif
-        return DRFLAC_TRUE;
-    }
-
-    /* Slow path. */
-
-    /*
-    If we get here it means we have failed to load the L1 cache from the L2. Likely we've just reached the end of the stream and the last
-    few bytes did not meet the alignment requirements for the L2 cache. In this case we need to fall back to a slower path and read the
-    data from the unaligned cache.
-    */
-    bytesRead = bs->unalignedByteCount;
-    if (bytesRead == 0) {
-        bs->consumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs);   /* <-- The stream has been exhausted, so marked the bits as consumed. */
-        return DRFLAC_FALSE;
-    }
-
-    DRFLAC_ASSERT(bytesRead < DRFLAC_CACHE_L1_SIZE_BYTES(bs));
-    bs->consumedBits = (drflac_uint32)(DRFLAC_CACHE_L1_SIZE_BYTES(bs) - bytesRead) * 8;
-
-    bs->cache = drflac__be2host__cache_line(bs->unalignedCache);
-    bs->cache &= DRFLAC_CACHE_L1_SELECTION_MASK(DRFLAC_CACHE_L1_BITS_REMAINING(bs));    /* <-- Make sure the consumed bits are always set to zero. Other parts of the library depend on this property. */
-    bs->unalignedByteCount = 0;     /* <-- At this point the unaligned bytes have been moved into the cache and we thus have no more unaligned bytes. */
-
-#ifndef DR_FLAC_NO_CRC
-    bs->crc16Cache = bs->cache >> bs->consumedBits;
-    bs->crc16CacheIgnoredBytes = bs->consumedBits >> 3;
-#endif
-    return DRFLAC_TRUE;
-}
-
-static void drflac__reset_cache(drflac_bs* bs)
-{
-    bs->nextL2Line   = DRFLAC_CACHE_L2_LINE_COUNT(bs);  /* <-- This clears the L2 cache. */
-    bs->consumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs);   /* <-- This clears the L1 cache. */
-    bs->cache = 0;
-    bs->unalignedByteCount = 0;                         /* <-- This clears the trailing unaligned bytes. */
-    bs->unalignedCache = 0;
-
-#ifndef DR_FLAC_NO_CRC
-    bs->crc16Cache = 0;
-    bs->crc16CacheIgnoredBytes = 0;
-#endif
-}
-
-
-static DRFLAC_INLINE drflac_bool32 drflac__read_uint32(drflac_bs* bs, unsigned int bitCount, drflac_uint32* pResultOut)
-{
-    DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(pResultOut != NULL);
-    DRFLAC_ASSERT(bitCount > 0);
-    DRFLAC_ASSERT(bitCount <= 32);
-
-    if (bs->consumedBits == DRFLAC_CACHE_L1_SIZE_BITS(bs)) {
-        if (!drflac__reload_cache(bs)) {
-            return DRFLAC_FALSE;
-        }
-    }
-
-    if (bitCount <= DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
-        /*
-        If we want to load all 32-bits from a 32-bit cache we need to do it slightly differently because we can't do
-        a 32-bit shift on a 32-bit integer. This will never be the case on 64-bit caches, so we can have a slightly
-        more optimal solution for this.
-        */
-#ifdef DRFLAC_64BIT
-        *pResultOut = (drflac_uint32)DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCount);
-        bs->consumedBits += bitCount;
-        bs->cache <<= bitCount;
-#else
-        if (bitCount < DRFLAC_CACHE_L1_SIZE_BITS(bs)) {
-            *pResultOut = (drflac_uint32)DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCount);
-            bs->consumedBits += bitCount;
-            bs->cache <<= bitCount;
-        } else {
-            /* Cannot shift by 32-bits, so need to do it differently. */
-            *pResultOut = (drflac_uint32)bs->cache;
-            bs->consumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs);
-            bs->cache = 0;
-        }
-#endif
-
-        return DRFLAC_TRUE;
-    } else {
-        /* It straddles the cached data. It will never cover more than the next chunk. We just read the number in two parts and combine them. */
-        drflac_uint32 bitCountHi = DRFLAC_CACHE_L1_BITS_REMAINING(bs);
-        drflac_uint32 bitCountLo = bitCount - bitCountHi;
-        drflac_uint32 resultHi;
-
-        DRFLAC_ASSERT(bitCountHi > 0);
-        DRFLAC_ASSERT(bitCountHi < 32);
-        resultHi = (drflac_uint32)DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCountHi);
-
-        if (!drflac__reload_cache(bs)) {
-            return DRFLAC_FALSE;
-        }
-
-        *pResultOut = (resultHi << bitCountLo) | (drflac_uint32)DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCountLo);
-        bs->consumedBits += bitCountLo;
-        bs->cache <<= bitCountLo;
-        return DRFLAC_TRUE;
-    }
-}
-
-static drflac_bool32 drflac__read_int32(drflac_bs* bs, unsigned int bitCount, drflac_int32* pResult)
-{
-    drflac_uint32 result;
-    drflac_uint32 signbit;
-
-    DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(pResult != NULL);
-    DRFLAC_ASSERT(bitCount > 0);
-    DRFLAC_ASSERT(bitCount <= 32);
-
-    if (!drflac__read_uint32(bs, bitCount, &result)) {
-        return DRFLAC_FALSE;
-    }
-
-    signbit = ((result >> (bitCount-1)) & 0x01);
-    result |= (~signbit + 1) << bitCount;
-
-    *pResult = (drflac_int32)result;
-    return DRFLAC_TRUE;
-}
-
-#ifdef DRFLAC_64BIT
-static drflac_bool32 drflac__read_uint64(drflac_bs* bs, unsigned int bitCount, drflac_uint64* pResultOut)
-{
-    drflac_uint32 resultHi;
-    drflac_uint32 resultLo;
-
-    DRFLAC_ASSERT(bitCount <= 64);
-    DRFLAC_ASSERT(bitCount >  32);
-
-    if (!drflac__read_uint32(bs, bitCount - 32, &resultHi)) {
-        return DRFLAC_FALSE;
-    }
-
-    if (!drflac__read_uint32(bs, 32, &resultLo)) {
-        return DRFLAC_FALSE;
-    }
-
-    *pResultOut = (((drflac_uint64)resultHi) << 32) | ((drflac_uint64)resultLo);
-    return DRFLAC_TRUE;
-}
-#endif
-
-/* Function below is unused, but leaving it here in case I need to quickly add it again. */
-#if 0
-static drflac_bool32 drflac__read_int64(drflac_bs* bs, unsigned int bitCount, drflac_int64* pResultOut)
-{
-    drflac_uint64 result;
-    drflac_uint64 signbit;
-
-    DRFLAC_ASSERT(bitCount <= 64);
-
-    if (!drflac__read_uint64(bs, bitCount, &result)) {
-        return DRFLAC_FALSE;
-    }
-
-    signbit = ((result >> (bitCount-1)) & 0x01);
-    result |= (~signbit + 1) << bitCount;
-
-    *pResultOut = (drflac_int64)result;
-    return DRFLAC_TRUE;
-}
-#endif
-
-static drflac_bool32 drflac__read_uint16(drflac_bs* bs, unsigned int bitCount, drflac_uint16* pResult)
-{
-    drflac_uint32 result;
-
-    DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(pResult != NULL);
-    DRFLAC_ASSERT(bitCount > 0);
-    DRFLAC_ASSERT(bitCount <= 16);
-
-    if (!drflac__read_uint32(bs, bitCount, &result)) {
-        return DRFLAC_FALSE;
-    }
-
-    *pResult = (drflac_uint16)result;
-    return DRFLAC_TRUE;
-}
-
-#if 0
-static drflac_bool32 drflac__read_int16(drflac_bs* bs, unsigned int bitCount, drflac_int16* pResult)
-{
-    drflac_int32 result;
-
-    DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(pResult != NULL);
-    DRFLAC_ASSERT(bitCount > 0);
-    DRFLAC_ASSERT(bitCount <= 16);
-
-    if (!drflac__read_int32(bs, bitCount, &result)) {
-        return DRFLAC_FALSE;
-    }
-
-    *pResult = (drflac_int16)result;
-    return DRFLAC_TRUE;
-}
-#endif
-
-static drflac_bool32 drflac__read_uint8(drflac_bs* bs, unsigned int bitCount, drflac_uint8* pResult)
-{
-    drflac_uint32 result;
-
-    DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(pResult != NULL);
-    DRFLAC_ASSERT(bitCount > 0);
-    DRFLAC_ASSERT(bitCount <= 8);
-
-    if (!drflac__read_uint32(bs, bitCount, &result)) {
-        return DRFLAC_FALSE;
-    }
-
-    *pResult = (drflac_uint8)result;
-    return DRFLAC_TRUE;
-}
-
-static drflac_bool32 drflac__read_int8(drflac_bs* bs, unsigned int bitCount, drflac_int8* pResult)
-{
-    drflac_int32 result;
-
-    DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(pResult != NULL);
-    DRFLAC_ASSERT(bitCount > 0);
-    DRFLAC_ASSERT(bitCount <= 8);
-
-    if (!drflac__read_int32(bs, bitCount, &result)) {
-        return DRFLAC_FALSE;
-    }
-
-    *pResult = (drflac_int8)result;
-    return DRFLAC_TRUE;
-}
-
-
-static drflac_bool32 drflac__seek_bits(drflac_bs* bs, size_t bitsToSeek)
-{
-    if (bitsToSeek <= DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
-        bs->consumedBits += (drflac_uint32)bitsToSeek;
-        bs->cache <<= bitsToSeek;
-        return DRFLAC_TRUE;
-    } else {
-        /* It straddles the cached data. This function isn't called too frequently so I'm favouring simplicity here. */
-        bitsToSeek       -= DRFLAC_CACHE_L1_BITS_REMAINING(bs);
-        bs->consumedBits += DRFLAC_CACHE_L1_BITS_REMAINING(bs);
-        bs->cache         = 0;
-
-        /* Simple case. Seek in groups of the same number as bits that fit within a cache line. */
-#ifdef DRFLAC_64BIT
-        while (bitsToSeek >= DRFLAC_CACHE_L1_SIZE_BITS(bs)) {
-            drflac_uint64 bin;
-            if (!drflac__read_uint64(bs, DRFLAC_CACHE_L1_SIZE_BITS(bs), &bin)) {
-                return DRFLAC_FALSE;
-            }
-            bitsToSeek -= DRFLAC_CACHE_L1_SIZE_BITS(bs);
-        }
-#else
-        while (bitsToSeek >= DRFLAC_CACHE_L1_SIZE_BITS(bs)) {
-            drflac_uint32 bin;
-            if (!drflac__read_uint32(bs, DRFLAC_CACHE_L1_SIZE_BITS(bs), &bin)) {
-                return DRFLAC_FALSE;
-            }
-            bitsToSeek -= DRFLAC_CACHE_L1_SIZE_BITS(bs);
-        }
-#endif
-
-        /* Whole leftover bytes. */
-        while (bitsToSeek >= 8) {
-            drflac_uint8 bin;
-            if (!drflac__read_uint8(bs, 8, &bin)) {
-                return DRFLAC_FALSE;
-            }
-            bitsToSeek -= 8;
-        }
-
-        /* Leftover bits. */
-        if (bitsToSeek > 0) {
-            drflac_uint8 bin;
-            if (!drflac__read_uint8(bs, (drflac_uint32)bitsToSeek, &bin)) {
-                return DRFLAC_FALSE;
-            }
-            bitsToSeek = 0; /* <-- Necessary for the assert below. */
-        }
-
-        DRFLAC_ASSERT(bitsToSeek == 0);
-        return DRFLAC_TRUE;
-    }
-}
-
-
-/* This function moves the bit streamer to the first bit after the sync code (bit 15 of the of the frame header). It will also update the CRC-16. */
-static drflac_bool32 drflac__find_and_seek_to_next_sync_code(drflac_bs* bs)
-{
-    DRFLAC_ASSERT(bs != NULL);
-
-    /*
-    The sync code is always aligned to 8 bits. This is convenient for us because it means we can do byte-aligned movements. The first
-    thing to do is align to the next byte.
-    */
-    if (!drflac__seek_bits(bs, DRFLAC_CACHE_L1_BITS_REMAINING(bs) & 7)) {
-        return DRFLAC_FALSE;
-    }
-
-    for (;;) {
-        drflac_uint8 hi;
-
-#ifndef DR_FLAC_NO_CRC
-        drflac__reset_crc16(bs);
-#endif
-
-        if (!drflac__read_uint8(bs, 8, &hi)) {
-            return DRFLAC_FALSE;
-        }
-
-        if (hi == 0xFF) {
-            drflac_uint8 lo;
-            if (!drflac__read_uint8(bs, 6, &lo)) {
-                return DRFLAC_FALSE;
-            }
-
-            if (lo == 0x3E) {
-                return DRFLAC_TRUE;
-            } else {
-                if (!drflac__seek_bits(bs, DRFLAC_CACHE_L1_BITS_REMAINING(bs) & 7)) {
-                    return DRFLAC_FALSE;
-                }
-            }
-        }
-    }
-
-    /* Should never get here. */
-    /*return DRFLAC_FALSE;*/
-}
-
-
-#if defined(DRFLAC_HAS_LZCNT_INTRINSIC)
-#define DRFLAC_IMPLEMENT_CLZ_LZCNT
-#endif
-#if  defined(_MSC_VER) && _MSC_VER >= 1400 && (defined(DRFLAC_X64) || defined(DRFLAC_X86))
-#define DRFLAC_IMPLEMENT_CLZ_MSVC
-#endif
-
-static DRFLAC_INLINE drflac_uint32 drflac__clz_software(drflac_cache_t x)
-{
-    drflac_uint32 n;
-    static drflac_uint32 clz_table_4[] = {
-        0,
-        4,
-        3, 3,
-        2, 2, 2, 2,
-        1, 1, 1, 1, 1, 1, 1, 1
-    };
-
-    if (x == 0) {
-        return sizeof(x)*8;
-    }
-
-    n = clz_table_4[x >> (sizeof(x)*8 - 4)];
-    if (n == 0) {
-#ifdef DRFLAC_64BIT
-        if ((x & ((drflac_uint64)0xFFFFFFFF << 32)) == 0) { n  = 32; x <<= 32; }
-        if ((x & ((drflac_uint64)0xFFFF0000 << 32)) == 0) { n += 16; x <<= 16; }
-        if ((x & ((drflac_uint64)0xFF000000 << 32)) == 0) { n += 8;  x <<= 8;  }
-        if ((x & ((drflac_uint64)0xF0000000 << 32)) == 0) { n += 4;  x <<= 4;  }
-#else
-        if ((x & 0xFFFF0000) == 0) { n  = 16; x <<= 16; }
-        if ((x & 0xFF000000) == 0) { n += 8;  x <<= 8;  }
-        if ((x & 0xF0000000) == 0) { n += 4;  x <<= 4;  }
-#endif
-        n += clz_table_4[x >> (sizeof(x)*8 - 4)];
-    }
-
-    return n - 1;
-}
-
-#ifdef DRFLAC_IMPLEMENT_CLZ_LZCNT
-static DRFLAC_INLINE drflac_bool32 drflac__is_lzcnt_supported(void)
-{
-    /* Fast compile time check for ARM. */
-#if defined(DRFLAC_HAS_LZCNT_INTRINSIC) && defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 5)
-    return DRFLAC_TRUE;
-#else
-    /* If the compiler itself does not support the intrinsic then we'll need to return false. */
-    #ifdef DRFLAC_HAS_LZCNT_INTRINSIC
-        return drflac__gIsLZCNTSupported;
-    #else
-        return DRFLAC_FALSE;
-    #endif
-#endif
-}
-
-static DRFLAC_INLINE drflac_uint32 drflac__clz_lzcnt(drflac_cache_t x)
-{
-#if defined(_MSC_VER) && !defined(__clang__)
-    #ifdef DRFLAC_64BIT
-        return (drflac_uint32)__lzcnt64(x);
-    #else
-        return (drflac_uint32)__lzcnt(x);
-    #endif
-#else
-    #if defined(__GNUC__) || defined(__clang__)
-        #if defined(DRFLAC_X64)
-            {
-                drflac_uint64 r;
-                __asm__ __volatile__ (
-                    "lzcnt{ %1, %0| %0, %1}" : "=r"(r) : "r"(x)
-                );
-
-                return (drflac_uint32)r;
-            }
-        #elif defined(DRFLAC_X86)
-            {
-                drflac_uint32 r;
-                __asm__ __volatile__ (
-                    "lzcnt{l %1, %0| %0, %1}" : "=r"(r) : "r"(x)
-                );
-
-                return r;
-            }
-        #elif defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 5) && !defined(DRFLAC_64BIT)   /* <-- I haven't tested 64-bit inline assembly, so only enabling this for the 32-bit build for now. */
-            {
-                unsigned int r;
-                __asm__ __volatile__ (
-                #if defined(DRFLAC_64BIT)
-                    "clz %w[out], %w[in]" : [out]"=r"(r) : [in]"r"(x)   /* <-- This is untested. If someone in the community could test this, that would be appreciated! */
-                #else
-                    "clz %[out], %[in]" : [out]"=r"(r) : [in]"r"(x)
-                #endif
-                );
-
-                return r;
-            }
-        #else
-            if (x == 0) {
-                return sizeof(x)*8;
-            }
-            #ifdef DRFLAC_64BIT
-                return (drflac_uint32)__builtin_clzll((drflac_uint64)x);
-            #else
-                return (drflac_uint32)__builtin_clzl((drflac_uint32)x);
-            #endif
-        #endif
-    #else
-        /* Unsupported compiler. */
-        #error "This compiler does not support the lzcnt intrinsic."
-    #endif
-#endif
-}
-#endif
-
-#ifdef DRFLAC_IMPLEMENT_CLZ_MSVC
-#include <intrin.h> /* For BitScanReverse(). */
-
-static DRFLAC_INLINE drflac_uint32 drflac__clz_msvc(drflac_cache_t x)
-{
-    drflac_uint32 n;
-
-    if (x == 0) {
-        return sizeof(x)*8;
-    }
-
-#ifdef DRFLAC_64BIT
-    _BitScanReverse64((unsigned long*)&n, x);
-#else
-    _BitScanReverse((unsigned long*)&n, x);
-#endif
-    return sizeof(x)*8 - n - 1;
-}
-#endif
-
-static DRFLAC_INLINE drflac_uint32 drflac__clz(drflac_cache_t x)
-{
-#ifdef DRFLAC_IMPLEMENT_CLZ_LZCNT
-    if (drflac__is_lzcnt_supported()) {
-        return drflac__clz_lzcnt(x);
-    } else
-#endif
-    {
-#ifdef DRFLAC_IMPLEMENT_CLZ_MSVC
-        return drflac__clz_msvc(x);
-#else
-        return drflac__clz_software(x);
-#endif
-    }
-}
-
-
-static DRFLAC_INLINE drflac_bool32 drflac__seek_past_next_set_bit(drflac_bs* bs, unsigned int* pOffsetOut)
-{
-    drflac_uint32 zeroCounter = 0;
-    drflac_uint32 setBitOffsetPlus1;
-
-    while (bs->cache == 0) {
-        zeroCounter += (drflac_uint32)DRFLAC_CACHE_L1_BITS_REMAINING(bs);
-        if (!drflac__reload_cache(bs)) {
-            return DRFLAC_FALSE;
-        }
-    }
-
-    setBitOffsetPlus1 = drflac__clz(bs->cache);
-    setBitOffsetPlus1 += 1;
-
-    bs->consumedBits += setBitOffsetPlus1;
-    bs->cache <<= setBitOffsetPlus1;
-
-    *pOffsetOut = zeroCounter + setBitOffsetPlus1 - 1;
-    return DRFLAC_TRUE;
-}
-
-
-
-static drflac_bool32 drflac__seek_to_byte(drflac_bs* bs, drflac_uint64 offsetFromStart)
-{
-    DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(offsetFromStart > 0);
-
-    /*
-    Seeking from the start is not quite as trivial as it sounds because the onSeek callback takes a signed 32-bit integer (which
-    is intentional because it simplifies the implementation of the onSeek callbacks), however offsetFromStart is unsigned 64-bit.
-    To resolve we just need to do an initial seek from the start, and then a series of offset seeks to make up the remainder.
-    */
-    if (offsetFromStart > 0x7FFFFFFF) {
-        drflac_uint64 bytesRemaining = offsetFromStart;
-        if (!bs->onSeek(bs->pUserData, 0x7FFFFFFF, drflac_seek_origin_start)) {
-            return DRFLAC_FALSE;
-        }
-        bytesRemaining -= 0x7FFFFFFF;
-
-        while (bytesRemaining > 0x7FFFFFFF) {
-            if (!bs->onSeek(bs->pUserData, 0x7FFFFFFF, drflac_seek_origin_current)) {
-                return DRFLAC_FALSE;
-            }
-            bytesRemaining -= 0x7FFFFFFF;
-        }
-
-        if (bytesRemaining > 0) {
-            if (!bs->onSeek(bs->pUserData, (int)bytesRemaining, drflac_seek_origin_current)) {
-                return DRFLAC_FALSE;
-            }
-        }
-    } else {
-        if (!bs->onSeek(bs->pUserData, (int)offsetFromStart, drflac_seek_origin_start)) {
-            return DRFLAC_FALSE;
-        }
-    }
-
-    /* The cache should be reset to force a reload of fresh data from the client. */
-    drflac__reset_cache(bs);
-    return DRFLAC_TRUE;
-}
-
-
-static drflac_result drflac__read_utf8_coded_number(drflac_bs* bs, drflac_uint64* pNumberOut, drflac_uint8* pCRCOut)
-{
-    drflac_uint8 crc;
-    drflac_uint64 result;
-    drflac_uint8 utf8[7] = {0};
-    int byteCount;
-    int i;
-
-    DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(pNumberOut != NULL);
-    DRFLAC_ASSERT(pCRCOut != NULL);
-
-    crc = *pCRCOut;
-
-    if (!drflac__read_uint8(bs, 8, utf8)) {
-        *pNumberOut = 0;
-        return DRFLAC_AT_END;
-    }
-    crc = drflac_crc8(crc, utf8[0], 8);
-
-    if ((utf8[0] & 0x80) == 0) {
-        *pNumberOut = utf8[0];
-        *pCRCOut = crc;
-        return DRFLAC_SUCCESS;
-    }
-
-    /*byteCount = 1;*/
-    if ((utf8[0] & 0xE0) == 0xC0) {
-        byteCount = 2;
-    } else if ((utf8[0] & 0xF0) == 0xE0) {
-        byteCount = 3;
-    } else if ((utf8[0] & 0xF8) == 0xF0) {
-        byteCount = 4;
-    } else if ((utf8[0] & 0xFC) == 0xF8) {
-        byteCount = 5;
-    } else if ((utf8[0] & 0xFE) == 0xFC) {
-        byteCount = 6;
-    } else if ((utf8[0] & 0xFF) == 0xFE) {
-        byteCount = 7;
-    } else {
-        *pNumberOut = 0;
-        return DRFLAC_CRC_MISMATCH;     /* Bad UTF-8 encoding. */
-    }
-
-    /* Read extra bytes. */
-    DRFLAC_ASSERT(byteCount > 1);
-
-    result = (drflac_uint64)(utf8[0] & (0xFF >> (byteCount + 1)));
-    for (i = 1; i < byteCount; ++i) {
-        if (!drflac__read_uint8(bs, 8, utf8 + i)) {
-            *pNumberOut = 0;
-            return DRFLAC_AT_END;
-        }
-        crc = drflac_crc8(crc, utf8[i], 8);
-
-        result = (result << 6) | (utf8[i] & 0x3F);
-    }
-
-    *pNumberOut = result;
-    *pCRCOut = crc;
-    return DRFLAC_SUCCESS;
-}
-
-
-
-/*
-The next two functions are responsible for calculating the prediction.
-
-When the bits per sample is >16 we need to use 64-bit integer arithmetic because otherwise we'll run out of precision. It's
-safe to assume this will be slower on 32-bit platforms so we use a more optimal solution when the bits per sample is <=16.
-*/
-static DRFLAC_INLINE drflac_int32 drflac__calculate_prediction_32(drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pDecodedSamples)
-{
-    drflac_int32 prediction = 0;
-
-    DRFLAC_ASSERT(order <= 32);
-
-    /* 32-bit version. */
-
-    /* VC++ optimizes this to a single jmp. I've not yet verified this for other compilers. */
-    switch (order)
-    {
-    case 32: prediction += coefficients[31] * pDecodedSamples[-32];
-    case 31: prediction += coefficients[30] * pDecodedSamples[-31];
-    case 30: prediction += coefficients[29] * pDecodedSamples[-30];
-    case 29: prediction += coefficients[28] * pDecodedSamples[-29];
-    case 28: prediction += coefficients[27] * pDecodedSamples[-28];
-    case 27: prediction += coefficients[26] * pDecodedSamples[-27];
-    case 26: prediction += coefficients[25] * pDecodedSamples[-26];
-    case 25: prediction += coefficients[24] * pDecodedSamples[-25];
-    case 24: prediction += coefficients[23] * pDecodedSamples[-24];
-    case 23: prediction += coefficients[22] * pDecodedSamples[-23];
-    case 22: prediction += coefficients[21] * pDecodedSamples[-22];
-    case 21: prediction += coefficients[20] * pDecodedSamples[-21];
-    case 20: prediction += coefficients[19] * pDecodedSamples[-20];
-    case 19: prediction += coefficients[18] * pDecodedSamples[-19];
-    case 18: prediction += coefficients[17] * pDecodedSamples[-18];
-    case 17: prediction += coefficients[16] * pDecodedSamples[-17];
-    case 16: prediction += coefficients[15] * pDecodedSamples[-16];
-    case 15: prediction += coefficients[14] * pDecodedSamples[-15];
-    case 14: prediction += coefficients[13] * pDecodedSamples[-14];
-    case 13: prediction += coefficients[12] * pDecodedSamples[-13];
-    case 12: prediction += coefficients[11] * pDecodedSamples[-12];
-    case 11: prediction += coefficients[10] * pDecodedSamples[-11];
-    case 10: prediction += coefficients[ 9] * pDecodedSamples[-10];
-    case  9: prediction += coefficients[ 8] * pDecodedSamples[- 9];
-    case  8: prediction += coefficients[ 7] * pDecodedSamples[- 8];
-    case  7: prediction += coefficients[ 6] * pDecodedSamples[- 7];
-    case  6: prediction += coefficients[ 5] * pDecodedSamples[- 6];
-    case  5: prediction += coefficients[ 4] * pDecodedSamples[- 5];
-    case  4: prediction += coefficients[ 3] * pDecodedSamples[- 4];
-    case  3: prediction += coefficients[ 2] * pDecodedSamples[- 3];
-    case  2: prediction += coefficients[ 1] * pDecodedSamples[- 2];
-    case  1: prediction += coefficients[ 0] * pDecodedSamples[- 1];
-    }
-
-    return (drflac_int32)(prediction >> shift);
-}
-
-static DRFLAC_INLINE drflac_int32 drflac__calculate_prediction_64(drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pDecodedSamples)
-{
-    drflac_int64 prediction;
-
-    DRFLAC_ASSERT(order <= 32);
-
-    /* 64-bit version. */
-
-    /* This method is faster on the 32-bit build when compiling with VC++. See note below. */
-#ifndef DRFLAC_64BIT
-    if (order == 8)
-    {
-        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
-        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
-        prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3];
-        prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4];
-        prediction += coefficients[4] * (drflac_int64)pDecodedSamples[-5];
-        prediction += coefficients[5] * (drflac_int64)pDecodedSamples[-6];
-        prediction += coefficients[6] * (drflac_int64)pDecodedSamples[-7];
-        prediction += coefficients[7] * (drflac_int64)pDecodedSamples[-8];
-    }
-    else if (order == 7)
-    {
-        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
-        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
-        prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3];
-        prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4];
-        prediction += coefficients[4] * (drflac_int64)pDecodedSamples[-5];
-        prediction += coefficients[5] * (drflac_int64)pDecodedSamples[-6];
-        prediction += coefficients[6] * (drflac_int64)pDecodedSamples[-7];
-    }
-    else if (order == 3)
-    {
-        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
-        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
-        prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3];
-    }
-    else if (order == 6)
-    {
-        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
-        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
-        prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3];
-        prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4];
-        prediction += coefficients[4] * (drflac_int64)pDecodedSamples[-5];
-        prediction += coefficients[5] * (drflac_int64)pDecodedSamples[-6];
-    }
-    else if (order == 5)
-    {
-        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
-        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
-        prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3];
-        prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4];
-        prediction += coefficients[4] * (drflac_int64)pDecodedSamples[-5];
-    }
-    else if (order == 4)
-    {
-        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
-        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
-        prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3];
-        prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4];
-    }
-    else if (order == 12)
-    {
-        prediction  = coefficients[0]  * (drflac_int64)pDecodedSamples[-1];
-        prediction += coefficients[1]  * (drflac_int64)pDecodedSamples[-2];
-        prediction += coefficients[2]  * (drflac_int64)pDecodedSamples[-3];
-        prediction += coefficients[3]  * (drflac_int64)pDecodedSamples[-4];
-        prediction += coefficients[4]  * (drflac_int64)pDecodedSamples[-5];
-        prediction += coefficients[5]  * (drflac_int64)pDecodedSamples[-6];
-        prediction += coefficients[6]  * (drflac_int64)pDecodedSamples[-7];
-        prediction += coefficients[7]  * (drflac_int64)pDecodedSamples[-8];
-        prediction += coefficients[8]  * (drflac_int64)pDecodedSamples[-9];
-        prediction += coefficients[9]  * (drflac_int64)pDecodedSamples[-10];
-        prediction += coefficients[10] * (drflac_int64)pDecodedSamples[-11];
-        prediction += coefficients[11] * (drflac_int64)pDecodedSamples[-12];
-    }
-    else if (order == 2)
-    {
-        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
-        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
-    }
-    else if (order == 1)
-    {
-        prediction = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
-    }
-    else if (order == 10)
-    {
-        prediction  = coefficients[0]  * (drflac_int64)pDecodedSamples[-1];
-        prediction += coefficients[1]  * (drflac_int64)pDecodedSamples[-2];
-        prediction += coefficients[2]  * (drflac_int64)pDecodedSamples[-3];
-        prediction += coefficients[3]  * (drflac_int64)pDecodedSamples[-4];
-        prediction += coefficients[4]  * (drflac_int64)pDecodedSamples[-5];
-        prediction += coefficients[5]  * (drflac_int64)pDecodedSamples[-6];
-        prediction += coefficients[6]  * (drflac_int64)pDecodedSamples[-7];
-        prediction += coefficients[7]  * (drflac_int64)pDecodedSamples[-8];
-        prediction += coefficients[8]  * (drflac_int64)pDecodedSamples[-9];
-        prediction += coefficients[9]  * (drflac_int64)pDecodedSamples[-10];
-    }
-    else if (order == 9)
-    {
-        prediction  = coefficients[0]  * (drflac_int64)pDecodedSamples[-1];
-        prediction += coefficients[1]  * (drflac_int64)pDecodedSamples[-2];
-        prediction += coefficients[2]  * (drflac_int64)pDecodedSamples[-3];
-        prediction += coefficients[3]  * (drflac_int64)pDecodedSamples[-4];
-        prediction += coefficients[4]  * (drflac_int64)pDecodedSamples[-5];
-        prediction += coefficients[5]  * (drflac_int64)pDecodedSamples[-6];
-        prediction += coefficients[6]  * (drflac_int64)pDecodedSamples[-7];
-        prediction += coefficients[7]  * (drflac_int64)pDecodedSamples[-8];
-        prediction += coefficients[8]  * (drflac_int64)pDecodedSamples[-9];
-    }
-    else if (order == 11)
-    {
-        prediction  = coefficients[0]  * (drflac_int64)pDecodedSamples[-1];
-        prediction += coefficients[1]  * (drflac_int64)pDecodedSamples[-2];
-        prediction += coefficients[2]  * (drflac_int64)pDecodedSamples[-3];
-        prediction += coefficients[3]  * (drflac_int64)pDecodedSamples[-4];
-        prediction += coefficients[4]  * (drflac_int64)pDecodedSamples[-5];
-        prediction += coefficients[5]  * (drflac_int64)pDecodedSamples[-6];
-        prediction += coefficients[6]  * (drflac_int64)pDecodedSamples[-7];
-        prediction += coefficients[7]  * (drflac_int64)pDecodedSamples[-8];
-        prediction += coefficients[8]  * (drflac_int64)pDecodedSamples[-9];
-        prediction += coefficients[9]  * (drflac_int64)pDecodedSamples[-10];
-        prediction += coefficients[10] * (drflac_int64)pDecodedSamples[-11];
-    }
-    else
-    {
-        int j;
-
-        prediction = 0;
-        for (j = 0; j < (int)order; ++j) {
-            prediction += coefficients[j] * (drflac_int64)pDecodedSamples[-j-1];
-        }
-    }
-#endif
-
-    /*
-    VC++ optimizes this to a single jmp instruction, but only the 64-bit build. The 32-bit build generates less efficient code for some
-    reason. The ugly version above is faster so we'll just switch between the two depending on the target platform.
-    */
-#ifdef DRFLAC_64BIT
-    prediction = 0;
-    switch (order)
-    {
-    case 32: prediction += coefficients[31] * (drflac_int64)pDecodedSamples[-32];
-    case 31: prediction += coefficients[30] * (drflac_int64)pDecodedSamples[-31];
-    case 30: prediction += coefficients[29] * (drflac_int64)pDecodedSamples[-30];
-    case 29: prediction += coefficients[28] * (drflac_int64)pDecodedSamples[-29];
-    case 28: prediction += coefficients[27] * (drflac_int64)pDecodedSamples[-28];
-    case 27: prediction += coefficients[26] * (drflac_int64)pDecodedSamples[-27];
-    case 26: prediction += coefficients[25] * (drflac_int64)pDecodedSamples[-26];
-    case 25: prediction += coefficients[24] * (drflac_int64)pDecodedSamples[-25];
-    case 24: prediction += coefficients[23] * (drflac_int64)pDecodedSamples[-24];
-    case 23: prediction += coefficients[22] * (drflac_int64)pDecodedSamples[-23];
-    case 22: prediction += coefficients[21] * (drflac_int64)pDecodedSamples[-22];
-    case 21: prediction += coefficients[20] * (drflac_int64)pDecodedSamples[-21];
-    case 20: prediction += coefficients[19] * (drflac_int64)pDecodedSamples[-20];
-    case 19: prediction += coefficients[18] * (drflac_int64)pDecodedSamples[-19];
-    case 18: prediction += coefficients[17] * (drflac_int64)pDecodedSamples[-18];
-    case 17: prediction += coefficients[16] * (drflac_int64)pDecodedSamples[-17];
-    case 16: prediction += coefficients[15] * (drflac_int64)pDecodedSamples[-16];
-    case 15: prediction += coefficients[14] * (drflac_int64)pDecodedSamples[-15];
-    case 14: prediction += coefficients[13] * (drflac_int64)pDecodedSamples[-14];
-    case 13: prediction += coefficients[12] * (drflac_int64)pDecodedSamples[-13];
-    case 12: prediction += coefficients[11] * (drflac_int64)pDecodedSamples[-12];
-    case 11: prediction += coefficients[10] * (drflac_int64)pDecodedSamples[-11];
-    case 10: prediction += coefficients[ 9] * (drflac_int64)pDecodedSamples[-10];
-    case  9: prediction += coefficients[ 8] * (drflac_int64)pDecodedSamples[- 9];
-    case  8: prediction += coefficients[ 7] * (drflac_int64)pDecodedSamples[- 8];
-    case  7: prediction += coefficients[ 6] * (drflac_int64)pDecodedSamples[- 7];
-    case  6: prediction += coefficients[ 5] * (drflac_int64)pDecodedSamples[- 6];
-    case  5: prediction += coefficients[ 4] * (drflac_int64)pDecodedSamples[- 5];
-    case  4: prediction += coefficients[ 3] * (drflac_int64)pDecodedSamples[- 4];
-    case  3: prediction += coefficients[ 2] * (drflac_int64)pDecodedSamples[- 3];
-    case  2: prediction += coefficients[ 1] * (drflac_int64)pDecodedSamples[- 2];
-    case  1: prediction += coefficients[ 0] * (drflac_int64)pDecodedSamples[- 1];
-    }
-#endif
-
-    return (drflac_int32)(prediction >> shift);
-}
-
-
-#if 0
-/*
-Reference implementation for reading and decoding samples with residual. This is intentionally left unoptimized for the
-sake of readability and should only be used as a reference.
-*/
-static drflac_bool32 drflac__decode_samples_with_residual__rice__reference(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
-{
-    drflac_uint32 i;
-
-    DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(count > 0);
-    DRFLAC_ASSERT(pSamplesOut != NULL);
-
-    for (i = 0; i < count; ++i) {
-        drflac_uint32 zeroCounter = 0;
-        for (;;) {
-            drflac_uint8 bit;
-            if (!drflac__read_uint8(bs, 1, &bit)) {
-                return DRFLAC_FALSE;
-            }
-
-            if (bit == 0) {
-                zeroCounter += 1;
-            } else {
-                break;
-            }
-        }
-
-        drflac_uint32 decodedRice;
-        if (riceParam > 0) {
-            if (!drflac__read_uint32(bs, riceParam, &decodedRice)) {
-                return DRFLAC_FALSE;
-            }
-        } else {
-            decodedRice = 0;
-        }
-
-        decodedRice |= (zeroCounter << riceParam);
-        if ((decodedRice & 0x01)) {
-            decodedRice = ~(decodedRice >> 1);
-        } else {
-            decodedRice =  (decodedRice >> 1);
-        }
-
-
-        if (bitsPerSample+shift >= 32) {
-            pSamplesOut[i] = decodedRice + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + i);
-        } else {
-            pSamplesOut[i] = decodedRice + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + i);
-        }
-    }
-
-    return DRFLAC_TRUE;
-}
-#endif
-
-#if 0
-static drflac_bool32 drflac__read_rice_parts__reference(drflac_bs* bs, drflac_uint8 riceParam, drflac_uint32* pZeroCounterOut, drflac_uint32* pRiceParamPartOut)
-{
-    drflac_uint32 zeroCounter = 0;
-    drflac_uint32 decodedRice;
-
-    for (;;) {
-        drflac_uint8 bit;
-        if (!drflac__read_uint8(bs, 1, &bit)) {
-            return DRFLAC_FALSE;
-        }
-
-        if (bit == 0) {
-            zeroCounter += 1;
-        } else {
-            break;
-        }
-    }
-
-    if (riceParam > 0) {
-        if (!drflac__read_uint32(bs, riceParam, &decodedRice)) {
-            return DRFLAC_FALSE;
-        }
-    } else {
-        decodedRice = 0;
-    }
-
-    *pZeroCounterOut = zeroCounter;
-    *pRiceParamPartOut = decodedRice;
-    return DRFLAC_TRUE;
-}
-#endif
-
-#if 0
-static DRFLAC_INLINE drflac_bool32 drflac__read_rice_parts(drflac_bs* bs, drflac_uint8 riceParam, drflac_uint32* pZeroCounterOut, drflac_uint32* pRiceParamPartOut)
-{
-    drflac_cache_t riceParamMask;
-    drflac_uint32 zeroCounter;
-    drflac_uint32 setBitOffsetPlus1;
-    drflac_uint32 riceParamPart;
-    drflac_uint32 riceLength;
-
-    DRFLAC_ASSERT(riceParam > 0);   /* <-- riceParam should never be 0. drflac__read_rice_parts__param_equals_zero() should be used instead for this case. */
-
-    riceParamMask = DRFLAC_CACHE_L1_SELECTION_MASK(riceParam);
-
-    zeroCounter = 0;
-    while (bs->cache == 0) {
-        zeroCounter += (drflac_uint32)DRFLAC_CACHE_L1_BITS_REMAINING(bs);
-        if (!drflac__reload_cache(bs)) {
-            return DRFLAC_FALSE;
-        }
-    }
-
-    setBitOffsetPlus1 = drflac__clz(bs->cache);
-    zeroCounter += setBitOffsetPlus1;
-    setBitOffsetPlus1 += 1;
-
-    riceLength = setBitOffsetPlus1 + riceParam;
-    if (riceLength < DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
-        riceParamPart = (drflac_uint32)((bs->cache & (riceParamMask >> setBitOffsetPlus1)) >> DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, riceLength));
-
-        bs->consumedBits += riceLength;
-        bs->cache <<= riceLength;
-    } else {
-        drflac_uint32 bitCountLo;
-        drflac_cache_t resultHi;
-
-        bs->consumedBits += riceLength;
-        bs->cache <<= setBitOffsetPlus1 & (DRFLAC_CACHE_L1_SIZE_BITS(bs)-1);    /* <-- Equivalent to "if (setBitOffsetPlus1 < DRFLAC_CACHE_L1_SIZE_BITS(bs)) { bs->cache <<= setBitOffsetPlus1; }" */
-
-        /* It straddles the cached data. It will never cover more than the next chunk. We just read the number in two parts and combine them. */
-        bitCountLo = bs->consumedBits - DRFLAC_CACHE_L1_SIZE_BITS(bs);
-        resultHi = DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, riceParam);  /* <-- Use DRFLAC_CACHE_L1_SELECT_AND_SHIFT_SAFE() if ever this function allows riceParam=0. */
-
-        if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {
-#ifndef DR_FLAC_NO_CRC
-            drflac__update_crc16(bs);
-#endif
-            bs->cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]);
-            bs->consumedBits = 0;
-#ifndef DR_FLAC_NO_CRC
-            bs->crc16Cache = bs->cache;
-#endif
-        } else {
-            /* Slow path. We need to fetch more data from the client. */
-            if (!drflac__reload_cache(bs)) {
-                return DRFLAC_FALSE;
-            }
-        }
-
-        riceParamPart = (drflac_uint32)(resultHi | DRFLAC_CACHE_L1_SELECT_AND_SHIFT_SAFE(bs, bitCountLo));
-
-        bs->consumedBits += bitCountLo;
-        bs->cache <<= bitCountLo;
-    }
-
-    pZeroCounterOut[0] = zeroCounter;
-    pRiceParamPartOut[0] = riceParamPart;
-
-    return DRFLAC_TRUE;
-}
-#endif
-
-static DRFLAC_INLINE drflac_bool32 drflac__read_rice_parts_x1(drflac_bs* bs, drflac_uint8 riceParam, drflac_uint32* pZeroCounterOut, drflac_uint32* pRiceParamPartOut)
-{
-    drflac_uint32  riceParamPlus1 = riceParam + 1;
-    /*drflac_cache_t riceParamPlus1Mask  = DRFLAC_CACHE_L1_SELECTION_MASK(riceParamPlus1);*/
-    drflac_uint32  riceParamPlus1Shift = DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, riceParamPlus1);
-    drflac_uint32  riceParamPlus1MaxConsumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs) - riceParamPlus1;
-
-    /*
-    The idea here is to use local variables for the cache in an attempt to encourage the compiler to store them in registers. I have
-    no idea how this will work in practice...
-    */
-    drflac_cache_t bs_cache = bs->cache;
-    drflac_uint32  bs_consumedBits = bs->consumedBits;
-
-    /* The first thing to do is find the first unset bit. Most likely a bit will be set in the current cache line. */
-    drflac_uint32  lzcount = drflac__clz(bs_cache);
-    if (lzcount < sizeof(bs_cache)*8) {
-        pZeroCounterOut[0] = lzcount;
-
-        /*
-        It is most likely that the riceParam part (which comes after the zero counter) is also on this cache line. When extracting
-        this, we include the set bit from the unary coded part because it simplifies cache management. This bit will be handled
-        outside of this function at a higher level.
-        */
-    extract_rice_param_part:
-        bs_cache       <<= lzcount;
-        bs_consumedBits += lzcount;
-
-        if (bs_consumedBits <= riceParamPlus1MaxConsumedBits) {
-            /* Getting here means the rice parameter part is wholly contained within the current cache line. */
-            pRiceParamPartOut[0] = (drflac_uint32)(bs_cache >> riceParamPlus1Shift);
-            bs_cache       <<= riceParamPlus1;
-            bs_consumedBits += riceParamPlus1;
-        } else {
-            drflac_uint32 riceParamPartHi;
-            drflac_uint32 riceParamPartLo;
-            drflac_uint32 riceParamPartLoBitCount;
-
-            /*
-            Getting here means the rice parameter part straddles the cache line. We need to read from the tail of the current cache
-            line, reload the cache, and then combine it with the head of the next cache line.
-            */
-
-            /* Grab the high part of the rice parameter part. */
-            riceParamPartHi = (drflac_uint32)(bs_cache >> riceParamPlus1Shift);
-
-            /* Before reloading the cache we need to grab the size in bits of the low part. */
-            riceParamPartLoBitCount = bs_consumedBits - riceParamPlus1MaxConsumedBits;
-            DRFLAC_ASSERT(riceParamPartLoBitCount > 0 && riceParamPartLoBitCount < 32);
-
-            /* Now reload the cache. */
-            if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {
-            #ifndef DR_FLAC_NO_CRC
-                drflac__update_crc16(bs);
-            #endif
-                bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]);
-                bs_consumedBits = riceParamPartLoBitCount;
-            #ifndef DR_FLAC_NO_CRC
-                bs->crc16Cache = bs_cache;
-            #endif
-            } else {
-                /* Slow path. We need to fetch more data from the client. */
-                if (!drflac__reload_cache(bs)) {
-                    return DRFLAC_FALSE;
-                }
-
-                bs_cache = bs->cache;
-                bs_consumedBits = bs->consumedBits + riceParamPartLoBitCount;
-            }
-
-            /* We should now have enough information to construct the rice parameter part. */
-            riceParamPartLo = (drflac_uint32)(bs_cache >> (DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, riceParamPartLoBitCount)));
-            pRiceParamPartOut[0] = riceParamPartHi | riceParamPartLo;
-
-            bs_cache <<= riceParamPartLoBitCount;
-        }
-    } else {
-        /*
-        Getting here means there are no bits set on the cache line. This is a less optimal case because we just wasted a call
-        to drflac__clz() and we need to reload the cache.
-        */
-        drflac_uint32 zeroCounter = (drflac_uint32)(DRFLAC_CACHE_L1_SIZE_BITS(bs) - bs_consumedBits);
-        for (;;) {
-            if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {
-            #ifndef DR_FLAC_NO_CRC
-                drflac__update_crc16(bs);
-            #endif
-                bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]);
-                bs_consumedBits = 0;
-            #ifndef DR_FLAC_NO_CRC
-                bs->crc16Cache = bs_cache;
-            #endif
-            } else {
-                /* Slow path. We need to fetch more data from the client. */
-                if (!drflac__reload_cache(bs)) {
-                    return DRFLAC_FALSE;
-                }
-
-                bs_cache = bs->cache;
-                bs_consumedBits = bs->consumedBits;
-            }
-
-            lzcount = drflac__clz(bs_cache);
-            zeroCounter += lzcount;
-
-            if (lzcount < sizeof(bs_cache)*8) {
-                break;
-            }
-        }
-
-        pZeroCounterOut[0] = zeroCounter;
-        goto extract_rice_param_part;
-    }
-
-    /* Make sure the cache is restored at the end of it all. */
-    bs->cache = bs_cache;
-    bs->consumedBits = bs_consumedBits;
-
-    return DRFLAC_TRUE;
-}
-
-static DRFLAC_INLINE drflac_bool32 drflac__seek_rice_parts(drflac_bs* bs, drflac_uint8 riceParam)
-{
-    drflac_uint32  riceParamPlus1 = riceParam + 1;
-    drflac_uint32  riceParamPlus1MaxConsumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs) - riceParamPlus1;
-
-    /*
-    The idea here is to use local variables for the cache in an attempt to encourage the compiler to store them in registers. I have
-    no idea how this will work in practice...
-    */
-    drflac_cache_t bs_cache = bs->cache;
-    drflac_uint32  bs_consumedBits = bs->consumedBits;
-
-    /* The first thing to do is find the first unset bit. Most likely a bit will be set in the current cache line. */
-    drflac_uint32  lzcount = drflac__clz(bs_cache);
-    if (lzcount < sizeof(bs_cache)*8) {
-        /*
-        It is most likely that the riceParam part (which comes after the zero counter) is also on this cache line. When extracting
-        this, we include the set bit from the unary coded part because it simplifies cache management. This bit will be handled
-        outside of this function at a higher level.
-        */
-    extract_rice_param_part:
-        bs_cache       <<= lzcount;
-        bs_consumedBits += lzcount;
-
-        if (bs_consumedBits <= riceParamPlus1MaxConsumedBits) {
-            /* Getting here means the rice parameter part is wholly contained within the current cache line. */
-            bs_cache       <<= riceParamPlus1;
-            bs_consumedBits += riceParamPlus1;
-        } else {
-            /*
-            Getting here means the rice parameter part straddles the cache line. We need to read from the tail of the current cache
-            line, reload the cache, and then combine it with the head of the next cache line.
-            */
-
-            /* Before reloading the cache we need to grab the size in bits of the low part. */
-            drflac_uint32 riceParamPartLoBitCount = bs_consumedBits - riceParamPlus1MaxConsumedBits;
-            DRFLAC_ASSERT(riceParamPartLoBitCount > 0 && riceParamPartLoBitCount < 32);
-
-            /* Now reload the cache. */
-            if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {
-            #ifndef DR_FLAC_NO_CRC
-                drflac__update_crc16(bs);
-            #endif
-                bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]);
-                bs_consumedBits = riceParamPartLoBitCount;
-            #ifndef DR_FLAC_NO_CRC
-                bs->crc16Cache = bs_cache;
-            #endif
-            } else {
-                /* Slow path. We need to fetch more data from the client. */
-                if (!drflac__reload_cache(bs)) {
-                    return DRFLAC_FALSE;
-                }
-
-                bs_cache = bs->cache;
-                bs_consumedBits = bs->consumedBits + riceParamPartLoBitCount;
-            }
-
-            bs_cache <<= riceParamPartLoBitCount;
-        }
-    } else {
-        /*
-        Getting here means there are no bits set on the cache line. This is a less optimal case because we just wasted a call
-        to drflac__clz() and we need to reload the cache.
-        */
-        for (;;) {
-            if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {
-            #ifndef DR_FLAC_NO_CRC
-                drflac__update_crc16(bs);
-            #endif
-                bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]);
-                bs_consumedBits = 0;
-            #ifndef DR_FLAC_NO_CRC
-                bs->crc16Cache = bs_cache;
-            #endif
-            } else {
-                /* Slow path. We need to fetch more data from the client. */
-                if (!drflac__reload_cache(bs)) {
-                    return DRFLAC_FALSE;
-                }
-
-                bs_cache = bs->cache;
-                bs_consumedBits = bs->consumedBits;
-            }
-
-            lzcount = drflac__clz(bs_cache);
-            if (lzcount < sizeof(bs_cache)*8) {
-                break;
-            }
-        }
-
-        goto extract_rice_param_part;
-    }
-
-    /* Make sure the cache is restored at the end of it all. */
-    bs->cache = bs_cache;
-    bs->consumedBits = bs_consumedBits;
-
-    return DRFLAC_TRUE;
-}
-
-
-static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar_zeroorder(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
-{
-    drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF};
-    drflac_uint32 zeroCountPart0;
-    drflac_uint32 riceParamPart0;
-    drflac_uint32 riceParamMask;
-    drflac_uint32 i;
-
-    DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(count > 0);
-    DRFLAC_ASSERT(pSamplesOut != NULL);
-
-    (void)bitsPerSample;
-    (void)order;
-    (void)shift;
-    (void)coefficients;
-
-    riceParamMask  = (drflac_uint32)~((~0UL) << riceParam);
-
-    i = 0;
-    while (i < count) {
-        /* Rice extraction. */
-        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart0, &riceParamPart0)) {
-            return DRFLAC_FALSE;
-        }
-
-        /* Rice reconstruction. */
-        riceParamPart0 &= riceParamMask;
-        riceParamPart0 |= (zeroCountPart0 << riceParam);
-        riceParamPart0  = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01];
-
-        pSamplesOut[i] = riceParamPart0;
-
-        i += 1;
-    }
-
-    return DRFLAC_TRUE;
-}
-
-static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
-{
-    drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF};
-    drflac_uint32 zeroCountPart0 = 0;
-    drflac_uint32 zeroCountPart1 = 0;
-    drflac_uint32 zeroCountPart2 = 0;
-    drflac_uint32 zeroCountPart3 = 0;
-    drflac_uint32 riceParamPart0 = 0;
-    drflac_uint32 riceParamPart1 = 0;
-    drflac_uint32 riceParamPart2 = 0;
-    drflac_uint32 riceParamPart3 = 0;
-    drflac_uint32 riceParamMask;
-    const drflac_int32* pSamplesOutEnd;
-    drflac_uint32 i;
-
-    DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(count > 0);
-    DRFLAC_ASSERT(pSamplesOut != NULL);
-
-    if (order == 0) {
-        return drflac__decode_samples_with_residual__rice__scalar_zeroorder(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
-    }
-
-    riceParamMask  = (drflac_uint32)~((~0UL) << riceParam);
-    pSamplesOutEnd = pSamplesOut + (count & ~3);
-
-    if (bitsPerSample+shift > 32) {
-        while (pSamplesOut < pSamplesOutEnd) {
-            /*
-            Rice extraction. It's faster to do this one at a time against local variables than it is to use the x4 version
-            against an array. Not sure why, but perhaps it's making more efficient use of registers?
-            */
-            if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart0, &riceParamPart0) ||
-                !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart1, &riceParamPart1) ||
-                !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart2, &riceParamPart2) ||
-                !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart3, &riceParamPart3)) {
-                return DRFLAC_FALSE;
-            }
-
-            riceParamPart0 &= riceParamMask;
-            riceParamPart1 &= riceParamMask;
-            riceParamPart2 &= riceParamMask;
-            riceParamPart3 &= riceParamMask;
-
-            riceParamPart0 |= (zeroCountPart0 << riceParam);
-            riceParamPart1 |= (zeroCountPart1 << riceParam);
-            riceParamPart2 |= (zeroCountPart2 << riceParam);
-            riceParamPart3 |= (zeroCountPart3 << riceParam);
-
-            riceParamPart0  = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01];
-            riceParamPart1  = (riceParamPart1 >> 1) ^ t[riceParamPart1 & 0x01];
-            riceParamPart2  = (riceParamPart2 >> 1) ^ t[riceParamPart2 & 0x01];
-            riceParamPart3  = (riceParamPart3 >> 1) ^ t[riceParamPart3 & 0x01];
-
-            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 0);
-            pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 1);
-            pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 2);
-            pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 3);
-
-            pSamplesOut += 4;
-        }
-    } else {
-        while (pSamplesOut < pSamplesOutEnd) {
-            if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart0, &riceParamPart0) ||
-                !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart1, &riceParamPart1) ||
-                !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart2, &riceParamPart2) ||
-                !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart3, &riceParamPart3)) {
-                return DRFLAC_FALSE;
-            }
-
-            riceParamPart0 &= riceParamMask;
-            riceParamPart1 &= riceParamMask;
-            riceParamPart2 &= riceParamMask;
-            riceParamPart3 &= riceParamMask;
-
-            riceParamPart0 |= (zeroCountPart0 << riceParam);
-            riceParamPart1 |= (zeroCountPart1 << riceParam);
-            riceParamPart2 |= (zeroCountPart2 << riceParam);
-            riceParamPart3 |= (zeroCountPart3 << riceParam);
-
-            riceParamPart0  = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01];
-            riceParamPart1  = (riceParamPart1 >> 1) ^ t[riceParamPart1 & 0x01];
-            riceParamPart2  = (riceParamPart2 >> 1) ^ t[riceParamPart2 & 0x01];
-            riceParamPart3  = (riceParamPart3 >> 1) ^ t[riceParamPart3 & 0x01];
-
-            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 0);
-            pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 1);
-            pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 2);
-            pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 3);
-
-            pSamplesOut += 4;
-        }
-    }
-
-    i = (count & ~3);
-    while (i < count) {
-        /* Rice extraction. */
-        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart0, &riceParamPart0)) {
-            return DRFLAC_FALSE;
-        }
-
-        /* Rice reconstruction. */
-        riceParamPart0 &= riceParamMask;
-        riceParamPart0 |= (zeroCountPart0 << riceParam);
-        riceParamPart0  = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01];
-        /*riceParamPart0  = (riceParamPart0 >> 1) ^ (~(riceParamPart0 & 0x01) + 1);*/
-
-        /* Sample reconstruction. */
-        if (bitsPerSample+shift > 32) {
-            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 0);
-        } else {
-            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 0);
-        }
-
-        i += 1;
-        pSamplesOut += 1;
-    }
-
-    return DRFLAC_TRUE;
-}
-
-#if defined(DRFLAC_SUPPORT_SSE2)
-static DRFLAC_INLINE __m128i drflac__mm_packs_interleaved_epi32(__m128i a, __m128i b)
-{
-    __m128i r;
-
-    /* Pack. */
-    r = _mm_packs_epi32(a, b);
-
-    /* a3a2 a1a0 b3b2 b1b0 -> a3a2 b3b2 a1a0 b1b0 */
-    r = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 1, 2, 0));
-
-    /* a3a2 b3b2 a1a0 b1b0 -> a3b3 a2b2 a1b1 a0b0 */
-    r = _mm_shufflehi_epi16(r, _MM_SHUFFLE(3, 1, 2, 0));
-    r = _mm_shufflelo_epi16(r, _MM_SHUFFLE(3, 1, 2, 0));
-
-    return r;
-}
-#endif
-
-#if defined(DRFLAC_SUPPORT_SSE41)
-static DRFLAC_INLINE __m128i drflac__mm_not_si128(__m128i a)
-{
-    return _mm_xor_si128(a, _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128()));
-}
-
-static DRFLAC_INLINE __m128i drflac__mm_hadd_epi32(__m128i x)
-{
-    __m128i x64 = _mm_add_epi32(x, _mm_shuffle_epi32(x, _MM_SHUFFLE(1, 0, 3, 2)));
-    __m128i x32 = _mm_shufflelo_epi16(x64, _MM_SHUFFLE(1, 0, 3, 2));
-    return _mm_add_epi32(x64, x32);
-}
-
-static DRFLAC_INLINE __m128i drflac__mm_hadd_epi64(__m128i x)
-{
-    return _mm_add_epi64(x, _mm_shuffle_epi32(x, _MM_SHUFFLE(1, 0, 3, 2)));
-}
-
-static DRFLAC_INLINE __m128i drflac__mm_srai_epi64(__m128i x, int count)
-{
-    /*
-    To simplify this we are assuming count < 32. This restriction allows us to work on a low side and a high side. The low side
-    is shifted with zero bits, whereas the right side is shifted with sign bits.
-    */
-    __m128i lo = _mm_srli_epi64(x, count);
-    __m128i hi = _mm_srai_epi32(x, count);
-
-    hi = _mm_and_si128(hi, _mm_set_epi32(0xFFFFFFFF, 0, 0xFFFFFFFF, 0));    /* The high part needs to have the low part cleared. */
-
-    return _mm_or_si128(lo+(5*GetBass()), hi);
-}
-
-static drflac_bool32 drflac__decode_samples_with_residual__rice__sse41_32(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
-{
-    int i;
-    drflac_uint32 riceParamMask;
-    drflac_int32* pDecodedSamples    = pSamplesOut;
-    drflac_int32* pDecodedSamplesEnd = pSamplesOut + (count & ~3);
-    drflac_uint32 zeroCountParts0 = 0;
-    drflac_uint32 zeroCountParts1 = 0;
-    drflac_uint32 zeroCountParts2 = 0;
-    drflac_uint32 zeroCountParts3 = 0;
-    drflac_uint32 riceParamParts0 = 0;
-    drflac_uint32 riceParamParts1 = 0;
-    drflac_uint32 riceParamParts2 = 0;
-    drflac_uint32 riceParamParts3 = 0;
-    __m128i coefficients128_0;
-    __m128i coefficients128_4;
-    __m128i coefficients128_8;
-    __m128i samples128_0;
-    __m128i samples128_4;
-    __m128i samples128_8;
-    __m128i riceParamMask128;
-
-    const drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF};
-
-    riceParamMask    = (drflac_uint32)~((~0UL) << riceParam);
-    riceParamMask128 = _mm_set1_epi32(riceParamMask);
-
-    /* Pre-load. */
-    coefficients128_0 = _mm_setzero_si128();
-    coefficients128_4 = _mm_setzero_si128();
-    coefficients128_8 = _mm_setzero_si128();
-
-    samples128_0 = _mm_setzero_si128();
-    samples128_4 = _mm_setzero_si128();
-    samples128_8 = _mm_setzero_si128();
-
-    /*
-    Pre-loading the coefficients and prior samples is annoying because we need to ensure we don't try reading more than
-    what's available in the input buffers. It would be convenient to use a fall-through switch to do this, but this results
-    in strict aliasing warnings with GCC. To work around this I'm just doing something hacky. This feels a bit convoluted
-    so I think there's opportunity for this to be simplified.
-    */
-#if 1
-    {
-        int runningOrder = order;
-
-        /* 0 - 3. */
-        if (runningOrder >= 4) {
-            coefficients128_0 = _mm_loadu_si128((const __m128i*)(coefficients + 0));
-            samples128_0      = _mm_loadu_si128((const __m128i*)(pSamplesOut  - 4));
-            runningOrder -= 4;
-        } else {
-            switch (runningOrder) {
-                case 3: coefficients128_0 = _mm_set_epi32(0, coefficients[2], coefficients[1], coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], pSamplesOut[-2], pSamplesOut[-3], 0); break;
-                case 2: coefficients128_0 = _mm_set_epi32(0, 0,               coefficients[1], coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], pSamplesOut[-2], 0,               0); break;
-                case 1: coefficients128_0 = _mm_set_epi32(0, 0,               0,               coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], 0,               0,               0); break;
-            }
-            runningOrder = 0;
-        }
-
-        /* 4 - 7 */
-        if (runningOrder >= 4) {
-            coefficients128_4 = _mm_loadu_si128((const __m128i*)(coefficients + 4));
-            samples128_4      = _mm_loadu_si128((const __m128i*)(pSamplesOut  - 8));
-            runningOrder -= 4;
-        } else {
-            switch (runningOrder) {
-                case 3: coefficients128_4 = _mm_set_epi32(0, coefficients[6], coefficients[5], coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], pSamplesOut[-6], pSamplesOut[-7], 0); break;
-                case 2: coefficients128_4 = _mm_set_epi32(0, 0,               coefficients[5], coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], pSamplesOut[-6], 0,               0); break;
-                case 1: coefficients128_4 = _mm_set_epi32(0, 0,               0,               coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], 0,               0,               0); break;
-            }
-            runningOrder = 0;
-        }
-
-        /* 8 - 11 */
-        if (runningOrder == 4) {
-            coefficients128_8 = _mm_loadu_si128((const __m128i*)(coefficients + 8));
-            samples128_8      = _mm_loadu_si128((const __m128i*)(pSamplesOut  - 12));
-            runningOrder -= 4;
-        } else {
-            switch (runningOrder) {
-                case 3: coefficients128_8 = _mm_set_epi32(0, coefficients[10], coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], pSamplesOut[-11], 0); break;
-                case 2: coefficients128_8 = _mm_set_epi32(0, 0,                coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], 0,                0); break;
-                case 1: coefficients128_8 = _mm_set_epi32(0, 0,                0,               coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], 0,                0,                0); break;
-            }
-            runningOrder = 0;
-        }
-
-        /* Coefficients need to be shuffled for our streaming algorithm below to work. Samples are already in the correct order from the loading routine above. */
-        coefficients128_0 = _mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(0, 1, 2, 3));
-        coefficients128_4 = _mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(0, 1, 2, 3));
-        coefficients128_8 = _mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(0, 1, 2, 3));
-    }
-#else
-    /* This causes strict-aliasing warnings with GCC. */
-    switch (order)
-    {
-    case 12: ((drflac_int32*)&coefficients128_8)[0] = coefficients[11]; ((drflac_int32*)&samples128_8)[0] = pDecodedSamples[-12];
-    case 11: ((drflac_int32*)&coefficients128_8)[1] = coefficients[10]; ((drflac_int32*)&samples128_8)[1] = pDecodedSamples[-11];
-    case 10: ((drflac_int32*)&coefficients128_8)[2] = coefficients[ 9]; ((drflac_int32*)&samples128_8)[2] = pDecodedSamples[-10];
-    case 9:  ((drflac_int32*)&coefficients128_8)[3] = coefficients[ 8]; ((drflac_int32*)&samples128_8)[3] = pDecodedSamples[- 9];
-    case 8:  ((drflac_int32*)&coefficients128_4)[0] = coefficients[ 7]; ((drflac_int32*)&samples128_4)[0] = pDecodedSamples[- 8];
-    case 7:  ((drflac_int32*)&coefficients128_4)[1] = coefficients[ 6]; ((drflac_int32*)&samples128_4)[1] = pDecodedSamples[- 7];
-    case 6:  ((drflac_int32*)&coefficients128_4)[2] = coefficients[ 5]; ((drflac_int32*)&samples128_4)[2] = pDecodedSamples[- 6];
-    case 5:  ((drflac_int32*)&coefficients128_4)[3] = coefficients[ 4]; ((drflac_int32*)&samples128_4)[3] = pDecodedSamples[- 5];
-    case 4:  ((drflac_int32*)&coefficients128_0)[0] = coefficients[ 3]; ((drflac_int32*)&samples128_0)[0] = pDecodedSamples[- 4];
-    case 3:  ((drflac_int32*)&coefficients128_0)[1] = coefficients[ 2]; ((drflac_int32*)&samples128_0)[1] = pDecodedSamples[- 3];
-    case 2:  ((drflac_int32*)&coefficients128_0)[2] = coefficients[ 1]; ((drflac_int32*)&samples128_0)[2] = pDecodedSamples[- 2];
-    case 1:  ((drflac_int32*)&coefficients128_0)[3] = coefficients[ 0]; ((drflac_int32*)&samples128_0)[3] = pDecodedSamples[- 1];
-    }
-#endif
-
-    /* For this version we are doing one sample at a time. */
-    while (pDecodedSamples < pDecodedSamplesEnd) {
-        __m128i prediction128;
-        __m128i zeroCountPart128;
-        __m128i riceParamPart128;
-
-        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts0, &riceParamParts0) ||
-            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts1, &riceParamParts1) ||
-            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts2, &riceParamParts2) ||
-            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts3, &riceParamParts3)) {
-            return DRFLAC_FALSE;
-        }
-
-        zeroCountPart128 = _mm_set_epi32(zeroCountParts3, zeroCountParts2, zeroCountParts1, zeroCountParts0);
-        riceParamPart128 = _mm_set_epi32(riceParamParts3, riceParamParts2, riceParamParts1, riceParamParts0);
-
-        riceParamPart128 = _mm_and_si128(riceParamPart128, riceParamMask128);
-        riceParamPart128 = _mm_or_si128(riceParamPart128, _mm_slli_epi32(zeroCountPart128, riceParam));
-        riceParamPart128 = _mm_xor_si128(_mm_srli_epi32(riceParamPart128, 1), _mm_add_epi32(drflac__mm_not_si128(_mm_and_si128(riceParamPart128, _mm_set1_epi32(0x01))), _mm_set1_epi32(0x01)));  /* <-- SSE2 compatible */
-        /*riceParamPart128 = _mm_xor_si128(_mm_srli_epi32(riceParamPart128, 1), _mm_mullo_epi32(_mm_and_si128(riceParamPart128, _mm_set1_epi32(0x01)), _mm_set1_epi32(0xFFFFFFFF)));*/   /* <-- Only supported from SSE4.1 and is slower in my testing... */
-
-        if (order <= 4) {
-            for (i = 0; i < 4; i += 1) {
-                prediction128 = _mm_mullo_epi32(coefficients128_0, samples128_0);
-
-                /* Horizontal add and shift. */
-                prediction128 = drflac__mm_hadd_epi32(prediction128);
-                prediction128 = _mm_srai_epi32(prediction128, shift);
-                prediction128 = _mm_add_epi32(riceParamPart128, prediction128);
-
-                samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4);
-                riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4);
-            }
-        } else if (order <= 8) {
-            for (i = 0; i < 4; i += 1) {
-                prediction128 =                              _mm_mullo_epi32(coefficients128_4, samples128_4);
-                prediction128 = _mm_add_epi32(prediction128, _mm_mullo_epi32(coefficients128_0, samples128_0));
-
-                /* Horizontal add and shift. */
-                prediction128 = drflac__mm_hadd_epi32(prediction128);
-                prediction128 = _mm_srai_epi32(prediction128, shift);
-                prediction128 = _mm_add_epi32(riceParamPart128, prediction128);
-
-                samples128_4 = _mm_alignr_epi8(samples128_0,  samples128_4, 4);
-                samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4);
-                riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4);
-            }
-        } else {
-            for (i = 0; i < 4; i += 1) {
-                prediction128 =                              _mm_mullo_epi32(coefficients128_8, samples128_8);
-                prediction128 = _mm_add_epi32(prediction128, _mm_mullo_epi32(coefficients128_4, samples128_4));
-                prediction128 = _mm_add_epi32(prediction128, _mm_mullo_epi32(coefficients128_0, samples128_0));
-
-                /* Horizontal add and shift. */
-                prediction128 = drflac__mm_hadd_epi32(prediction128);
-                prediction128 = _mm_srai_epi32(prediction128, shift);
-                prediction128 = _mm_add_epi32(riceParamPart128, prediction128);
-
-                samples128_8 = _mm_alignr_epi8(samples128_4,  samples128_8, 4);
-                samples128_4 = _mm_alignr_epi8(samples128_0,  samples128_4, 4);
-                samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4);
-                riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4);
-            }
-        }
-
-        /* We store samples in groups of 4. */
-        _mm_storeu_si128((__m128i*)pDecodedSamples, samples128_0);
-        pDecodedSamples += 4;
-    }
-
-    /* Make sure we process the last few samples. */
-    i = (count & ~3);
-    while (i < (int)count) {
-        /* Rice extraction. */
-        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts0, &riceParamParts0)) {
-            return DRFLAC_FALSE;
-        }
-
-        /* Rice reconstruction. */
-        riceParamParts0 &= riceParamMask;
-        riceParamParts0 |= (zeroCountParts0 << riceParam);
-        riceParamParts0  = (riceParamParts0 >> 1) ^ t[riceParamParts0 & 0x01];
-
-        /* Sample reconstruction. */
-        pDecodedSamples[0] = riceParamParts0 + drflac__calculate_prediction_32(order, shift, coefficients, pDecodedSamples);
-
-        i += 1;
-        pDecodedSamples += 1;
-    }
-
-    return DRFLAC_TRUE;
-}
-
-static drflac_bool32 drflac__decode_samples_with_residual__rice__sse41_64(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
-{
-    int i;
-    drflac_uint32 riceParamMask;
-    drflac_int32* pDecodedSamples    = pSamplesOut;
-    drflac_int32* pDecodedSamplesEnd = pSamplesOut + (count & ~3);
-    drflac_uint32 zeroCountParts0 = 0;
-    drflac_uint32 zeroCountParts1 = 0;
-    drflac_uint32 zeroCountParts2 = 0;
-    drflac_uint32 zeroCountParts3 = 0;
-    drflac_uint32 riceParamParts0 = 0;
-    drflac_uint32 riceParamParts1 = 0;
-    drflac_uint32 riceParamParts2 = 0;
-    drflac_uint32 riceParamParts3 = 0;
-    __m128i coefficients128_0;
-    __m128i coefficients128_4;
-    __m128i coefficients128_8;
-    __m128i samples128_0;
-    __m128i samples128_4;
-    __m128i samples128_8;
-    __m128i prediction128;
-    __m128i riceParamMask128;
-
-    const drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF};
-
-    DRFLAC_ASSERT(order <= 12);
-
-    riceParamMask    = (drflac_uint32)~((~0UL) << riceParam);
-    riceParamMask128 = _mm_set1_epi32(riceParamMask);
-
-    prediction128 = _mm_setzero_si128();
-
-    /* Pre-load. */
-    coefficients128_0  = _mm_setzero_si128();
-    coefficients128_4  = _mm_setzero_si128();
-    coefficients128_8  = _mm_setzero_si128();
-
-    samples128_0  = _mm_setzero_si128();
-    samples128_4  = _mm_setzero_si128();
-    samples128_8  = _mm_setzero_si128();
-
-#if 1
-    {
-        int runningOrder = order;
-
-        /* 0 - 3. */
-        if (runningOrder >= 4) {
-            coefficients128_0 = _mm_loadu_si128((const __m128i*)(coefficients + 0));
-            samples128_0      = _mm_loadu_si128((const __m128i*)(pSamplesOut  - 4));
-            runningOrder -= 4;
-        } else {
-            switch (runningOrder) {
-                case 3: coefficients128_0 = _mm_set_epi32(0, coefficients[2], coefficients[1], coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], pSamplesOut[-2], pSamplesOut[-3], 0); break;
-                case 2: coefficients128_0 = _mm_set_epi32(0, 0,               coefficients[1], coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], pSamplesOut[-2], 0,               0); break;
-                case 1: coefficients128_0 = _mm_set_epi32(0, 0,               0,               coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], 0,               0,               0); break;
-            }
-            runningOrder = 0;
-        }
-
-        /* 4 - 7 */
-        if (runningOrder >= 4) {
-            coefficients128_4 = _mm_loadu_si128((const __m128i*)(coefficients + 4));
-            samples128_4      = _mm_loadu_si128((const __m128i*)(pSamplesOut  - 8));
-            runningOrder -= 4;
-        } else {
-            switch (runningOrder) {
-                case 3: coefficients128_4 = _mm_set_epi32(0, coefficients[6], coefficients[5], coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], pSamplesOut[-6], pSamplesOut[-7], 0); break;
-                case 2: coefficients128_4 = _mm_set_epi32(0, 0,               coefficients[5], coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], pSamplesOut[-6], 0,               0); break;
-                case 1: coefficients128_4 = _mm_set_epi32(0, 0,               0,               coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], 0,               0,               0); break;
-            }
-            runningOrder = 0;
-        }
-
-        /* 8 - 11 */
-        if (runningOrder == 4) {
-            coefficients128_8 = _mm_loadu_si128((const __m128i*)(coefficients + 8));
-            samples128_8      = _mm_loadu_si128((const __m128i*)(pSamplesOut  - 12));
-            runningOrder -= 4;
-        } else {
-            switch (runningOrder) {
-                case 3: coefficients128_8 = _mm_set_epi32(0, coefficients[10], coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], pSamplesOut[-11], 0); break;
-                case 2: coefficients128_8 = _mm_set_epi32(0, 0,                coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], 0,                0); break;
-                case 1: coefficients128_8 = _mm_set_epi32(0, 0,                0,               coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], 0,                0,                0); break;
-            }
-            runningOrder = 0;
-        }
-
-        /* Coefficients need to be shuffled for our streaming algorithm below to work. Samples are already in the correct order from the loading routine above. */
-        coefficients128_0 = _mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(0, 1, 2, 3));
-        coefficients128_4 = _mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(0, 1, 2, 3));
-        coefficients128_8 = _mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(0, 1, 2, 3));
-    }
-#else
-    switch (order)
-    {
-    case 12: ((drflac_int32*)&coefficients128_8)[0] = coefficients[11]; ((drflac_int32*)&samples128_8)[0] = pDecodedSamples[-12];
-    case 11: ((drflac_int32*)&coefficients128_8)[1] = coefficients[10]; ((drflac_int32*)&samples128_8)[1] = pDecodedSamples[-11];
-    case 10: ((drflac_int32*)&coefficients128_8)[2] = coefficients[ 9]; ((drflac_int32*)&samples128_8)[2] = pDecodedSamples[-10];
-    case 9:  ((drflac_int32*)&coefficients128_8)[3] = coefficients[ 8]; ((drflac_int32*)&samples128_8)[3] = pDecodedSamples[- 9];
-    case 8:  ((drflac_int32*)&coefficients128_4)[0] = coefficients[ 7]; ((drflac_int32*)&samples128_4)[0] = pDecodedSamples[- 8];
-    case 7:  ((drflac_int32*)&coefficients128_4)[1] = coefficients[ 6]; ((drflac_int32*)&samples128_4)[1] = pDecodedSamples[- 7];
-    case 6:  ((drflac_int32*)&coefficients128_4)[2] = coefficients[ 5]; ((drflac_int32*)&samples128_4)[2] = pDecodedSamples[- 6];
-    case 5:  ((drflac_int32*)&coefficients128_4)[3] = coefficients[ 4]; ((drflac_int32*)&samples128_4)[3] = pDecodedSamples[- 5];
-    case 4:  ((drflac_int32*)&coefficients128_0)[0] = coefficients[ 3]; ((drflac_int32*)&samples128_0)[0] = pDecodedSamples[- 4];
-    case 3:  ((drflac_int32*)&coefficients128_0)[1] = coefficients[ 2]; ((drflac_int32*)&samples128_0)[1] = pDecodedSamples[- 3];
-    case 2:  ((drflac_int32*)&coefficients128_0)[2] = coefficients[ 1]; ((drflac_int32*)&samples128_0)[2] = pDecodedSamples[- 2];
-    case 1:  ((drflac_int32*)&coefficients128_0)[3] = coefficients[ 0]; ((drflac_int32*)&samples128_0)[3] = pDecodedSamples[- 1];
-    }
-#endif
-
-    /* For this version we are doing one sample at a time. */
-    while (pDecodedSamples < pDecodedSamplesEnd) {
-        __m128i zeroCountPart128;
-        __m128i riceParamPart128;
-
-        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts0, &riceParamParts0) ||
-            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts1, &riceParamParts1) ||
-            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts2, &riceParamParts2) ||
-            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts3, &riceParamParts3)) {
-            return DRFLAC_FALSE;
-        }
-
-        zeroCountPart128 = _mm_set_epi32(zeroCountParts3, zeroCountParts2, zeroCountParts1, zeroCountParts0);
-        riceParamPart128 = _mm_set_epi32(riceParamParts3, riceParamParts2, riceParamParts1, riceParamParts0);
-
-        riceParamPart128 = _mm_and_si128(riceParamPart128, riceParamMask128);
-        riceParamPart128 = _mm_or_si128(riceParamPart128, _mm_slli_epi32(zeroCountPart128, riceParam));
-        riceParamPart128 = _mm_xor_si128(_mm_srli_epi32(riceParamPart128, 1), _mm_add_epi32(drflac__mm_not_si128(_mm_and_si128(riceParamPart128, _mm_set1_epi32(1))), _mm_set1_epi32(1)));
-
-        for (i = 0; i < 4; i += 1) {
-            prediction128 = _mm_xor_si128(prediction128, prediction128);    /* Reset to 0. */
-
-            switch (order)
-            {
-            case 12:
-            case 11: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(1, 1, 0, 0)), _mm_shuffle_epi32(samples128_8, _MM_SHUFFLE(1, 1, 0, 0))));
-            case 10:
-            case  9: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(3, 3, 2, 2)), _mm_shuffle_epi32(samples128_8, _MM_SHUFFLE(3, 3, 2, 2))));
-            case  8:
-            case  7: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(1, 1, 0, 0)), _mm_shuffle_epi32(samples128_4, _MM_SHUFFLE(1, 1, 0, 0))));
-            case  6:
-            case  5: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(3, 3, 2, 2)), _mm_shuffle_epi32(samples128_4, _MM_SHUFFLE(3, 3, 2, 2))));
-            case  4:
-            case  3: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(1, 1, 0, 0)), _mm_shuffle_epi32(samples128_0, _MM_SHUFFLE(1, 1, 0, 0))));
-            case  2:
-            case  1: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(3, 3, 2, 2)), _mm_shuffle_epi32(samples128_0, _MM_SHUFFLE(3, 3, 2, 2))));
-            }
-
-            /* Horizontal add and shift. */
-            prediction128 = drflac__mm_hadd_epi64(prediction128);
-            prediction128 = drflac__mm_srai_epi64(prediction128, shift);
-            prediction128 = _mm_add_epi32(riceParamPart128, prediction128);
-
-            /* Our value should be sitting in prediction128[0]. We need to combine this with our SSE samples. */
-            samples128_8 = _mm_alignr_epi8(samples128_4,  samples128_8, 4);
-            samples128_4 = _mm_alignr_epi8(samples128_0,  samples128_4, 4);
-            samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4);
-
-            /* Slide our rice parameter down so that the value in position 0 contains the next one to process. */
-            riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4);
-        }
-
-        /* We store samples in groups of 4. */
-        _mm_storeu_si128((__m128i*)pDecodedSamples, samples128_0);
-        pDecodedSamples += 4;
-    }
-
-    /* Make sure we process the last few samples. */
-    i = (count & ~3);
-    while (i < (int)count) {
-        /* Rice extraction. */
-        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts0, &riceParamParts0)) {
-            return DRFLAC_FALSE;
-        }
-
-        /* Rice reconstruction. */
-        riceParamParts0 &= riceParamMask;
-        riceParamParts0 |= (zeroCountParts0 << riceParam);
-        riceParamParts0  = (riceParamParts0 >> 1) ^ t[riceParamParts0 & 0x01];
-
-        /* Sample reconstruction. */
-        pDecodedSamples[0] = riceParamParts0 + drflac__calculate_prediction_64(order, shift, coefficients, pDecodedSamples);
-
-        i += 1;
-        pDecodedSamples += 1;
-    }
-
-    return DRFLAC_TRUE;
-}
-
-static drflac_bool32 drflac__decode_samples_with_residual__rice__sse41(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
-{
-    DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(count > 0);
-    DRFLAC_ASSERT(pSamplesOut != NULL);
-
-    /* In my testing the order is rarely > 12, so in this case I'm going to simplify the SSE implementation by only handling order <= 12. */
-    if (order > 0 && order <= 12) {
-        if (bitsPerSample+shift > 32) {
-            return drflac__decode_samples_with_residual__rice__sse41_64(bs, count, riceParam, order, shift, coefficients, pSamplesOut);
-        } else {
-            return drflac__decode_samples_with_residual__rice__sse41_32(bs, count, riceParam, order, shift, coefficients, pSamplesOut);
-        }
-    } else {
-        return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
-    }
-}
-#endif
-
-#if defined(DRFLAC_SUPPORT_NEON)
-static DRFLAC_INLINE void drflac__vst2q_s32(drflac_int32* p, int32x4x2_t x)
-{
-    vst1q_s32(p+0, x.val[0]);
-    vst1q_s32(p+4, x.val[1]);
-}
-
-static DRFLAC_INLINE void drflac__vst2q_u32(drflac_uint32* p, uint32x4x2_t x)
-{
-    vst1q_u32(p+0, x.val[0]);
-    vst1q_u32(p+4, x.val[1]);
-}
-
-static DRFLAC_INLINE void drflac__vst2q_f32(double* p, float32x4x2_t x)
-{
-    vst1q_f32(p+0, x.val[0]);
-    vst1q_f32(p+4, x.val[1]);
-}
-
-static DRFLAC_INLINE void drflac__vst2q_s16(drflac_int16* p, int16x4x2_t x)
-{
-    vst1q_s16(p, vcombine_s16(x.val[0], x.val[1]));
-}
-
-static DRFLAC_INLINE void drflac__vst2q_u16(drflac_uint16* p, uint16x4x2_t x)
-{
-    vst1q_u16(p, vcombine_u16(x.val[0], x.val[1]));
-}
-
-static DRFLAC_INLINE int32x4_t drflac__vdupq_n_s32x4(drflac_int32 x3, drflac_int32 x2, drflac_int32 x1, drflac_int32 x0)
-{
-    drflac_int32 x[4];
-    x[3] = x3;
-    x[2] = x2;
-    x[1] = x1;
-    x[0] = x0;
-    return vld1q_s32(x);
-}
-
-static DRFLAC_INLINE int32x4_t drflac__valignrq_s32_1(int32x4_t a, int32x4_t b)
-{
-    /* Equivalent to SSE's _mm_alignr_epi8(a, b, 4) */
-
-    /* Reference */
-    /*return drflac__vdupq_n_s32x4(
-        vgetq_lane_s32(a, 0),
-        vgetq_lane_s32(b, 3),
-        vgetq_lane_s32(b, 2),
-        vgetq_lane_s32(b, 1)
-    );*/
-
-    return vextq_s32(b, a, 1);
-}
-
-static DRFLAC_INLINE uint32x4_t drflac__valignrq_u32_1(uint32x4_t a, uint32x4_t b)
-{
-    /* Equivalent to SSE's _mm_alignr_epi8(a, b, 4) */
-
-    /* Reference */
-    /*return drflac__vdupq_n_s32x4(
-        vgetq_lane_s32(a, 0),
-        vgetq_lane_s32(b, 3),
-        vgetq_lane_s32(b, 2),
-        vgetq_lane_s32(b, 1)
-    );*/
-
-    return vextq_u32(b, a, 1);
-}
-
-static DRFLAC_INLINE int32x2_t drflac__vhaddq_s32(int32x4_t x)
-{
-    /* The sum must end up in position 0. */
-
-    /* Reference */
-    /*return vdupq_n_s32(
-        vgetq_lane_s32(x, 3) +
-        vgetq_lane_s32(x, 2) +
-        vgetq_lane_s32(x, 1) +
-        vgetq_lane_s32(x, 0)
-    );*/
-
-    int32x2_t r = vadd_s32(vget_high_s32(x), vget_low_s32(x));
-    return vpadd_s32(r, r);
-}
-
-static DRFLAC_INLINE int64x1_t drflac__vhaddq_s64(int64x2_t x)
-{
-    return vadd_s64(vget_high_s64(x), vget_low_s64(x));
-}
-
-static DRFLAC_INLINE int32x4_t drflac__vrevq_s32(int32x4_t x)
-{
-    /* Reference */
-    /*return drflac__vdupq_n_s32x4(
-        vgetq_lane_s32(x, 0),
-        vgetq_lane_s32(x, 1),
-        vgetq_lane_s32(x, 2),
-        vgetq_lane_s32(x, 3)
-    );*/
-
-    return vrev64q_s32(vcombine_s32(vget_high_s32(x), vget_low_s32(x)));
-}
-
-static DRFLAC_INLINE int32x4_t drflac__vnotq_s32(int32x4_t x)
-{
-    return veorq_s32(x, vdupq_n_s32(0xFFFFFFFF));
-}
-
-static DRFLAC_INLINE uint32x4_t drflac__vnotq_u32(uint32x4_t x)
-{
-    return veorq_u32(x, vdupq_n_u32(0xFFFFFFFF));
-}
-
-static drflac_bool32 drflac__decode_samples_with_residual__rice__neon_32(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
-{
-    int i;
-    drflac_uint32 riceParamMask;
-    drflac_int32* pDecodedSamples    = pSamplesOut;
-    drflac_int32* pDecodedSamplesEnd = pSamplesOut + (count & ~3);
-    drflac_uint32 zeroCountParts[4];
-    drflac_uint32 riceParamParts[4];
-    int32x4_t coefficients128_0;
-    int32x4_t coefficients128_4;
-    int32x4_t coefficients128_8;
-    int32x4_t samples128_0;
-    int32x4_t samples128_4;
-    int32x4_t samples128_8;
-    uint32x4_t riceParamMask128;
-    int32x4_t riceParam128;
-    int32x2_t shift64;
-    uint32x4_t one128;
-
-    const drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF};
-
-    riceParamMask    = ~((~0UL) << riceParam);
-    riceParamMask128 = vdupq_n_u32(riceParamMask);
-
-    riceParam128 = vdupq_n_s32(riceParam);
-    shift64 = vdup_n_s32(-shift); /* Negate the shift because we'll be doing a variable shift using vshlq_s32(). */
-    one128 = vdupq_n_u32(1);
-
-    /*
-    Pre-loading the coefficients and prior samples is annoying because we need to ensure we don't try reading more than
-    what's available in the input buffers. It would be conenient to use a fall-through switch to do this, but this results
-    in strict aliasing warnings with GCC. To work around this I'm just doing something hacky. This feels a bit convoluted
-    so I think there's opportunity for this to be simplified.
-    */
-    {
-        int runningOrder = order;
-        drflac_int32 tempC[4] = {0, 0, 0, 0};
-        drflac_int32 tempS[4] = {0, 0, 0, 0};
-
-        /* 0 - 3. */
-        if (runningOrder >= 4) {
-            coefficients128_0 = vld1q_s32(coefficients + 0);
-            samples128_0      = vld1q_s32(pSamplesOut  - 4);
-            runningOrder -= 4;
-        } else {
-            switch (runningOrder) {
-                case 3: tempC[2] = coefficients[2]; tempS[1] = pSamplesOut[-3]; /* fallthrough */
-                case 2: tempC[1] = coefficients[1]; tempS[2] = pSamplesOut[-2]; /* fallthrough */
-                case 1: tempC[0] = coefficients[0]; tempS[3] = pSamplesOut[-1]; /* fallthrough */
-            }
-
-            coefficients128_0 = vld1q_s32(tempC);
-            samples128_0      = vld1q_s32(tempS);
-            runningOrder = 0;
-        }
-
-        /* 4 - 7 */
-        if (runningOrder >= 4) {
-            coefficients128_4 = vld1q_s32(coefficients + 4);
-            samples128_4      = vld1q_s32(pSamplesOut  - 8);
-            runningOrder -= 4;
-        } else {
-            switch (runningOrder) {
-                case 3: tempC[2] = coefficients[6]; tempS[1] = pSamplesOut[-7]; /* fallthrough */
-                case 2: tempC[1] = coefficients[5]; tempS[2] = pSamplesOut[-6]; /* fallthrough */
-                case 1: tempC[0] = coefficients[4]; tempS[3] = pSamplesOut[-5]; /* fallthrough */
-            }
-
-            coefficients128_4 = vld1q_s32(tempC);
-            samples128_4      = vld1q_s32(tempS);
-            runningOrder = 0;
-        }
-
-        /* 8 - 11 */
-        if (runningOrder == 4) {
-            coefficients128_8 = vld1q_s32(coefficients + 8);
-            samples128_8      = vld1q_s32(pSamplesOut  - 12);
-            runningOrder -= 4;
-        } else {
-            switch (runningOrder) {
-                case 3: tempC[2] = coefficients[10]; tempS[1] = pSamplesOut[-11]; /* fallthrough */
-                case 2: tempC[1] = coefficients[ 9]; tempS[2] = pSamplesOut[-10]; /* fallthrough */
-                case 1: tempC[0] = coefficients[ 8]; tempS[3] = pSamplesOut[- 9]; /* fallthrough */
-            }
-
-            coefficients128_8 = vld1q_s32(tempC);
-            samples128_8      = vld1q_s32(tempS);
-            runningOrder = 0;
-        }
-
-        /* Coefficients need to be shuffled for our streaming algorithm below to work. Samples are already in the correct order from the loading routine above. */
-        coefficients128_0 = drflac__vrevq_s32(coefficients128_0);
-        coefficients128_4 = drflac__vrevq_s32(coefficients128_4);
-        coefficients128_8 = drflac__vrevq_s32(coefficients128_8);
-    }
-
-    /* For this version we are doing one sample at a time. */
-    while (pDecodedSamples < pDecodedSamplesEnd) {
-        int32x4_t prediction128;
-        int32x2_t prediction64;
-        uint32x4_t zeroCountPart128;
-        uint32x4_t riceParamPart128;
-
-        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[0], &riceParamParts[0]) ||
-            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[1], &riceParamParts[1]) ||
-            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[2], &riceParamParts[2]) ||
-            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[3], &riceParamParts[3])) {
-            return DRFLAC_FALSE;
-        }
-
-        zeroCountPart128 = vld1q_u32(zeroCountParts);
-        riceParamPart128 = vld1q_u32(riceParamParts);
-
-        riceParamPart128 = vandq_u32(riceParamPart128, riceParamMask128);
-        riceParamPart128 = vorrq_u32(riceParamPart128, vshlq_u32(zeroCountPart128, riceParam128));
-        riceParamPart128 = veorq_u32(vshrq_n_u32(riceParamPart128, 1), vaddq_u32(drflac__vnotq_u32(vandq_u32(riceParamPart128, one128)), one128));
-
-        if (order <= 4) {
-            for (i = 0; i < 4; i += 1) {
-                prediction128 = vmulq_s32(coefficients128_0, samples128_0);
-
-                /* Horizontal add and shift. */
-                prediction64 = drflac__vhaddq_s32(prediction128);
-                prediction64 = vshl_s32(prediction64, shift64);
-                prediction64 = vadd_s32(prediction64, vget_low_s32(vreinterpretq_s32_u32(riceParamPart128)));
-
-                samples128_0 = drflac__valignrq_s32_1(vcombine_s32(prediction64, vdup_n_s32(0)), samples128_0);
-                riceParamPart128 = drflac__valignrq_u32_1(vdupq_n_u32(0), riceParamPart128);
-            }
-        } else if (order <= 8) {
-            for (i = 0; i < 4; i += 1) {
-                prediction128 =                vmulq_s32(coefficients128_4, samples128_4);
-                prediction128 = vmlaq_s32(prediction128, coefficients128_0, samples128_0);
-
-                /* Horizontal add and shift. */
-                prediction64 = drflac__vhaddq_s32(prediction128);
-                prediction64 = vshl_s32(prediction64, shift64);
-                prediction64 = vadd_s32(prediction64, vget_low_s32(vreinterpretq_s32_u32(riceParamPart128)));
-
-                samples128_4 = drflac__valignrq_s32_1(samples128_0, samples128_4);
-                samples128_0 = drflac__valignrq_s32_1(vcombine_s32(prediction64, vdup_n_s32(0)), samples128_0);
-                riceParamPart128 = drflac__valignrq_u32_1(vdupq_n_u32(0), riceParamPart128);
-            }
-        } else {
-            for (i = 0; i < 4; i += 1) {
-                prediction128 =                vmulq_s32(coefficients128_8, samples128_8);
-                prediction128 = vmlaq_s32(prediction128, coefficients128_4, samples128_4);
-                prediction128 = vmlaq_s32(prediction128, coefficients128_0, samples128_0);
-
-                /* Horizontal add and shift. */
-                prediction64 = drflac__vhaddq_s32(prediction128);
-                prediction64 = vshl_s32(prediction64, shift64);
-                prediction64 = vadd_s32(prediction64, vget_low_s32(vreinterpretq_s32_u32(riceParamPart128)));
-
-                samples128_8 = drflac__valignrq_s32_1(samples128_4, samples128_8);
-                samples128_4 = drflac__valignrq_s32_1(samples128_0, samples128_4);
-                samples128_0 = drflac__valignrq_s32_1(vcombine_s32(prediction64, vdup_n_s32(0)), samples128_0);
-                riceParamPart128 = drflac__valignrq_u32_1(vdupq_n_u32(0), riceParamPart128);
-            }
-        }
-
-        /* We store samples in groups of 4. */
-        vst1q_s32(pDecodedSamples, samples128_0);
-        pDecodedSamples += 4;
-    }
-
-    /* Make sure we process the last few samples. */
-    i = (count & ~3);
-    while (i < (int)count) {
-        /* Rice extraction. */
-        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[0], &riceParamParts[0])) {
-            return DRFLAC_FALSE;
-        }
-
-        /* Rice reconstruction. */
-        riceParamParts[0] &= riceParamMask;
-        riceParamParts[0] |= (zeroCountParts[0] << riceParam);
-        riceParamParts[0]  = (riceParamParts[0] >> 1) ^ t[riceParamParts[0] & 0x01];
-
-        /* Sample reconstruction. */
-        pDecodedSamples[0] = riceParamParts[0] + drflac__calculate_prediction_32(order, shift, coefficients, pDecodedSamples);
-
-        i += 1;
-        pDecodedSamples += 1;
-    }
-
-    return DRFLAC_TRUE;
-}
-
-static drflac_bool32 drflac__decode_samples_with_residual__rice__neon_64(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
-{
-    int i;
-    drflac_uint32 riceParamMask;
-    drflac_int32* pDecodedSamples    = pSamplesOut;
-    drflac_int32* pDecodedSamplesEnd = pSamplesOut + (count & ~3);
-    drflac_uint32 zeroCountParts[4];
-    drflac_uint32 riceParamParts[4];
-    int32x4_t coefficients128_0;
-    int32x4_t coefficients128_4;
-    int32x4_t coefficients128_8;
-    int32x4_t samples128_0;
-    int32x4_t samples128_4;
-    int32x4_t samples128_8;
-    uint32x4_t riceParamMask128;
-    int32x4_t riceParam128;
-    int64x1_t shift64;
-    uint32x4_t one128;
-
-    const drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF};
-
-    riceParamMask    = ~((~0UL) << riceParam);
-    riceParamMask128 = vdupq_n_u32(riceParamMask);
-
-    riceParam128 = vdupq_n_s32(riceParam);
-    shift64 = vdup_n_s64(-shift); /* Negate the shift because we'll be doing a variable shift using vshlq_s32(). */
-    one128 = vdupq_n_u32(1);
-
-    /*
-    Pre-loading the coefficients and prior samples is annoying because we need to ensure we don't try reading more than
-    what's available in the input buffers. It would be conenient to use a fall-through switch to do this, but this results
-    in strict aliasing warnings with GCC. To work around this I'm just doing something hacky. This feels a bit convoluted
-    so I think there's opportunity for this to be simplified.
-    */
-    {
-        int runningOrder = order;
-        drflac_int32 tempC[4] = {0, 0, 0, 0};
-        drflac_int32 tempS[4] = {0, 0, 0, 0};
-
-        /* 0 - 3. */
-        if (runningOrder >= 4) {
-            coefficients128_0 = vld1q_s32(coefficients + 0);
-            samples128_0      = vld1q_s32(pSamplesOut  - 4);
-            runningOrder -= 4;
-        } else {
-            switch (runningOrder) {
-                case 3: tempC[2] = coefficients[2]; tempS[1] = pSamplesOut[-3]; /* fallthrough */
-                case 2: tempC[1] = coefficients[1]; tempS[2] = pSamplesOut[-2]; /* fallthrough */
-                case 1: tempC[0] = coefficients[0]; tempS[3] = pSamplesOut[-1]; /* fallthrough */
-            }
-
-            coefficients128_0 = vld1q_s32(tempC);
-            samples128_0      = vld1q_s32(tempS);
-            runningOrder = 0;
-        }
-
-        /* 4 - 7 */
-        if (runningOrder >= 4) {
-            coefficients128_4 = vld1q_s32(coefficients + 4);
-            samples128_4      = vld1q_s32(pSamplesOut  - 8);
-            runningOrder -= 4;
-        } else {
-            switch (runningOrder) {
-                case 3: tempC[2] = coefficients[6]; tempS[1] = pSamplesOut[-7]; /* fallthrough */
-                case 2: tempC[1] = coefficients[5]; tempS[2] = pSamplesOut[-6]; /* fallthrough */
-                case 1: tempC[0] = coefficients[4]; tempS[3] = pSamplesOut[-5]; /* fallthrough */
-            }
-
-            coefficients128_4 = vld1q_s32(tempC);
-            samples128_4      = vld1q_s32(tempS);
-            runningOrder = 0;
-        }
-
-        /* 8 - 11 */
-        if (runningOrder == 4) {
-            coefficients128_8 = vld1q_s32(coefficients + 8);
-            samples128_8      = vld1q_s32(pSamplesOut  - 12);
-            runningOrder -= 4;
-        } else {
-            switch (runningOrder) {
-                case 3: tempC[2] = coefficients[10]; tempS[1] = pSamplesOut[-11]; /* fallthrough */
-                case 2: tempC[1] = coefficients[ 9]; tempS[2] = pSamplesOut[-10]; /* fallthrough */
-                case 1: tempC[0] = coefficients[ 8]; tempS[3] = pSamplesOut[- 9]; /* fallthrough */
-            }
-
-            coefficients128_8 = vld1q_s32(tempC);
-            samples128_8      = vld1q_s32(tempS);
-            runningOrder = 0;
-        }
-
-        /* Coefficients need to be shuffled for our streaming algorithm below to work. Samples are already in the correct order from the loading routine above. */
-        coefficients128_0 = drflac__vrevq_s32(coefficients128_0);
-        coefficients128_4 = drflac__vrevq_s32(coefficients128_4);
-        coefficients128_8 = drflac__vrevq_s32(coefficients128_8);
-    }
-
-    /* For this version we are doing one sample at a time. */
-    while (pDecodedSamples < pDecodedSamplesEnd) {
-        int64x2_t prediction128;
-        uint32x4_t zeroCountPart128;
-        uint32x4_t riceParamPart128;
-
-        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[0], &riceParamParts[0]) ||
-            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[1], &riceParamParts[1]) ||
-            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[2], &riceParamParts[2]) ||
-            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[3], &riceParamParts[3])) {
-            return DRFLAC_FALSE;
-        }
-
-        zeroCountPart128 = vld1q_u32(zeroCountParts);
-        riceParamPart128 = vld1q_u32(riceParamParts);
-
-        riceParamPart128 = vandq_u32(riceParamPart128, riceParamMask128);
-        riceParamPart128 = vorrq_u32(riceParamPart128, vshlq_u32(zeroCountPart128, riceParam128));
-        riceParamPart128 = veorq_u32(vshrq_n_u32(riceParamPart128, 1), vaddq_u32(drflac__vnotq_u32(vandq_u32(riceParamPart128, one128)), one128));
-
-        for (i = 0; i < 4; i += 1) {
-            int64x1_t prediction64;
-
-            prediction128 = veorq_s64(prediction128, prediction128);    /* Reset to 0. */
-            switch (order)
-            {
-            case 12:
-            case 11: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_low_s32(coefficients128_8), vget_low_s32(samples128_8)));
-            case 10:
-            case  9: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_high_s32(coefficients128_8), vget_high_s32(samples128_8)));
-            case  8:
-            case  7: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_low_s32(coefficients128_4), vget_low_s32(samples128_4)));
-            case  6:
-            case  5: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_high_s32(coefficients128_4), vget_high_s32(samples128_4)));
-            case  4:
-            case  3: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_low_s32(coefficients128_0), vget_low_s32(samples128_0)));
-            case  2:
-            case  1: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_high_s32(coefficients128_0), vget_high_s32(samples128_0)));
-            }
-
-            /* Horizontal add and shift. */
-            prediction64 = drflac__vhaddq_s64(prediction128);
-            prediction64 = vshl_s64(prediction64, shift64);
-            prediction64 = vadd_s64(prediction64, vdup_n_s64(vgetq_lane_u32(riceParamPart128, 0)));
-
-            /* Our value should be sitting in prediction64[0]. We need to combine this with our SSE samples. */
-            samples128_8 = drflac__valignrq_s32_1(samples128_4, samples128_8);
-            samples128_4 = drflac__valignrq_s32_1(samples128_0, samples128_4);
-            samples128_0 = drflac__valignrq_s32_1(vcombine_s32(vreinterpret_s32_s64(prediction64), vdup_n_s32(0)), samples128_0);
-
-            /* Slide our rice parameter down so that the value in position 0 contains the next one to process. */
-            riceParamPart128 = drflac__valignrq_u32_1(vdupq_n_u32(0), riceParamPart128);
-        }
-
-        /* We store samples in groups of 4. */
-        vst1q_s32(pDecodedSamples, samples128_0);
-        pDecodedSamples += 4;
-    }
-
-    /* Make sure we process the last few samples. */
-    i = (count & ~3);
-    while (i < (int)count) {
-        /* Rice extraction. */
-        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[0], &riceParamParts[0])) {
-            return DRFLAC_FALSE;
-        }
-
-        /* Rice reconstruction. */
-        riceParamParts[0] &= riceParamMask;
-        riceParamParts[0] |= (zeroCountParts[0] << riceParam);
-        riceParamParts[0]  = (riceParamParts[0] >> 1) ^ t[riceParamParts[0] & 0x01];
-
-        /* Sample reconstruction. */
-        pDecodedSamples[0] = riceParamParts[0] + drflac__calculate_prediction_64(order, shift, coefficients, pDecodedSamples);
-
-        i += 1;
-        pDecodedSamples += 1;
-    }
-
-    return DRFLAC_TRUE;
-}
-
-static drflac_bool32 drflac__decode_samples_with_residual__rice__neon(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
-{
-    DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(count > 0);
-    DRFLAC_ASSERT(pSamplesOut != NULL);
-
-    /* In my testing the order is rarely > 12, so in this case I'm going to simplify the NEON implementation by only handling order <= 12. */
-    if (order > 0 && order <= 12) {
-        if (bitsPerSample+shift > 32) {
-            return drflac__decode_samples_with_residual__rice__neon_64(bs, count, riceParam, order, shift, coefficients, pSamplesOut);
-        } else {
-            return drflac__decode_samples_with_residual__rice__neon_32(bs, count, riceParam, order, shift, coefficients, pSamplesOut);
-        }
-    } else {
-        return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
-    }
-}
-#endif
-
-static drflac_bool32 drflac__decode_samples_with_residual__rice(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
-{
-#if defined(DRFLAC_SUPPORT_SSE41)
-    if (drflac__gIsSSE41Supported) {
-        return drflac__decode_samples_with_residual__rice__sse41(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
-    } else
-#elif defined(DRFLAC_SUPPORT_NEON)
-    if (drflac__gIsNEONSupported) {
-        return drflac__decode_samples_with_residual__rice__neon(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
-    } else
-#endif
-    {
-        /* Scalar fallback. */
-    #if 0
-        return drflac__decode_samples_with_residual__rice__reference(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
-    #else
-        return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
-    #endif
-    }
-}
-
-/* Reads and seeks past a string of residual values as Rice codes. The decoder should be sitting on the first bit of the Rice codes. */
-static drflac_bool32 drflac__read_and_seek_residual__rice(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam)
-{
-    drflac_uint32 i;
-
-    DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(count > 0);
-
-    for (i = 0; i < count; ++i) {
-        if (!drflac__seek_rice_parts(bs, riceParam)) {
-            return DRFLAC_FALSE;
-        }
-    }
-
-    return DRFLAC_TRUE;
-}
-
-static drflac_bool32 drflac__decode_samples_with_residual__unencoded(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 unencodedBitsPerSample, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
-{
-    drflac_uint32 i;
-
-    DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(count > 0);
-    DRFLAC_ASSERT(unencodedBitsPerSample <= 31);    /* <-- unencodedBitsPerSample is a 5 bit number, so cannot exceed 31. */
-    DRFLAC_ASSERT(pSamplesOut != NULL);
-
-    for (i = 0; i < count; ++i) {
-        if (unencodedBitsPerSample > 0) {
-            if (!drflac__read_int32(bs, unencodedBitsPerSample, pSamplesOut + i)) {
-                return DRFLAC_FALSE;
-            }
-        } else {
-            pSamplesOut[i] = 0;
-        }
-
-        if (bitsPerSample >= 24) {
-            pSamplesOut[i] += drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + i);
-        } else {
-            pSamplesOut[i] += drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + i);
-        }
-    }
-
-    return DRFLAC_TRUE;
-}
-
-
-/*
-Reads and decodes the residual for the sub-frame the decoder is currently sitting on. This function should be called
-when the decoder is sitting at the very start of the RESIDUAL block. The first <order> residuals will be ignored. The
-<blockSize> and <order> parameters are used to determine how many residual values need to be decoded.
-*/
-static drflac_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 blockSize, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pDecodedSamples)
-{
-    drflac_uint8 residualMethod;
-    drflac_uint8 partitionOrder;
-    drflac_uint32 samplesInPartition;
-    drflac_uint32 partitionsRemaining;
-
-    DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(blockSize != 0);
-    DRFLAC_ASSERT(pDecodedSamples != NULL);       /* <-- Should we allow NULL, in which case we just seek past the residual rather than do a full decode? */
-
-    if (!drflac__read_uint8(bs, 2, &residualMethod)) {
-        return DRFLAC_FALSE;
-    }
-
-    if (residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE && residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) {
-        return DRFLAC_FALSE;    /* Unknown or unsupported residual coding method. */
-    }
-
-    /* Ignore the first <order> values. */
-    pDecodedSamples += order;
-
-    if (!drflac__read_uint8(bs, 4, &partitionOrder)) {
-        return DRFLAC_FALSE;
-    }
-
-    /*
-    From the FLAC spec:
-      The Rice partition order in a Rice-coded residual section must be less than or equal to 8.
-    */
-    if (partitionOrder > 8) {
-        return DRFLAC_FALSE;
-    }
-
-    /* Validation check. */
-    if ((blockSize / (1 << partitionOrder)) <= order) {
-        return DRFLAC_FALSE;
-    }
-
-    samplesInPartition = (blockSize / (1 << partitionOrder)) - order;
-    partitionsRemaining = (1 << partitionOrder);
-    for (;;) {
-        drflac_uint8 riceParam = 0;
-        if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE) {
-            if (!drflac__read_uint8(bs, 4, &riceParam)) {
-                return DRFLAC_FALSE;
-            }
-            if (riceParam == 15) {
-                riceParam = 0xFF;
-            }
-        } else if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) {
-            if (!drflac__read_uint8(bs, 5, &riceParam)) {
-                return DRFLAC_FALSE;
-            }
-            if (riceParam == 31) {
-                riceParam = 0xFF;
-            }
-        }
-
-        if (riceParam != 0xFF) {
-            if (!drflac__decode_samples_with_residual__rice(bs, bitsPerSample, samplesInPartition, riceParam, order, shift, coefficients, pDecodedSamples)) {
-                return DRFLAC_FALSE;
-            }
-        } else {
-            drflac_uint8 unencodedBitsPerSample = 0;
-            if (!drflac__read_uint8(bs, 5, &unencodedBitsPerSample)) {
-                return DRFLAC_FALSE;
-            }
-
-            if (!drflac__decode_samples_with_residual__unencoded(bs, bitsPerSample, samplesInPartition, unencodedBitsPerSample, order, shift, coefficients, pDecodedSamples)) {
-                return DRFLAC_FALSE;
-            }
-        }
-
-        pDecodedSamples += samplesInPartition;
-
-        if (partitionsRemaining == 1) {
-            break;
-        }
-
-        partitionsRemaining -= 1;
-
-        if (partitionOrder != 0) {
-            samplesInPartition = blockSize / (1 << partitionOrder);
-        }
-    }
-
-    return DRFLAC_TRUE;
-}
-
-/*
-Reads and seeks past the residual for the sub-frame the decoder is currently sitting on. This function should be called
-when the decoder is sitting at the very start of the RESIDUAL block. The first <order> residuals will be set to 0. The
-<blockSize> and <order> parameters are used to determine how many residual values need to be decoded.
-*/
-static drflac_bool32 drflac__read_and_seek_residual(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 order)
-{
-    drflac_uint8 residualMethod;
-    drflac_uint8 partitionOrder;
-    drflac_uint32 samplesInPartition;
-    drflac_uint32 partitionsRemaining;
-
-    DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(blockSize != 0);
-
-    if (!drflac__read_uint8(bs, 2, &residualMethod)) {
-        return DRFLAC_FALSE;
-    }
-
-    if (residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE && residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) {
-        return DRFLAC_FALSE;    /* Unknown or unsupported residual coding method. */
-    }
-
-    if (!drflac__read_uint8(bs, 4, &partitionOrder)) {
-        return DRFLAC_FALSE;
-    }
-
-    /*
-    From the FLAC spec:
-      The Rice partition order in a Rice-coded residual section must be less than or equal to 8.
-    */
-    if (partitionOrder > 8) {
-        return DRFLAC_FALSE;
-    }
-
-    /* Validation check. */
-    if ((blockSize / (1 << partitionOrder)) <= order) {
-        return DRFLAC_FALSE;
-    }
-
-    samplesInPartition = (blockSize / (1 << partitionOrder)) - order;
-    partitionsRemaining = (1 << partitionOrder);
-    for (;;)
-    {
-        drflac_uint8 riceParam = 0;
-        if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE) {
-            if (!drflac__read_uint8(bs, 4, &riceParam)) {
-                return DRFLAC_FALSE;
-            }
-            if (riceParam == 15) {
-                riceParam = 0xFF;
-            }
-        } else if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) {
-            if (!drflac__read_uint8(bs, 5, &riceParam)) {
-                return DRFLAC_FALSE;
-            }
-            if (riceParam == 31) {
-                riceParam = 0xFF;
-            }
-        }
-
-        if (riceParam != 0xFF) {
-            if (!drflac__read_and_seek_residual__rice(bs, samplesInPartition, riceParam)) {
-                return DRFLAC_FALSE;
-            }
-        } else {
-            drflac_uint8 unencodedBitsPerSample = 0;
-            if (!drflac__read_uint8(bs, 5, &unencodedBitsPerSample)) {
-                return DRFLAC_FALSE;
-            }
-
-            if (!drflac__seek_bits(bs, unencodedBitsPerSample * samplesInPartition)) {
-                return DRFLAC_FALSE;
-            }
-        }
-
-
-        if (partitionsRemaining == 1) {
-            break;
-        }
-
-        partitionsRemaining -= 1;
-        samplesInPartition = blockSize / (1 << partitionOrder);
-    }
-
-    return DRFLAC_TRUE;
-}
-
-
-static drflac_bool32 drflac__decode_samples__constant(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 subframeBitsPerSample, drflac_int32* pDecodedSamples)
-{
-    drflac_uint32 i;
-
-    /* Only a single sample needs to be decoded here. */
-    drflac_int32 sample;
-    if (!drflac__read_int32(bs, subframeBitsPerSample, &sample)) {
-        return DRFLAC_FALSE;
-    }
-
-    /*
-    We don't really need to expand this, but it does simplify the process of reading samples. If this becomes a performance issue (unlikely)
-    we'll want to look at a more efficient way.
-    */
-    for (i = 0; i < blockSize; ++i) {
-        pDecodedSamples[i] = sample;
-    }
-
-    return DRFLAC_TRUE;
-}
-
-static drflac_bool32 drflac__decode_samples__verbatim(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 subframeBitsPerSample, drflac_int32* pDecodedSamples)
-{
-    drflac_uint32 i;
-
-    for (i = 0; i < blockSize; ++i) {
-        drflac_int32 sample;
-        if (!drflac__read_int32(bs, subframeBitsPerSample, &sample)) {
-            return DRFLAC_FALSE;
-        }
-
-        pDecodedSamples[i] = sample;
-    }
-
-    return DRFLAC_TRUE;
-}
-
-static drflac_bool32 drflac__decode_samples__fixed(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 subframeBitsPerSample, drflac_uint8 lpcOrder, drflac_int32* pDecodedSamples)
-{
-    drflac_uint32 i;
-
-    static drflac_int32 lpcCoefficientsTable[5][4] = {
-        {0,  0, 0,  0},
-        {1,  0, 0,  0},
-        {2, -1, 0,  0},
-        {3, -3, 1,  0},
-        {4, -6, 4, -1}
-    };
-
-    /* Warm up samples and coefficients. */
-    for (i = 0; i < lpcOrder; ++i) {
-        drflac_int32 sample;
-        if (!drflac__read_int32(bs, subframeBitsPerSample, &sample)) {
-            return DRFLAC_FALSE;
-        }
-
-        pDecodedSamples[i] = sample;
-    }
-
-    if (!drflac__decode_samples_with_residual(bs, subframeBitsPerSample, blockSize, lpcOrder, 0, lpcCoefficientsTable[lpcOrder], pDecodedSamples)) {
-        return DRFLAC_FALSE;
-    }
-
-    return DRFLAC_TRUE;
-}
-
-static drflac_bool32 drflac__decode_samples__lpc(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 bitsPerSample, drflac_uint8 lpcOrder, drflac_int32* pDecodedSamples)
-{
-    drflac_uint8 i;
-    drflac_uint8 lpcPrecision;
-    drflac_int8 lpcShift;
-    drflac_int32 coefficients[32];
-
-    /* Warm up samples. */
-    for (i = 0; i < lpcOrder; ++i) {
-        drflac_int32 sample;
-        if (!drflac__read_int32(bs, bitsPerSample, &sample)) {
-            return DRFLAC_FALSE;
-        }
-
-        pDecodedSamples[i] = sample;
-    }
-
-    if (!drflac__read_uint8(bs, 4, &lpcPrecision)) {
-        return DRFLAC_FALSE;
-    }
-    if (lpcPrecision == 15) {
-        return DRFLAC_FALSE;    /* Invalid. */
-    }
-    lpcPrecision += 1;
-
-    if (!drflac__read_int8(bs, 5, &lpcShift)) {
-        return DRFLAC_FALSE;
-    }
-
-    DRFLAC_ZERO_MEMORY(coefficients, sizeof(coefficients));
-    for (i = 0; i < lpcOrder; ++i) {
-        if (!drflac__read_int32(bs, lpcPrecision, coefficients + i)) {
-            return DRFLAC_FALSE;
-        }
-    }
-
-    if (!drflac__decode_samples_with_residual(bs, bitsPerSample, blockSize, lpcOrder, lpcShift, coefficients, pDecodedSamples)) {
-        return DRFLAC_FALSE;
-    }
-
-    return DRFLAC_TRUE;
-}
-
-
-static drflac_bool32 drflac__read_next_flac_frame_header(drflac_bs* bs, drflac_uint8 streaminfoBitsPerSample, drflac_frame_header* header)
-{
-    const drflac_uint32 sampleRateTable[15]  = {0, 88200, 176400, 192000, 8000, 16000, 22050, 24000, 32000, 44100, 48000, 60000, 64000, 66000, 96000};
-    const drflac_uint8 bitsPerSampleTable[8] = {0, 8, 12, (drflac_uint8)-1, 16, 20, 24, (drflac_uint8)-1};   /* -1 = reserved. */
-
-    DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(header != NULL);
-
-    /* Keep looping until we find a valid sync code. */
-    for (;;) {
-        drflac_uint8 crc8 = 0xCE; /* 0xCE = drflac_crc8(0, 0x3FFE, 14); */
-        drflac_uint8 reserved = 0;
-        drflac_uint8 blockingStrategy = 0;
-        drflac_uint8 blockSize = 0;
-        drflac_uint8 sampleRate = 0;
-        drflac_uint8 channelAssignment = 0;
-        drflac_uint8 bitsPerSample = 0;
-        drflac_bool32 isVariableBlockSize;
-
-        if (!drflac__find_and_seek_to_next_sync_code(bs)) {
-            return DRFLAC_FALSE;
-        }
-
-        if (!drflac__read_uint8(bs, 1, &reserved)) {
-            return DRFLAC_FALSE;
-        }
-        if (reserved == 1) {
-            continue;
-        }
-        crc8 = drflac_crc8(crc8, reserved, 1);
-
-        if (!drflac__read_uint8(bs, 1, &blockingStrategy)) {
-            return DRFLAC_FALSE;
-        }
-        crc8 = drflac_crc8(crc8, blockingStrategy, 1);
-
-        if (!drflac__read_uint8(bs, 4, &blockSize)) {
-            return DRFLAC_FALSE;
-        }
-        if (blockSize == 0) {
-            continue;
-        }
-        crc8 = drflac_crc8(crc8, blockSize, 4);
-
-        if (!drflac__read_uint8(bs, 4, &sampleRate)) {
-            return DRFLAC_FALSE;
-        }
-        crc8 = drflac_crc8(crc8, sampleRate, 4);
-
-        if (!drflac__read_uint8(bs, 4, &channelAssignment)) {
-            return DRFLAC_FALSE;
-        }
-        if (channelAssignment > 10) {
-            continue;
-        }
-        crc8 = drflac_crc8(crc8, channelAssignment, 4);
-
-        if (!drflac__read_uint8(bs, 3, &bitsPerSample)) {
-            return DRFLAC_FALSE;
-        }
-        if (bitsPerSample == 3 || bitsPerSample == 7) {
-            continue;
-        }
-        crc8 = drflac_crc8(crc8, bitsPerSample, 3);
-
-
-        if (!drflac__read_uint8(bs, 1, &reserved)) {
-            return DRFLAC_FALSE;
-        }
-        if (reserved == 1) {
-            continue;
-        }
-        crc8 = drflac_crc8(crc8, reserved, 1);
-
-
-        isVariableBlockSize = blockingStrategy == 1;
-        if (isVariableBlockSize) {
-            drflac_uint64 pcmFrameNumber;
-            drflac_result result = drflac__read_utf8_coded_number(bs, &pcmFrameNumber, &crc8);
-            if (result != DRFLAC_SUCCESS) {
-                if (result == DRFLAC_AT_END) {
-                    return DRFLAC_FALSE;
-                } else {
-                    continue;
-                }
-            }
-            header->flacFrameNumber  = 0;
-            header->pcmFrameNumber = pcmFrameNumber;
-        } else {
-            drflac_uint64 flacFrameNumber = 0;
-            drflac_result result = drflac__read_utf8_coded_number(bs, &flacFrameNumber, &crc8);
-            if (result != DRFLAC_SUCCESS) {
-                if (result == DRFLAC_AT_END) {
-                    return DRFLAC_FALSE;
-                } else {
-                    continue;
-                }
-            }
-            header->flacFrameNumber  = (drflac_uint32)flacFrameNumber;   /* <-- Safe cast. */
-            header->pcmFrameNumber = 0;
-        }
-
-
-        DRFLAC_ASSERT(blockSize > 0);
-        if (blockSize == 1) {
-            header->blockSizeInPCMFrames = 192;
-        } else if (blockSize >= 2 && blockSize <= 5) {
-            header->blockSizeInPCMFrames = 576 * (1 << (blockSize - 2));
-        } else if (blockSize == 6) {
-            if (!drflac__read_uint16(bs, 8, &header->blockSizeInPCMFrames)) {
-                return DRFLAC_FALSE;
-            }
-            crc8 = drflac_crc8(crc8, header->blockSizeInPCMFrames, 8);
-            header->blockSizeInPCMFrames += 1;
-        } else if (blockSize == 7) {
-            if (!drflac__read_uint16(bs, 16, &header->blockSizeInPCMFrames)) {
-                return DRFLAC_FALSE;
-            }
-            crc8 = drflac_crc8(crc8, header->blockSizeInPCMFrames, 16);
-            header->blockSizeInPCMFrames += 1;
-        } else {
-            DRFLAC_ASSERT(blockSize >= 8);
-            header->blockSizeInPCMFrames = 256 * (1 << (blockSize - 8));
-        }
-
-
-        if (sampleRate <= 11) {
-            header->sampleRate = sampleRateTable[sampleRate];
-        } else if (sampleRate == 12) {
-            if (!drflac__read_uint32(bs, 8, &header->sampleRate)) {
-                return DRFLAC_FALSE;
-            }
-            crc8 = drflac_crc8(crc8, header->sampleRate, 8);
-            header->sampleRate *= 1000;
-        } else if (sampleRate == 13) {
-            if (!drflac__read_uint32(bs, 16, &header->sampleRate)) {
-                return DRFLAC_FALSE;
-            }
-            crc8 = drflac_crc8(crc8, header->sampleRate, 16);
-        } else if (sampleRate == 14) {
-            if (!drflac__read_uint32(bs, 16, &header->sampleRate)) {
-                return DRFLAC_FALSE;
-            }
-            crc8 = drflac_crc8(crc8, header->sampleRate, 16);
-            header->sampleRate *= 10;
-        } else {
-            continue;  /* Invalid. Assume an invalid block. */
-        }
-
-
-        header->channelAssignment = channelAssignment;
-
-        header->bitsPerSample = bitsPerSampleTable[bitsPerSample];
-        if (header->bitsPerSample == 0) {
-            header->bitsPerSample = streaminfoBitsPerSample;
-        }
-
-        if (!drflac__read_uint8(bs, 8, &header->crc8)) {
-            return DRFLAC_FALSE;
-        }
-
-#ifndef DR_FLAC_NO_CRC
-        if (header->crc8 != crc8) {
-            continue;    /* CRC mismatch. Loop back to the top and find the next sync code. */
-        }
-#endif
-        return DRFLAC_TRUE;
-    }
-}
-
-static drflac_bool32 drflac__read_subframe_header(drflac_bs* bs, drflac_subframe* pSubframe)
-{
-    drflac_uint8 header;
-    int type;
-
-    if (!drflac__read_uint8(bs, 8, &header)) {
-        return DRFLAC_FALSE;
-    }
-
-    /* First bit should always be 0. */
-    if ((header & 0x80) != 0) {
-        return DRFLAC_FALSE;
-    }
-
-    type = (header & 0x7E) >> 1;
-    if (type == 0) {
-        pSubframe->subframeType = DRFLAC_SUBFRAME_CONSTANT;
-    } else if (type == 1) {
-        pSubframe->subframeType = DRFLAC_SUBFRAME_VERBATIM;
-    } else {
-        if ((type & 0x20) != 0) {
-            pSubframe->subframeType = DRFLAC_SUBFRAME_LPC;
-            pSubframe->lpcOrder = (drflac_uint8)(type & 0x1F) + 1;
-        } else if ((type & 0x08) != 0) {
-            pSubframe->subframeType = DRFLAC_SUBFRAME_FIXED;
-            pSubframe->lpcOrder = (drflac_uint8)(type & 0x07);
-            if (pSubframe->lpcOrder > 4) {
-                pSubframe->subframeType = DRFLAC_SUBFRAME_RESERVED;
-                pSubframe->lpcOrder = 0;
-            }
-        } else {
-            pSubframe->subframeType = DRFLAC_SUBFRAME_RESERVED;
-        }
-    }
-
-    if (pSubframe->subframeType == DRFLAC_SUBFRAME_RESERVED) {
-        return DRFLAC_FALSE;
-    }
-
-    /* Wasted bits per sample. */
-    pSubframe->wastedBitsPerSample = 0;
-    if ((header & 0x01) == 1) {
-        unsigned int wastedBitsPerSample;
-        if (!drflac__seek_past_next_set_bit(bs, &wastedBitsPerSample)) {
-            return DRFLAC_FALSE;
-        }
-        pSubframe->wastedBitsPerSample = (drflac_uint8)wastedBitsPerSample + 1;
-    }
-
-    return DRFLAC_TRUE;
-}
-
-static drflac_bool32 drflac__decode_subframe(drflac_bs* bs, drflac_frame* frame, int subframeIndex, drflac_int32* pDecodedSamplesOut)
-{
-    drflac_subframe* pSubframe;
-    drflac_uint32 subframeBitsPerSample;
-
-    DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(frame != NULL);
-
-    pSubframe = frame->subframes + subframeIndex;
-    if (!drflac__read_subframe_header(bs, pSubframe)) {
-        return DRFLAC_FALSE;
-    }
-
-    /* Side channels require an extra bit per sample. Took a while to figure that one out... */
-    subframeBitsPerSample = frame->header.bitsPerSample;
-    if ((frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE || frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE) && subframeIndex == 1) {
-        subframeBitsPerSample += 1;
-    } else if (frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE && subframeIndex == 0) {
-        subframeBitsPerSample += 1;
-    }
-
-    /* Need to handle wasted bits per sample. */
-    if (pSubframe->wastedBitsPerSample >= subframeBitsPerSample) {
-        return DRFLAC_FALSE;
-    }
-    subframeBitsPerSample -= pSubframe->wastedBitsPerSample;
-
-    pSubframe->pSamplesS32 = pDecodedSamplesOut;
-
-    switch (pSubframe->subframeType)
-    {
-        case DRFLAC_SUBFRAME_CONSTANT:
-        {
-            drflac__decode_samples__constant(bs, frame->header.blockSizeInPCMFrames, subframeBitsPerSample, pSubframe->pSamplesS32);
-        } break;
-
-        case DRFLAC_SUBFRAME_VERBATIM:
-        {
-            drflac__decode_samples__verbatim(bs, frame->header.blockSizeInPCMFrames, subframeBitsPerSample, pSubframe->pSamplesS32);
-        } break;
-
-        case DRFLAC_SUBFRAME_FIXED:
-        {
-            drflac__decode_samples__fixed(bs, frame->header.blockSizeInPCMFrames, subframeBitsPerSample, pSubframe->lpcOrder, pSubframe->pSamplesS32);
-        } break;
-
-        case DRFLAC_SUBFRAME_LPC:
-        {
-            drflac__decode_samples__lpc(bs, frame->header.blockSizeInPCMFrames, subframeBitsPerSample, pSubframe->lpcOrder, pSubframe->pSamplesS32);
-        } break;
-
-        default: return DRFLAC_FALSE;
-    }
-
-    return DRFLAC_TRUE;
-}
-
-static drflac_bool32 drflac__seek_subframe(drflac_bs* bs, drflac_frame* frame, int subframeIndex)
-{
-    drflac_subframe* pSubframe;
-    drflac_uint32 subframeBitsPerSample;
-
-    DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(frame != NULL);
-
-    pSubframe = frame->subframes + subframeIndex;
-    if (!drflac__read_subframe_header(bs, pSubframe)) {
-        return DRFLAC_FALSE;
-    }
-
-    /* Side channels require an extra bit per sample. Took a while to figure that one out... */
-    subframeBitsPerSample = frame->header.bitsPerSample;
-    if ((frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE || frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE) && subframeIndex == 1) {
-        subframeBitsPerSample += 1;
-    } else if (frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE && subframeIndex == 0) {
-        subframeBitsPerSample += 1;
-    }
-
-    /* Need to handle wasted bits per sample. */
-    if (pSubframe->wastedBitsPerSample >= subframeBitsPerSample) {
-        return DRFLAC_FALSE;
-    }
-    subframeBitsPerSample -= pSubframe->wastedBitsPerSample;
-
-    pSubframe->pSamplesS32 = NULL;
-
-    switch (pSubframe->subframeType)
-    {
-        case DRFLAC_SUBFRAME_CONSTANT:
-        {
-            if (!drflac__seek_bits(bs, subframeBitsPerSample)) {
-                return DRFLAC_FALSE;
-            }
-        } break;
-
-        case DRFLAC_SUBFRAME_VERBATIM:
-        {
-            unsigned int bitsToSeek = frame->header.blockSizeInPCMFrames * subframeBitsPerSample;
-            if (!drflac__seek_bits(bs, bitsToSeek)) {
-                return DRFLAC_FALSE;
-            }
-        } break;
-
-        case DRFLAC_SUBFRAME_FIXED:
-        {
-            unsigned int bitsToSeek = pSubframe->lpcOrder * subframeBitsPerSample;
-            if (!drflac__seek_bits(bs, bitsToSeek)) {
-                return DRFLAC_FALSE;
-            }
-
-            if (!drflac__read_and_seek_residual(bs, frame->header.blockSizeInPCMFrames, pSubframe->lpcOrder)) {
-                return DRFLAC_FALSE;
-            }
-        } break;
-
-        case DRFLAC_SUBFRAME_LPC:
-        {
-            drflac_uint8 lpcPrecision;
-
-            unsigned int bitsToSeek = pSubframe->lpcOrder * subframeBitsPerSample;
-            if (!drflac__seek_bits(bs, bitsToSeek)) {
-                return DRFLAC_FALSE;
-            }
-
-            if (!drflac__read_uint8(bs, 4, &lpcPrecision)) {
-                return DRFLAC_FALSE;
-            }
-            if (lpcPrecision == 15) {
-                return DRFLAC_FALSE;    /* Invalid. */
-            }
-            lpcPrecision += 1;
-
-
-            bitsToSeek = (pSubframe->lpcOrder * lpcPrecision) + 5;    /* +5 for shift. */
-            if (!drflac__seek_bits(bs, bitsToSeek)) {
-                return DRFLAC_FALSE;
-            }
-
-            if (!drflac__read_and_seek_residual(bs, frame->header.blockSizeInPCMFrames, pSubframe->lpcOrder)) {
-                return DRFLAC_FALSE;
-            }
-        } break;
-
-        default: return DRFLAC_FALSE;
-    }
-
-    return DRFLAC_TRUE;
-}
-
-
-static DRFLAC_INLINE drflac_uint8 drflac__get_channel_count_from_channel_assignment(drflac_int8 channelAssignment)
-{
-    drflac_uint8 lookup[] = {1, 2, 3, 4, 5, 6, 7, 8, 2, 2, 2};
-
-    DRFLAC_ASSERT(channelAssignment <= 10);
-    return lookup[channelAssignment];
-}
-
-static drflac_result drflac__decode_flac_frame(drflac* pFlac)
-{
-    int channelCount;
-    int i;
-    drflac_uint8 paddingSizeInBits;
-    drflac_uint16 desiredCRC16;
-#ifndef DR_FLAC_NO_CRC
-    drflac_uint16 actualCRC16;
-#endif
-
-    /* This function should be called while the stream is sitting on the first byte after the frame header. */
-    DRFLAC_ZERO_MEMORY(pFlac->currentFLACFrame.subframes, sizeof(pFlac->currentFLACFrame.subframes));
-
-    /* The frame block size must never be larger than the maximum block size defined by the FLAC stream. */
-    if (pFlac->currentFLACFrame.header.blockSizeInPCMFrames > pFlac->maxBlockSizeInPCMFrames) {
-        return DRFLAC_ERROR;
-    }
-
-    /* The number of channels in the frame must match the channel count from the STREAMINFO block. */
-    channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFLACFrame.header.channelAssignment);
-    if (channelCount != (int)pFlac->channels) {
-        return DRFLAC_ERROR;
-    }
-
-    for (i = 0; i < channelCount; ++i) {
-        if (!drflac__decode_subframe(&pFlac->bs, &pFlac->currentFLACFrame, i, pFlac->pDecodedSamples + (pFlac->currentFLACFrame.header.blockSizeInPCMFrames * i))) {
-            return DRFLAC_ERROR;
-        }
-    }
-
-    paddingSizeInBits = (drflac_uint8)(DRFLAC_CACHE_L1_BITS_REMAINING(&pFlac->bs) & 7);
-    if (paddingSizeInBits > 0) {
-        drflac_uint8 padding = 0;
-        if (!drflac__read_uint8(&pFlac->bs, paddingSizeInBits, &padding)) {
-            return DRFLAC_AT_END;
-        }
-    }
-
-#ifndef DR_FLAC_NO_CRC
-    actualCRC16 = drflac__flush_crc16(&pFlac->bs);
-#endif
-    if (!drflac__read_uint16(&pFlac->bs, 16, &desiredCRC16)) {
-        return DRFLAC_AT_END;
-    }
-
-#ifndef DR_FLAC_NO_CRC
-    if (actualCRC16 != desiredCRC16) {
-        return DRFLAC_CRC_MISMATCH;    /* CRC mismatch. */
-    }
-#endif
-
-    pFlac->currentFLACFrame.pcmFramesRemaining = pFlac->currentFLACFrame.header.blockSizeInPCMFrames;
-
-    return DRFLAC_SUCCESS;
-}
-
-static drflac_result drflac__seek_flac_frame(drflac* pFlac)
-{
-    int channelCount;
-    int i;
-    drflac_uint16 desiredCRC16;
-#ifndef DR_FLAC_NO_CRC
-    drflac_uint16 actualCRC16;
-#endif
-
-    channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFLACFrame.header.channelAssignment);
-    for (i = 0; i < channelCount; ++i) {
-        if (!drflac__seek_subframe(&pFlac->bs, &pFlac->currentFLACFrame, i)) {
-            return DRFLAC_ERROR;
-        }
-    }
-
-    /* Padding. */
-    if (!drflac__seek_bits(&pFlac->bs, DRFLAC_CACHE_L1_BITS_REMAINING(&pFlac->bs) & 7)) {
-        return DRFLAC_ERROR;
-    }
-
-    /* CRC. */
-#ifndef DR_FLAC_NO_CRC
-    actualCRC16 = drflac__flush_crc16(&pFlac->bs);
-#endif
-    if (!drflac__read_uint16(&pFlac->bs, 16, &desiredCRC16)) {
-        return DRFLAC_AT_END;
-    }
-
-#ifndef DR_FLAC_NO_CRC
-    if (actualCRC16 != desiredCRC16) {
-        return DRFLAC_CRC_MISMATCH;    /* CRC mismatch. */
-    }
-#endif
-
-    return DRFLAC_SUCCESS;
-}
-
-static drflac_bool32 drflac__read_and_decode_next_flac_frame(drflac* pFlac)
-{
-    DRFLAC_ASSERT(pFlac != NULL);
-
-    for (;;) {
-        drflac_result result;
-
-        if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
-            return DRFLAC_FALSE;
-        }
-
-        result = drflac__decode_flac_frame(pFlac);
-        if (result != DRFLAC_SUCCESS) {
-            if (result == DRFLAC_CRC_MISMATCH) {
-                continue;   /* CRC mismatch. Skip to the next frame. */
-            } else {
-                return DRFLAC_FALSE;
-            }
-        }
-
-        return DRFLAC_TRUE;
-    }
-}
-
-static void drflac__get_pcm_frame_range_of_current_flac_frame(drflac* pFlac, drflac_uint64* pFirstPCMFrame, drflac_uint64* pLastPCMFrame)
-{
-    drflac_uint64 firstPCMFrame;
-    drflac_uint64 lastPCMFrame;
-
-    DRFLAC_ASSERT(pFlac != NULL);
-
-    firstPCMFrame = pFlac->currentFLACFrame.header.pcmFrameNumber;
-    if (firstPCMFrame == 0) {
-        firstPCMFrame = ((drflac_uint64)pFlac->currentFLACFrame.header.flacFrameNumber) * pFlac->maxBlockSizeInPCMFrames;
-    }
-
-    lastPCMFrame = firstPCMFrame + pFlac->currentFLACFrame.header.blockSizeInPCMFrames;
-    if (lastPCMFrame > 0) {
-        lastPCMFrame -= 1; /* Needs to be zero based. */
-    }
-
-    if (pFirstPCMFrame) {
-        *pFirstPCMFrame = firstPCMFrame;
-    }
-    if (pLastPCMFrame) {
-        *pLastPCMFrame = lastPCMFrame;
-    }
-}
-
-static drflac_bool32 drflac__seek_to_first_frame(drflac* pFlac)
-{
-    drflac_bool32 result;
-
-    DRFLAC_ASSERT(pFlac != NULL);
-
-    result = drflac__seek_to_byte(&pFlac->bs, pFlac->firstFLACFramePosInBytes);
-
-    DRFLAC_ZERO_MEMORY(&pFlac->currentFLACFrame, sizeof(pFlac->currentFLACFrame));
-    pFlac->currentPCMFrame = 0;
-
-    return result;
-}
-
-static DRFLAC_INLINE drflac_result drflac__seek_to_next_flac_frame(drflac* pFlac)
-{
-    /* This function should only ever be called while the decoder is sitting on the first byte past the FRAME_HEADER section. */
-    DRFLAC_ASSERT(pFlac != NULL);
-    return drflac__seek_flac_frame(pFlac);
-}
-
-
-static drflac_uint64 drflac__seek_forward_by_pcm_frames(drflac* pFlac, drflac_uint64 pcmFramesToSeek)
-{
-    drflac_uint64 pcmFramesRead = 0;
-    while (pcmFramesToSeek > 0) {
-        if (pFlac->currentFLACFrame.pcmFramesRemaining == 0) {
-            if (!drflac__read_and_decode_next_flac_frame(pFlac)) {
-                break;  /* Couldn't read the next frame, so just break from the loop and return. */
-            }
-        } else {
-            if (pFlac->currentFLACFrame.pcmFramesRemaining > pcmFramesToSeek) {
-                pcmFramesRead   += pcmFramesToSeek;
-                pFlac->currentFLACFrame.pcmFramesRemaining -= (drflac_uint32)pcmFramesToSeek;   /* <-- Safe cast. Will always be < currentFrame.pcmFramesRemaining < 65536. */
-                pcmFramesToSeek  = 0;
-            } else {
-                pcmFramesRead   += pFlac->currentFLACFrame.pcmFramesRemaining;
-                pcmFramesToSeek -= pFlac->currentFLACFrame.pcmFramesRemaining;
-                pFlac->currentFLACFrame.pcmFramesRemaining = 0;
-            }
-        }
-    }
-
-    pFlac->currentPCMFrame += pcmFramesRead;
-    return pcmFramesRead;
-}
-
-
-static drflac_bool32 drflac__seek_to_pcm_frame__brute_force(drflac* pFlac, drflac_uint64 pcmFrameIndex)
-{
-    drflac_bool32 isMidFrame = DRFLAC_FALSE;
-    drflac_uint64 runningPCMFrameCount;
-
-    DRFLAC_ASSERT(pFlac != NULL);
-
-    /* If we are seeking forward we start from the current position. Otherwise we need to start all the way from the start of the file. */
-    if (pcmFrameIndex >= pFlac->currentPCMFrame) {
-        /* Seeking forward. Need to seek from the current position. */
-        runningPCMFrameCount = pFlac->currentPCMFrame;
-
-        /* The frame header for the first frame may not yet have been read. We need to do that if necessary. */
-        if (pFlac->currentPCMFrame == 0 && pFlac->currentFLACFrame.pcmFramesRemaining == 0) {
-            if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
-                return DRFLAC_FALSE;
-            }
-        } else {
-            isMidFrame = DRFLAC_TRUE;
-        }
-    } else {
-        /* Seeking backwards. Need to seek from the start of the file. */
-        runningPCMFrameCount = 0;
-
-        /* Move back to the start. */
-        if (!drflac__seek_to_first_frame(pFlac)) {
-            return DRFLAC_FALSE;
-        }
-
-        /* Decode the first frame in preparation for sample-exact seeking below. */
-        if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
-            return DRFLAC_FALSE;
-        }
-    }
-
-    /*
-    We need to as quickly as possible find the frame that contains the target sample. To do this, we iterate over each frame and inspect its
-    header. If based on the header we can determine that the frame contains the sample, we do a full decode of that frame.
-    */
-    for (;;) {
-        drflac_uint64 pcmFrameCountInThisFLACFrame;
-        drflac_uint64 firstPCMFrameInFLACFrame = 0;
-        drflac_uint64 lastPCMFrameInFLACFrame = 0;
-
-        drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &firstPCMFrameInFLACFrame, &lastPCMFrameInFLACFrame);
-
-        pcmFrameCountInThisFLACFrame = (lastPCMFrameInFLACFrame - firstPCMFrameInFLACFrame) + 1;
-        if (pcmFrameIndex < (runningPCMFrameCount + pcmFrameCountInThisFLACFrame)) {
-            /*
-            The sample should be in this frame. We need to fully decode it, however if it's an invalid frame (a CRC mismatch), we need to pretend
-            it never existed and keep iterating.
-            */
-            drflac_uint64 pcmFramesToDecode = pcmFrameIndex - runningPCMFrameCount;
-
-            if (!isMidFrame) {
-                drflac_result result = drflac__decode_flac_frame(pFlac);
-                if (result == DRFLAC_SUCCESS) {
-                    /* The frame is valid. We just need to skip over some samples to ensure it's sample-exact. */
-                    return drflac__seek_forward_by_pcm_frames(pFlac, pcmFramesToDecode) == pcmFramesToDecode;  /* <-- If this fails, something bad has happened (it should never fail). */
-                } else {
-                    if (result == DRFLAC_CRC_MISMATCH) {
-                        goto next_iteration;   /* CRC mismatch. Pretend this frame never existed. */
-                    } else {
-                        return DRFLAC_FALSE;
-                    }
-                }
-            } else {
-                /* We started seeking mid-frame which means we need to skip the frame decoding part. */
-                return drflac__seek_forward_by_pcm_frames(pFlac, pcmFramesToDecode) == pcmFramesToDecode;
-            }
-        } else {
-            /*
-            It's not in this frame. We need to seek past the frame, but check if there was a CRC mismatch. If so, we pretend this
-            frame never existed and leave the running sample count untouched.
-            */
-            if (!isMidFrame) {
-                drflac_result result = drflac__seek_to_next_flac_frame(pFlac);
-                if (result == DRFLAC_SUCCESS) {
-                    runningPCMFrameCount += pcmFrameCountInThisFLACFrame;
-                } else {
-                    if (result == DRFLAC_CRC_MISMATCH) {
-                        goto next_iteration;   /* CRC mismatch. Pretend this frame never existed. */
-                    } else {
-                        return DRFLAC_FALSE;
-                    }
-                }
-            } else {
-                /*
-                We started seeking mid-frame which means we need to seek by reading to the end of the frame instead of with
-                drflac__seek_to_next_flac_frame() which only works if the decoder is sitting on the byte just after the frame header.
-                */
-                runningPCMFrameCount += pFlac->currentFLACFrame.pcmFramesRemaining;
-                pFlac->currentFLACFrame.pcmFramesRemaining = 0;
-                isMidFrame = DRFLAC_FALSE;
-            }
-
-            /* If we are seeking to the end of the file and we've just hit it, we're done. */
-            if (pcmFrameIndex == pFlac->totalPCMFrameCount && runningPCMFrameCount == pFlac->totalPCMFrameCount) {
-                return DRFLAC_TRUE;
-            }
-        }
-
-    next_iteration:
-        /* Grab the next frame in preparation for the next iteration. */
-        if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
-            return DRFLAC_FALSE;
-        }
-    }
-}
-
-
-#if !defined(DR_FLAC_NO_CRC)
-/*
-We use an average compression ratio to determine our approximate start location. FLAC files are generally about 50%-70% the size of their
-uncompressed counterparts so we'll use this as a basis. I'm going to split the middle and use a factor of 0.6 to determine the starting
-location.
-*/
-#define DRFLAC_BINARY_SEARCH_APPROX_COMPRESSION_RATIO 0.6f
-
-static drflac_bool32 drflac__seek_to_approximate_flac_frame_to_byte(drflac* pFlac, drflac_uint64 targetByte, drflac_uint64 rangeLo, drflac_uint64 rangeHi, drflac_uint64* pLastSuccessfulSeekOffset)
-{
-    DRFLAC_ASSERT(pFlac != NULL);
-    DRFLAC_ASSERT(pLastSuccessfulSeekOffset != NULL);
-    DRFLAC_ASSERT(targetByte >= rangeLo);
-    DRFLAC_ASSERT(targetByte <= rangeHi);
-
-    *pLastSuccessfulSeekOffset = pFlac->firstFLACFramePosInBytes;
-
-    for (;;) {
-        /* When seeking to a byte, failure probably means we've attempted to seek beyond the end of the stream. To counter this we just halve it each attempt. */
-        if (!drflac__seek_to_byte(&pFlac->bs, targetByte)) {
-            /* If we couldn't even seek to the first byte in the stream we have a problem. Just abandon the whole thing. */
-            if (targetByte == 0) {
-                drflac__seek_to_first_frame(pFlac); /* Try to recover. */
-                return DRFLAC_FALSE;
-            }
-
-            /* Halve the byte location and continue. */
-            targetByte = rangeLo + ((rangeHi - rangeLo)/2);
-            rangeHi = targetByte;
-        } else {
-            /* Getting here should mean that we have seeked to an appropriate byte. */
-
-            /* Clear the details of the FLAC frame so we don't misreport data. */
-            DRFLAC_ZERO_MEMORY(&pFlac->currentFLACFrame, sizeof(pFlac->currentFLACFrame));
-
-            /*
-            Now seek to the next FLAC frame. We need to decode the entire frame (not just the header) because it's possible for the header to incorrectly pass the
-            CRC check and return bad data. We need to decode the entire frame to be more certain. Although this seems unlikely, this has happened to me in testing
-            so it needs to stay this way for now.
-            */
-#if 1
-            if (!drflac__read_and_decode_next_flac_frame(pFlac)) {
-                /* Halve the byte location and continue. */
-                targetByte = rangeLo + ((rangeHi - rangeLo)/2);
-                rangeHi = targetByte;
-            } else {
-                break;
-            }
-#else
-            if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
-                /* Halve the byte location and continue. */
-                targetByte = rangeLo + ((rangeHi - rangeLo)/2);
-                rangeHi = targetByte;
-            } else {
-                break;
-            }
-#endif
-        }
-    }
-
-    /* The current PCM frame needs to be updated based on the frame we just seeked to. */
-    drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &pFlac->currentPCMFrame, NULL);
-
-    DRFLAC_ASSERT(targetByte <= rangeHi);
-
-    *pLastSuccessfulSeekOffset = targetByte;
-    return DRFLAC_TRUE;
-}
-
-static drflac_bool32 drflac__decode_flac_frame_and_seek_forward_by_pcm_frames(drflac* pFlac, drflac_uint64 offset)
-{
-    /* This section of code would be used if we were only decoding the FLAC frame header when calling drflac__seek_to_approximate_flac_frame_to_byte(). */
-#if 0
-    if (drflac__decode_flac_frame(pFlac) != DRFLAC_SUCCESS) {
-        /* We failed to decode this frame which may be due to it being corrupt. We'll just use the next valid FLAC frame. */
-        if (drflac__read_and_decode_next_flac_frame(pFlac) == DRFLAC_FALSE) {
-            return DRFLAC_FALSE;
-        }
-    }
-#endif
-
-    return drflac__seek_forward_by_pcm_frames(pFlac, offset) == offset;
-}
-
-
-static drflac_bool32 drflac__seek_to_pcm_frame__binary_search_internal(drflac* pFlac, drflac_uint64 pcmFrameIndex, drflac_uint64 byteRangeLo, drflac_uint64 byteRangeHi)
-{
-    /* This assumes pFlac->currentPCMFrame is sitting on byteRangeLo upon entry. */
-
-    drflac_uint64 targetByte;
-    drflac_uint64 pcmRangeLo = pFlac->totalPCMFrameCount;
-    drflac_uint64 pcmRangeHi = 0;
-    drflac_uint64 lastSuccessfulSeekOffset = (drflac_uint64)-1;
-    drflac_uint64 closestSeekOffsetBeforeTargetPCMFrame = byteRangeLo;
-    drflac_uint32 seekForwardThreshold = (pFlac->maxBlockSizeInPCMFrames != 0) ? pFlac->maxBlockSizeInPCMFrames*2 : 4096;
-
-    targetByte = byteRangeLo + (drflac_uint64)(((drflac_int64)((pcmFrameIndex - pFlac->currentPCMFrame) * pFlac->channels * pFlac->bitsPerSample)/8.0f) * DRFLAC_BINARY_SEARCH_APPROX_COMPRESSION_RATIO);
-    if (targetByte > byteRangeHi) {
-        targetByte = byteRangeHi;
-    }
-
-    for (;;) {
-        if (drflac__seek_to_approximate_flac_frame_to_byte(pFlac, targetByte, byteRangeLo, byteRangeHi, &lastSuccessfulSeekOffset)) {
-            /* We found a FLAC frame. We need to check if it contains the sample we're looking for. */
-            drflac_uint64 newPCMRangeLo;
-            drflac_uint64 newPCMRangeHi;
-            drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &newPCMRangeLo, &newPCMRangeHi);
-
-            /* If we selected the same frame, it means we should be pretty close. Just decode the rest. */
-            if (pcmRangeLo == newPCMRangeLo) {
-                if (!drflac__seek_to_approximate_flac_frame_to_byte(pFlac, closestSeekOffsetBeforeTargetPCMFrame, closestSeekOffsetBeforeTargetPCMFrame, byteRangeHi, &lastSuccessfulSeekOffset)) {
-                    break;  /* Failed to seek to closest frame. */
-                }
-
-                if (drflac__decode_flac_frame_and_seek_forward_by_pcm_frames(pFlac, pcmFrameIndex - pFlac->currentPCMFrame)) {
-                    return DRFLAC_TRUE;
-                } else {
-                    break;  /* Failed to seek forward. */
-                }
-            }
-
-            pcmRangeLo = newPCMRangeLo;
-            pcmRangeHi = newPCMRangeHi;
-
-            if (pcmRangeLo <= pcmFrameIndex && pcmRangeHi >= pcmFrameIndex) {
-                /* The target PCM frame is in this FLAC frame. */
-                if (drflac__decode_flac_frame_and_seek_forward_by_pcm_frames(pFlac, pcmFrameIndex - pFlac->currentPCMFrame) ) {
-                    return DRFLAC_TRUE;
-                } else {
-                    break;  /* Failed to seek to FLAC frame. */
-                }
-            } else {
-                const double approxCompressionRatio = (drflac_int64)(lastSuccessfulSeekOffset - pFlac->firstFLACFramePosInBytes) / ((drflac_int64)(pcmRangeLo * pFlac->channels * pFlac->bitsPerSample)/8.0f);
-
-                if (pcmRangeLo > pcmFrameIndex) {
-                    /* We seeked too far forward. We need to move our target byte backward and try again. */
-                    byteRangeHi = lastSuccessfulSeekOffset;
-                    if (byteRangeLo > byteRangeHi) {
-                        byteRangeLo = byteRangeHi;
-                    }
-
-                    targetByte = byteRangeLo + ((byteRangeHi - byteRangeLo) / 2);
-                    if (targetByte < byteRangeLo) {
-                        targetByte = byteRangeLo;
-                    }
-                } else /*if (pcmRangeHi < pcmFrameIndex)*/ {
-                    /* We didn't seek far enough. We need to move our target byte forward and try again. */
-
-                    /* If we're close enough we can just seek forward. */
-                    if ((pcmFrameIndex - pcmRangeLo) < seekForwardThreshold) {
-                        if (drflac__decode_flac_frame_and_seek_forward_by_pcm_frames(pFlac, pcmFrameIndex - pFlac->currentPCMFrame)) {
-                            return DRFLAC_TRUE;
-                        } else {
-                            break;  /* Failed to seek to FLAC frame. */
-                        }
-                    } else {
-                        byteRangeLo = lastSuccessfulSeekOffset;
-                        if (byteRangeHi < byteRangeLo) {
-                            byteRangeHi = byteRangeLo;
-                        }
-
-                        targetByte = lastSuccessfulSeekOffset + (drflac_uint64)(((drflac_int64)((pcmFrameIndex-pcmRangeLo) * pFlac->channels * pFlac->bitsPerSample)/8.0f) * approxCompressionRatio);
-                        if (targetByte > byteRangeHi) {
-                            targetByte = byteRangeHi;
-                        }
-
-                        if (closestSeekOffsetBeforeTargetPCMFrame < lastSuccessfulSeekOffset) {
-                            closestSeekOffsetBeforeTargetPCMFrame = lastSuccessfulSeekOffset;
-                        }
-                    }
-                }
-            }
-        } else {
-            /* Getting here is really bad. We just recover as best we can, but moving to the first frame in the stream, and then abort. */
-            break;
-        }
-    }
-
-    drflac__seek_to_first_frame(pFlac); /* <-- Try to recover. */
-    return DRFLAC_FALSE;
-}
-
-static drflac_bool32 drflac__seek_to_pcm_frame__binary_search(drflac* pFlac, drflac_uint64 pcmFrameIndex)
-{
-    drflac_uint64 byteRangeLo;
-    drflac_uint64 byteRangeHi;
-    drflac_uint32 seekForwardThreshold = (pFlac->maxBlockSizeInPCMFrames != 0) ? pFlac->maxBlockSizeInPCMFrames*2 : 4096;
-
-    /* Our algorithm currently assumes the FLAC stream is currently sitting at the start. */
-    if (drflac__seek_to_first_frame(pFlac) == DRFLAC_FALSE) {
-        return DRFLAC_FALSE;
-    }
-
-    /* If we're close enough to the start, just move to the start and seek forward. */
-    if (pcmFrameIndex < seekForwardThreshold) {
-        return drflac__seek_forward_by_pcm_frames(pFlac, pcmFrameIndex) == pcmFrameIndex;
-    }
-
-    /*
-    Our starting byte range is the byte position of the first FLAC frame and the approximate end of the file as if it were completely uncompressed. This ensures
-    the entire file is included, even though most of the time it'll exceed the end of the actual stream. This is OK as the frame searching logic will handle it.
-    */
-    byteRangeLo = pFlac->firstFLACFramePosInBytes;
-    byteRangeHi = pFlac->firstFLACFramePosInBytes + (drflac_uint64)((drflac_int64)(pFlac->totalPCMFrameCount * pFlac->channels * pFlac->bitsPerSample)/8.0f);
-
-    return drflac__seek_to_pcm_frame__binary_search_internal(pFlac, pcmFrameIndex, byteRangeLo, byteRangeHi);
-}
-#endif  /* !DR_FLAC_NO_CRC */
-
-static drflac_bool32 drflac__seek_to_pcm_frame__seek_table(drflac* pFlac, drflac_uint64 pcmFrameIndex)
-{
-    drflac_uint32 iClosestSeekpoint = 0;
-    drflac_bool32 isMidFrame = DRFLAC_FALSE;
-    drflac_uint64 runningPCMFrameCount;
-    drflac_uint32 iSeekpoint;
-
-
-    DRFLAC_ASSERT(pFlac != NULL);
-
-    if (pFlac->pSeekpoints == NULL || pFlac->seekpointCount == 0) {
-        return DRFLAC_FALSE;
-    }
-
-    for (iSeekpoint = 0; iSeekpoint < pFlac->seekpointCount; ++iSeekpoint) {
-        if (pFlac->pSeekpoints[iSeekpoint].firstPCMFrame >= pcmFrameIndex) {
-            break;
-        }
-
-        iClosestSeekpoint = iSeekpoint;
-    }
-
-    /* There's been cases where the seek table contains only zeros. We need to do some basic validation on the closest seekpoint. */
-    if (pFlac->pSeekpoints[iClosestSeekpoint].pcmFrameCount == 0 || pFlac->pSeekpoints[iClosestSeekpoint].pcmFrameCount > pFlac->maxBlockSizeInPCMFrames) {
-        return DRFLAC_FALSE;
-    }
-    if (pFlac->pSeekpoints[iClosestSeekpoint].firstPCMFrame > pFlac->totalPCMFrameCount && pFlac->totalPCMFrameCount > 0) {
-        return DRFLAC_FALSE;
-    }
-
-#if !defined(DR_FLAC_NO_CRC)
-    /* At this point we should know the closest seek point. We can use a binary search for this. We need to know the total sample count for this. */
-    if (pFlac->totalPCMFrameCount > 0) {
-        drflac_uint64 byteRangeLo;
-        drflac_uint64 byteRangeHi;
-
-        byteRangeHi = pFlac->firstFLACFramePosInBytes + (drflac_uint64)((drflac_int64)(pFlac->totalPCMFrameCount * pFlac->channels * pFlac->bitsPerSample)/8.0f);
-        byteRangeLo = pFlac->firstFLACFramePosInBytes + pFlac->pSeekpoints[iClosestSeekpoint].flacFrameOffset;
-
-        /*
-        If our closest seek point is not the last one, we only need to search between it and the next one. The section below calculates an appropriate starting
-        value for byteRangeHi which will clamp it appropriately.
-
-        Note that the next seekpoint must have an offset greater than the closest seekpoint because otherwise our binary search algorithm will break down. There
-        have been cases where a seektable consists of seek points where every byte offset is set to 0 which causes problems. If this happens we need to abort.
-        */
-        if (iClosestSeekpoint < pFlac->seekpointCount-1) {
-            drflac_uint32 iNextSeekpoint = iClosestSeekpoint + 1;
-
-            /* Basic validation on the seekpoints to ensure they're usable. */
-            if (pFlac->pSeekpoints[iClosestSeekpoint].flacFrameOffset >= pFlac->pSeekpoints[iNextSeekpoint].flacFrameOffset || pFlac->pSeekpoints[iNextSeekpoint].pcmFrameCount == 0) {
-                return DRFLAC_FALSE;    /* The next seekpoint doesn't look right. The seek table cannot be trusted from here. Abort. */
-            }
-
-            if (pFlac->pSeekpoints[iNextSeekpoint].firstPCMFrame != (((drflac_uint64)0xFFFFFFFF << 32) | 0xFFFFFFFF)) { /* Make sure it's not a placeholder seekpoint. */
-                byteRangeHi = pFlac->firstFLACFramePosInBytes + pFlac->pSeekpoints[iNextSeekpoint].flacFrameOffset - 1; /* byteRangeHi must be zero based. */
-            }
-        }
-
-        if (drflac__seek_to_byte(&pFlac->bs, pFlac->firstFLACFramePosInBytes + pFlac->pSeekpoints[iClosestSeekpoint].flacFrameOffset)) {
-            if (drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
-                drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &pFlac->currentPCMFrame, NULL);
-
-                if (drflac__seek_to_pcm_frame__binary_search_internal(pFlac, pcmFrameIndex, byteRangeLo, byteRangeHi)) {
-                    return DRFLAC_TRUE;
-                }
-            }
-        }
-    }
-#endif  /* !DR_FLAC_NO_CRC */
-
-    /* Getting here means we need to use a slower algorithm because the binary search method failed or cannot be used. */
-
-    /*
-    If we are seeking forward and the closest seekpoint is _before_ the current sample, we just seek forward from where we are. Otherwise we start seeking
-    from the seekpoint's first sample.
-    */
-    if (pcmFrameIndex >= pFlac->currentPCMFrame && pFlac->pSeekpoints[iClosestSeekpoint].firstPCMFrame <= pFlac->currentPCMFrame) {
-        /* Optimized case. Just seek forward from where we are. */
-        runningPCMFrameCount = pFlac->currentPCMFrame;
-
-        /* The frame header for the first frame may not yet have been read. We need to do that if necessary. */
-        if (pFlac->currentPCMFrame == 0 && pFlac->currentFLACFrame.pcmFramesRemaining == 0) {
-            if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
-                return DRFLAC_FALSE;
-            }
-        } else {
-            isMidFrame = DRFLAC_TRUE;
-        }
-    } else {
-        /* Slower case. Seek to the start of the seekpoint and then seek forward from there. */
-        runningPCMFrameCount = pFlac->pSeekpoints[iClosestSeekpoint].firstPCMFrame;
-
-        if (!drflac__seek_to_byte(&pFlac->bs, pFlac->firstFLACFramePosInBytes + pFlac->pSeekpoints[iClosestSeekpoint].flacFrameOffset)) {
-            return DRFLAC_FALSE;
-        }
-
-        /* Grab the frame the seekpoint is sitting on in preparation for the sample-exact seeking below. */
-        if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
-            return DRFLAC_FALSE;
-        }
-    }
-
-    for (;;) {
-        drflac_uint64 pcmFrameCountInThisFLACFrame;
-        drflac_uint64 firstPCMFrameInFLACFrame = 0;
-        drflac_uint64 lastPCMFrameInFLACFrame = 0;
-
-        drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &firstPCMFrameInFLACFrame, &lastPCMFrameInFLACFrame);
-
-        pcmFrameCountInThisFLACFrame = (lastPCMFrameInFLACFrame - firstPCMFrameInFLACFrame) + 1;
-        if (pcmFrameIndex < (runningPCMFrameCount + pcmFrameCountInThisFLACFrame)) {
-            /*
-            The sample should be in this frame. We need to fully decode it, but if it's an invalid frame (a CRC mismatch) we need to pretend
-            it never existed and keep iterating.
-            */
-            drflac_uint64 pcmFramesToDecode = pcmFrameIndex - runningPCMFrameCount;
-
-            if (!isMidFrame) {
-                drflac_result result = drflac__decode_flac_frame(pFlac);
-                if (result == DRFLAC_SUCCESS) {
-                    /* The frame is valid. We just need to skip over some samples to ensure it's sample-exact. */
-                    return drflac__seek_forward_by_pcm_frames(pFlac, pcmFramesToDecode) == pcmFramesToDecode;  /* <-- If this fails, something bad has happened (it should never fail). */
-                } else {
-                    if (result == DRFLAC_CRC_MISMATCH) {
-                        goto next_iteration;   /* CRC mismatch. Pretend this frame never existed. */
-                    } else {
-                        return DRFLAC_FALSE;
-                    }
-                }
-            } else {
-                /* We started seeking mid-frame which means we need to skip the frame decoding part. */
-                return drflac__seek_forward_by_pcm_frames(pFlac, pcmFramesToDecode) == pcmFramesToDecode;
-            }
-        } else {
-            /*
-            It's not in this frame. We need to seek past the frame, but check if there was a CRC mismatch. If so, we pretend this
-            frame never existed and leave the running sample count untouched.
-            */
-            if (!isMidFrame) {
-                drflac_result result = drflac__seek_to_next_flac_frame(pFlac);
-                if (result == DRFLAC_SUCCESS) {
-                    runningPCMFrameCount += pcmFrameCountInThisFLACFrame;
-                } else {
-                    if (result == DRFLAC_CRC_MISMATCH) {
-                        goto next_iteration;   /* CRC mismatch. Pretend this frame never existed. */
-                    } else {
-                        return DRFLAC_FALSE;
-                    }
-                }
-            } else {
-                /*
-                We started seeking mid-frame which means we need to seek by reading to the end of the frame instead of with
-                drflac__seek_to_next_flac_frame() which only works if the decoder is sitting on the byte just after the frame header.
-                */
-                runningPCMFrameCount += pFlac->currentFLACFrame.pcmFramesRemaining;
-                pFlac->currentFLACFrame.pcmFramesRemaining = 0;
-                isMidFrame = DRFLAC_FALSE;
-            }
-
-            /* If we are seeking to the end of the file and we've just hit it, we're done. */
-            if (pcmFrameIndex == pFlac->totalPCMFrameCount && runningPCMFrameCount == pFlac->totalPCMFrameCount) {
-                return DRFLAC_TRUE;
-            }
-        }
-
-    next_iteration:
-        /* Grab the next frame in preparation for the next iteration. */
-        if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
-            return DRFLAC_FALSE;
-        }
-    }
-}
-
-
-#ifndef DR_FLAC_NO_OGG
-typedef struct
-{
-    drflac_uint8 capturePattern[4];  /* Should be "OggS" */
-    drflac_uint8 structureVersion;   /* Always 0. */
-    drflac_uint8 headerType;
-    drflac_uint64 granulePosition;
-    drflac_uint32 serialNumber;
-    drflac_uint32 sequenceNumber;
-    drflac_uint32 checksum;
-    drflac_uint8 segmentCount;
-    drflac_uint8 segmentTable[255];
-} drflac_ogg_page_header;
-#endif
-
-typedef struct
-{
-    drflac_read_proc onRead;
-    drflac_seek_proc onSeek;
-    drflac_meta_proc onMeta;
-    drflac_container container;
-    void* pUserData;
-    void* pUserDataMD;
-    drflac_uint32 sampleRate;
-    drflac_uint8  channels;
-    drflac_uint8  bitsPerSample;
-    drflac_uint64 totalPCMFrameCount;
-    drflac_uint16 maxBlockSizeInPCMFrames;
-    drflac_uint64 runningFilePos;
-    drflac_bool32 hasStreamInfoBlock;
-    drflac_bool32 hasMetadataBlocks;
-    drflac_bs bs;                           /* <-- A bit streamer is required for loading data during initialization. */
-    drflac_frame_header firstFrameHeader;   /* <-- The header of the first frame that was read during relaxed initalization. Only set if there is no STREAMINFO block. */
-
-#ifndef DR_FLAC_NO_OGG
-    drflac_uint32 oggSerial;
-    drflac_uint64 oggFirstBytePos;
-    drflac_ogg_page_header oggBosHeader;
-#endif
-} drflac_init_info;
-
-static DRFLAC_INLINE void drflac__decode_block_header(drflac_uint32 blockHeader, drflac_uint8* isLastBlock, drflac_uint8* blockType, drflac_uint32* blockSize)
-{
-    blockHeader = drflac__be2host_32(blockHeader);
-    *isLastBlock = (drflac_uint8)((blockHeader & 0x80000000UL) >> 31);
-    *blockType   = (drflac_uint8)((blockHeader & 0x7F000000UL) >> 24);
-    *blockSize   =                (blockHeader & 0x00FFFFFFUL);
-}
-
-static DRFLAC_INLINE drflac_bool32 drflac__read_and_decode_block_header(drflac_read_proc onRead, void* pUserData, drflac_uint8* isLastBlock, drflac_uint8* blockType, drflac_uint32* blockSize)
-{
-    drflac_uint32 blockHeader;
-
-    *blockSize = 0;
-    if (onRead(pUserData, &blockHeader, 4) != 4) {
-        return DRFLAC_FALSE;
-    }
-
-    drflac__decode_block_header(blockHeader, isLastBlock, blockType, blockSize);
-    return DRFLAC_TRUE;
-}
-
-static drflac_bool32 drflac__read_streaminfo(drflac_read_proc onRead, void* pUserData, drflac_streaminfo* pStreamInfo)
-{
-    drflac_uint32 blockSizes;
-    drflac_uint64 frameSizes = 0;
-    drflac_uint64 importantProps;
-    drflac_uint8 md5[16];
-
-    /* min/max block size. */
-    if (onRead(pUserData, &blockSizes, 4) != 4) {
-        return DRFLAC_FALSE;
-    }
-
-    /* min/max frame size. */
-    if (onRead(pUserData, &frameSizes, 6) != 6) {
-        return DRFLAC_FALSE;
-    }
-
-    /* Sample rate, channels, bits per sample and total sample count. */
-    if (onRead(pUserData, &importantProps, 8) != 8) {
-        return DRFLAC_FALSE;
-    }
-
-    /* MD5 */
-    if (onRead(pUserData, md5, sizeof(md5)) != sizeof(md5)) {
-        return DRFLAC_FALSE;
-    }
-
-    blockSizes     = drflac__be2host_32(blockSizes);
-    frameSizes     = drflac__be2host_64(frameSizes);
-    importantProps = drflac__be2host_64(importantProps);
-
-    pStreamInfo->minBlockSizeInPCMFrames = (drflac_uint16)((blockSizes & 0xFFFF0000) >> 16);
-    pStreamInfo->maxBlockSizeInPCMFrames = (drflac_uint16) (blockSizes & 0x0000FFFF);
-    pStreamInfo->minFrameSizeInPCMFrames = (drflac_uint32)((frameSizes     &  (((drflac_uint64)0x00FFFFFF << 16) << 24)) >> 40);
-    pStreamInfo->maxFrameSizeInPCMFrames = (drflac_uint32)((frameSizes     &  (((drflac_uint64)0x00FFFFFF << 16) <<  0)) >> 16);
-    pStreamInfo->sampleRate              = (drflac_uint32)((importantProps &  (((drflac_uint64)0x000FFFFF << 16) << 28)) >> 44);
-    pStreamInfo->channels                = (drflac_uint8 )((importantProps &  (((drflac_uint64)0x0000000E << 16) << 24)) >> 41) + 1;
-    pStreamInfo->bitsPerSample           = (drflac_uint8 )((importantProps &  (((drflac_uint64)0x0000001F << 16) << 20)) >> 36) + 1;
-    pStreamInfo->totalPCMFrameCount      =                ((importantProps & ((((drflac_uint64)0x0000000F << 16) << 16) | 0xFFFFFFFF)));
-    DRFLAC_COPY_MEMORY(pStreamInfo->md5, md5, sizeof(md5));
-
-    return DRFLAC_TRUE;
-}
-
-
-static void* drflac__malloc_default(size_t sz, void* pUserData)
-{
-    (void)pUserData;
-    return DRFLAC_MALLOC(sz);
-}
-
-static void* drflac__realloc_default(void* p, size_t sz, void* pUserData)
-{
-    (void)pUserData;
-    return DRFLAC_REALLOC(p, sz);
-}
-
-static void drflac__free_default(void* p, void* pUserData)
-{
-    (void)pUserData;
-    DRFLAC_FREE(p);
-}
-
-
-static void* drflac__malloc_from_callbacks(size_t sz, const drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    if (pAllocationCallbacks == NULL) {
-        return NULL;
-    }
-
-    if (pAllocationCallbacks->onMalloc != NULL) {
-        return pAllocationCallbacks->onMalloc(sz, pAllocationCallbacks->pUserData);
-    }
-
-    /* Try using realloc(). */
-    if (pAllocationCallbacks->onRealloc != NULL) {
-        return pAllocationCallbacks->onRealloc(NULL, sz, pAllocationCallbacks->pUserData);
-    }
-
-    return NULL;
-}
-
-static void* drflac__realloc_from_callbacks(void* p, size_t szNew, size_t szOld, const drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    if (pAllocationCallbacks == NULL) {
-        return NULL;
-    }
-
-    if (pAllocationCallbacks->onRealloc != NULL) {
-        return pAllocationCallbacks->onRealloc(p, szNew, pAllocationCallbacks->pUserData);
-    }
-
-    /* Try emulating realloc() in terms of malloc()/free(). */
-    if (pAllocationCallbacks->onMalloc != NULL && pAllocationCallbacks->onFree != NULL) {
-        void* p2;
-
-        p2 = pAllocationCallbacks->onMalloc(szNew, pAllocationCallbacks->pUserData);
-        if (p2 == NULL) {
-            return NULL;
-        }
-
-        if (p != NULL) {
-            DRFLAC_COPY_MEMORY(p2, p, szOld);
-            pAllocationCallbacks->onFree(p, pAllocationCallbacks->pUserData);
-        }
-
-        return p2;
-    }
-
-    return NULL;
-}
-
-static void drflac__free_from_callbacks(void* p, const drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    if (p == NULL || pAllocationCallbacks == NULL) {
-        return;
-    }
-
-    if (pAllocationCallbacks->onFree != NULL) {
-        pAllocationCallbacks->onFree(p, pAllocationCallbacks->pUserData);
-    }
-}
-
-
-static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, void* pUserDataMD, drflac_uint64* pFirstFramePos, drflac_uint64* pSeektablePos, drflac_uint32* pSeektableSize, drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    /*
-    We want to keep track of the byte position in the stream of the seektable. At the time of calling this function we know that
-    we'll be sitting on byte 42.
-    */
-    drflac_uint64 runningFilePos = 42;
-    drflac_uint64 seektablePos   = 0;
-    drflac_uint32 seektableSize  = 0;
-
-    for (;;) {
-        drflac_metadata metadata;
-        drflac_uint8 isLastBlock = 0;
-        drflac_uint8 blockType;
-        drflac_uint32 blockSize;
-        if (drflac__read_and_decode_block_header(onRead, pUserData, &isLastBlock, &blockType, &blockSize) == DRFLAC_FALSE) {
-            return DRFLAC_FALSE;
-        }
-        runningFilePos += 4;
-
-        metadata.type = blockType;
-        metadata.pRawData = NULL;
-        metadata.rawDataSize = 0;
-
-        switch (blockType)
-        {
-            case DRFLAC_METADATA_BLOCK_TYPE_APPLICATION:
-            {
-                if (blockSize < 4) {
-                    return DRFLAC_FALSE;
-                }
-
-                if (onMeta) {
-                    void* pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks);
-                    if (pRawData == NULL) {
-                        return DRFLAC_FALSE;
-                    }
-
-                    if (onRead(pUserData, pRawData, blockSize) != blockSize) {
-                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
-                        return DRFLAC_FALSE;
-                    }
-
-                    metadata.pRawData = pRawData;
-                    metadata.rawDataSize = blockSize;
-                    metadata.data.application.id       = drflac__be2host_32(*(drflac_uint32*)pRawData);
-                    metadata.data.application.pData    = (const void*)((drflac_uint8*)pRawData + sizeof(drflac_uint32));
-                    metadata.data.application.dataSize = blockSize - sizeof(drflac_uint32);
-                    onMeta(pUserDataMD, &metadata);
-
-                    drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
-                }
-            } break;
-
-            case DRFLAC_METADATA_BLOCK_TYPE_SEEKTABLE:
-            {
-                seektablePos  = runningFilePos;
-                seektableSize = blockSize;
-
-                if (onMeta) {
-                    drflac_uint32 iSeekpoint;
-                    void* pRawData;
-
-                    pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks);
-                    if (pRawData == NULL) {
-                        return DRFLAC_FALSE;
-                    }
-
-                    if (onRead(pUserData, pRawData, blockSize) != blockSize) {
-                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
-                        return DRFLAC_FALSE;
-                    }
-
-                    metadata.pRawData = pRawData;
-                    metadata.rawDataSize = blockSize;
-                    metadata.data.seektable.seekpointCount = blockSize/sizeof(drflac_seekpoint);
-                    metadata.data.seektable.pSeekpoints = (const drflac_seekpoint*)pRawData;
-
-                    /* Endian swap. */
-                    for (iSeekpoint = 0; iSeekpoint < metadata.data.seektable.seekpointCount; ++iSeekpoint) {
-                        drflac_seekpoint* pSeekpoint = (drflac_seekpoint*)pRawData + iSeekpoint;
-                        pSeekpoint->firstPCMFrame   = drflac__be2host_64(pSeekpoint->firstPCMFrame);
-                        pSeekpoint->flacFrameOffset = drflac__be2host_64(pSeekpoint->flacFrameOffset);
-                        pSeekpoint->pcmFrameCount   = drflac__be2host_16(pSeekpoint->pcmFrameCount);
-                    }
-
-                    onMeta(pUserDataMD, &metadata);
-
-                    drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
-                }
-            } break;
-
-            case DRFLAC_METADATA_BLOCK_TYPE_VORBIS_COMMENT:
-            {
-                if (blockSize < 8) {
-                    return DRFLAC_FALSE;
-                }
-
-                if (onMeta) {
-                    void* pRawData;
-                    const char* pRunningData;
-                    const char* pRunningDataEnd;
-                    drflac_uint32 i;
-
-                    pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks);
-                    if (pRawData == NULL) {
-                        return DRFLAC_FALSE;
-                    }
-
-                    if (onRead(pUserData, pRawData, blockSize) != blockSize) {
-                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
-                        return DRFLAC_FALSE;
-                    }
-
-                    metadata.pRawData = pRawData;
-                    metadata.rawDataSize = blockSize;
-
-                    pRunningData    = (const char*)pRawData;
-                    pRunningDataEnd = (const char*)pRawData + blockSize;
-
-                    metadata.data.vorbis_comment.vendorLength = drflac__le2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
-
-                    /* Need space for the rest of the block */
-                    if ((pRunningDataEnd - pRunningData) - 4 < (drflac_int64)metadata.data.vorbis_comment.vendorLength) { /* <-- Note the order of operations to avoid overflow to a valid value */
-                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
-                        return DRFLAC_FALSE;
-                    }
-                    metadata.data.vorbis_comment.vendor       = pRunningData;                                            pRunningData += metadata.data.vorbis_comment.vendorLength;
-                    metadata.data.vorbis_comment.commentCount = drflac__le2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
-
-                    /* Need space for 'commentCount' comments after the block, which at minimum is a drflac_uint32 per comment */
-                    if ((pRunningDataEnd - pRunningData) / sizeof(drflac_uint32) < metadata.data.vorbis_comment.commentCount) { /* <-- Note the order of operations to avoid overflow to a valid value */
-                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
-                        return DRFLAC_FALSE;
-                    }
-                    metadata.data.vorbis_comment.pComments    = pRunningData;
-
-                    /* Check that the comments section is valid before passing it to the callback */
-                    for (i = 0; i < metadata.data.vorbis_comment.commentCount; ++i) {
-                        drflac_uint32 commentLength;
-
-                        if (pRunningDataEnd - pRunningData < 4) {
-                            drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
-                            return DRFLAC_FALSE;
-                        }
-
-                        commentLength = drflac__le2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
-                        if (pRunningDataEnd - pRunningData < (drflac_int64)commentLength) { /* <-- Note the order of operations to avoid overflow to a valid value */
-                            drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
-                            return DRFLAC_FALSE;
-                        }
-                        pRunningData += commentLength;
-                    }
-
-                    onMeta(pUserDataMD, &metadata);
-
-                    drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
-                }
-            } break;
-
-            case DRFLAC_METADATA_BLOCK_TYPE_CUESHEET:
-            {
-                if (blockSize < 396) {
-                    return DRFLAC_FALSE;
-                }
-
-                if (onMeta) {
-                    void* pRawData;
-                    const char* pRunningData;
-                    const char* pRunningDataEnd;
-                    drflac_uint8 iTrack;
-                    drflac_uint8 iIndex;
-
-                    pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks);
-                    if (pRawData == NULL) {
-                        return DRFLAC_FALSE;
-                    }
-
-                    if (onRead(pUserData, pRawData, blockSize) != blockSize) {
-                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
-                        return DRFLAC_FALSE;
-                    }
-
-                    metadata.pRawData = pRawData;
-                    metadata.rawDataSize = blockSize;
-
-                    pRunningData    = (const char*)pRawData;
-                    pRunningDataEnd = (const char*)pRawData + blockSize;
-
-                    DRFLAC_COPY_MEMORY(metadata.data.cuesheet.catalog, pRunningData, 128);                              pRunningData += 128;
-                    metadata.data.cuesheet.leadInSampleCount = drflac__be2host_64(*(const drflac_uint64*)pRunningData); pRunningData += 8;
-                    metadata.data.cuesheet.isCD              = (pRunningData[0] & 0x80) != 0;                           pRunningData += 259;
-                    metadata.data.cuesheet.trackCount        = pRunningData[0];                                         pRunningData += 1;
-                    metadata.data.cuesheet.pTrackData        = pRunningData;
-
-                    /* Check that the cuesheet tracks are valid before passing it to the callback */
-                    for (iTrack = 0; iTrack < metadata.data.cuesheet.trackCount; ++iTrack) {
-                        drflac_uint8 indexCount;
-                        drflac_uint32 indexPointSize;
-
-                        if (pRunningDataEnd - pRunningData < 36) {
-                            drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
-                            return DRFLAC_FALSE;
-                        }
-
-                        /* Skip to the index point count */
-                        pRunningData += 35;
-                        indexCount = pRunningData[0]; pRunningData += 1;
-                        indexPointSize = indexCount * sizeof(drflac_cuesheet_track_index);
-                        if (pRunningDataEnd - pRunningData < (drflac_int64)indexPointSize) {
-                            drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
-                            return DRFLAC_FALSE;
-                        }
-
-                        /* Endian swap. */
-                        for (iIndex = 0; iIndex < indexCount; ++iIndex) {
-                            drflac_cuesheet_track_index* pTrack = (drflac_cuesheet_track_index*)pRunningData;
-                            pRunningData += sizeof(drflac_cuesheet_track_index);
-                            pTrack->offset = drflac__be2host_64(pTrack->offset);
-                        }
-                    }
-
-                    onMeta(pUserDataMD, &metadata);
-
-                    drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
-                }
-            } break;
-
-            case DRFLAC_METADATA_BLOCK_TYPE_PICTURE:
-            {
-                if (blockSize < 32) {
-                    return DRFLAC_FALSE;
-                }
-
-                if (onMeta) {
-                    void* pRawData;
-                    const char* pRunningData;
-                    const char* pRunningDataEnd;
-
-                    pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks);
-                    if (pRawData == NULL) {
-                        return DRFLAC_FALSE;
-                    }
-
-                    if (onRead(pUserData, pRawData, blockSize) != blockSize) {
-                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
-                        return DRFLAC_FALSE;
-                    }
-
-                    metadata.pRawData = pRawData;
-                    metadata.rawDataSize = blockSize;
-
-                    pRunningData    = (const char*)pRawData;
-                    pRunningDataEnd = (const char*)pRawData + blockSize;
-
-                    metadata.data.picture.type       = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
-                    metadata.data.picture.mimeLength = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
-
-                    /* Need space for the rest of the block */
-                    if ((pRunningDataEnd - pRunningData) - 24 < (drflac_int64)metadata.data.picture.mimeLength) { /* <-- Note the order of operations to avoid overflow to a valid value */
-                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
-                        return DRFLAC_FALSE;
-                    }
-                    metadata.data.picture.mime              = pRunningData;                                            pRunningData += metadata.data.picture.mimeLength;
-                    metadata.data.picture.descriptionLength = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
-
-                    /* Need space for the rest of the block */
-                    if ((pRunningDataEnd - pRunningData) - 20 < (drflac_int64)metadata.data.picture.descriptionLength) { /* <-- Note the order of operations to avoid overflow to a valid value */
-                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
-                        return DRFLAC_FALSE;
-                    }
-                    metadata.data.picture.description     = pRunningData;                                            pRunningData += metadata.data.picture.descriptionLength;
-                    metadata.data.picture.width           = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
-                    metadata.data.picture.height          = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
-                    metadata.data.picture.colorDepth      = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
-                    metadata.data.picture.indexColorCount = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
-                    metadata.data.picture.pictureDataSize = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
-                    metadata.data.picture.pPictureData    = (const drflac_uint8*)pRunningData;
-
-                    /* Need space for the picture after the block */
-                    if (pRunningDataEnd - pRunningData < (drflac_int64)metadata.data.picture.pictureDataSize) { /* <-- Note the order of operations to avoid overflow to a valid value */
-                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
-                        return DRFLAC_FALSE;
-                    }
-
-                    onMeta(pUserDataMD, &metadata);
-
-                    drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
-                }
-            } break;
-
-            case DRFLAC_METADATA_BLOCK_TYPE_PADDING:
-            {
-                if (onMeta) {
-                    metadata.data.padding.unused = 0;
-
-                    /* Padding doesn't have anything meaningful in it, so just skip over it, but make sure the caller is aware of it by firing the callback. */
-                    if (!onSeek(pUserData, blockSize, drflac_seek_origin_current)) {
-                        isLastBlock = DRFLAC_TRUE;  /* An error occurred while seeking. Attempt to recover by treating this as the last block which will in turn terminate the loop. */
-                    } else {
-                        onMeta(pUserDataMD, &metadata);
-                    }
-                }
-            } break;
-
-            case DRFLAC_METADATA_BLOCK_TYPE_INVALID:
-            {
-                /* Invalid chunk. Just skip over this one. */
-                if (onMeta) {
-                    if (!onSeek(pUserData, blockSize, drflac_seek_origin_current)) {
-                        isLastBlock = DRFLAC_TRUE;  /* An error occurred while seeking. Attempt to recover by treating this as the last block which will in turn terminate the loop. */
-                    }
-                }
-            } break;
-
-            default:
-            {
-                /*
-                It's an unknown chunk, but not necessarily invalid. There's a chance more metadata blocks might be defined later on, so we
-                can at the very least report the chunk to the application and let it look at the raw data.
-                */
-                if (onMeta) {
-                    void* pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks);
-                    if (pRawData == NULL) {
-                        return DRFLAC_FALSE;
-                    }
-
-                    if (onRead(pUserData, pRawData, blockSize) != blockSize) {
-                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
-                        return DRFLAC_FALSE;
-                    }
-
-                    metadata.pRawData = pRawData;
-                    metadata.rawDataSize = blockSize;
-                    onMeta(pUserDataMD, &metadata);
-
-                    drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
-                }
-            } break;
-        }
-
-        /* If we're not handling metadata, just skip over the block. If we are, it will have been handled earlier in the switch statement above. */
-        if (onMeta == NULL && blockSize > 0) {
-            if (!onSeek(pUserData, blockSize, drflac_seek_origin_current)) {
-                isLastBlock = DRFLAC_TRUE;
-            }
-        }
-
-        runningFilePos += blockSize;
-        if (isLastBlock) {
-            break;
-        }
-    }
-
-    *pSeektablePos = seektablePos;
-    *pSeektableSize = seektableSize;
-    *pFirstFramePos = runningFilePos;
-
-    return DRFLAC_TRUE;
-}
-
-static drflac_bool32 drflac__init_private__native(drflac_init_info* pInit, drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, void* pUserDataMD, drflac_bool32 relaxed)
-{
-    /* Pre Condition: The bit stream should be sitting just past the 4-byte id header. */
-
-    drflac_uint8 isLastBlock;
-    drflac_uint8 blockType;
-    drflac_uint32 blockSize;
-
-    (void)onSeek;
-
-    pInit->container = drflac_container_native;
-
-    /* The first metadata block should be the STREAMINFO block. */
-    if (!drflac__read_and_decode_block_header(onRead, pUserData, &isLastBlock, &blockType, &blockSize)) {
-        return DRFLAC_FALSE;
-    }
-
-    if (blockType != DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO || blockSize != 34) {
-        if (!relaxed) {
-            /* We're opening in strict mode and the first block is not the STREAMINFO block. Error. */
-            return DRFLAC_FALSE;
-        } else {
-            /*
-            Relaxed mode. To open from here we need to just find the first frame and set the sample rate, etc. to whatever is defined
-            for that frame.
-            */
-            pInit->hasStreamInfoBlock = DRFLAC_FALSE;
-            pInit->hasMetadataBlocks  = DRFLAC_FALSE;
-
-            if (!drflac__read_next_flac_frame_header(&pInit->bs, 0, &pInit->firstFrameHeader)) {
-                return DRFLAC_FALSE;    /* Couldn't find a frame. */
-            }
-
-            if (pInit->firstFrameHeader.bitsPerSample == 0) {
-                return DRFLAC_FALSE;    /* Failed to initialize because the first frame depends on the STREAMINFO block, which does not exist. */
-            }
-
-            pInit->sampleRate              = pInit->firstFrameHeader.sampleRate;
-            pInit->channels                = drflac__get_channel_count_from_channel_assignment(pInit->firstFrameHeader.channelAssignment);
-            pInit->bitsPerSample           = pInit->firstFrameHeader.bitsPerSample;
-            pInit->maxBlockSizeInPCMFrames = 65535;   /* <-- See notes here: https://xiph.org/flac/format.html#metadata_block_streaminfo */
-            return DRFLAC_TRUE;
-        }
-    } else {
-        drflac_streaminfo streaminfo;
-        if (!drflac__read_streaminfo(onRead, pUserData, &streaminfo)) {
-            return DRFLAC_FALSE;
-        }
-
-        pInit->hasStreamInfoBlock      = DRFLAC_TRUE;
-        pInit->sampleRate              = streaminfo.sampleRate;
-        pInit->channels                = streaminfo.channels;
-        pInit->bitsPerSample           = streaminfo.bitsPerSample;
-        pInit->totalPCMFrameCount      = streaminfo.totalPCMFrameCount;
-        pInit->maxBlockSizeInPCMFrames = streaminfo.maxBlockSizeInPCMFrames;    /* Don't care about the min block size - only the max (used for determining the size of the memory allocation). */
-        pInit->hasMetadataBlocks       = !isLastBlock;
-
-        if (onMeta) {
-            drflac_metadata metadata;
-            metadata.type = DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO;
-            metadata.pRawData = NULL;
-            metadata.rawDataSize = 0;
-            metadata.data.streaminfo = streaminfo;
-            onMeta(pUserDataMD, &metadata);
-        }
-
-        return DRFLAC_TRUE;
-    }
-}
-
-#ifndef DR_FLAC_NO_OGG
-#define DRFLAC_OGG_MAX_PAGE_SIZE            65307
-#define DRFLAC_OGG_CAPTURE_PATTERN_CRC32    1605413199  /* CRC-32 of "OggS". */
-
-typedef enum
-{
-    drflac_ogg_recover_on_crc_mismatch,
-    drflac_ogg_fail_on_crc_mismatch
-} drflac_ogg_crc_mismatch_recovery;
-
-#ifndef DR_FLAC_NO_CRC
-static drflac_uint32 drflac__crc32_table[] = {
-    0x00000000L, 0x04C11DB7L, 0x09823B6EL, 0x0D4326D9L,
-    0x130476DCL, 0x17C56B6BL, 0x1A864DB2L, 0x1E475005L,
-    0x2608EDB8L, 0x22C9F00FL, 0x2F8AD6D6L, 0x2B4BCB61L,
-    0x350C9B64L, 0x31CD86D3L, 0x3C8EA00AL, 0x384FBDBDL,
-    0x4C11DB70L, 0x48D0C6C7L, 0x4593E01EL, 0x4152FDA9L,
-    0x5F15ADACL, 0x5BD4B01BL, 0x569796C2L, 0x52568B75L,
-    0x6A1936C8L, 0x6ED82B7FL, 0x639B0DA6L, 0x675A1011L,
-    0x791D4014L, 0x7DDC5DA3L, 0x709F7B7AL, 0x745E66CDL,
-    0x9823B6E0L, 0x9CE2AB57L, 0x91A18D8EL, 0x95609039L,
-    0x8B27C03CL, 0x8FE6DD8BL, 0x82A5FB52L, 0x8664E6E5L,
-    0xBE2B5B58L, 0xBAEA46EFL, 0xB7A96036L, 0xB3687D81L,
-    0xAD2F2D84L, 0xA9EE3033L, 0xA4AD16EAL, 0xA06C0B5DL,
-    0xD4326D90L, 0xD0F37027L, 0xDDB056FEL, 0xD9714B49L,
-    0xC7361B4CL, 0xC3F706FBL, 0xCEB42022L, 0xCA753D95L,
-    0xF23A8028L, 0xF6FB9D9FL, 0xFBB8BB46L, 0xFF79A6F1L,
-    0xE13EF6F4L, 0xE5FFEB43L, 0xE8BCCD9AL, 0xEC7DD02DL,
-    0x34867077L, 0x30476DC0L, 0x3D044B19L, 0x39C556AEL,
-    0x278206ABL, 0x23431B1CL, 0x2E003DC5L, 0x2AC12072L,
-    0x128E9DCFL, 0x164F8078L, 0x1B0CA6A1L, 0x1FCDBB16L,
-    0x018AEB13L, 0x054BF6A4L, 0x0808D07DL, 0x0CC9CDCAL,
-    0x7897AB07L, 0x7C56B6B0L, 0x71159069L, 0x75D48DDEL,
-    0x6B93DDDBL, 0x6F52C06CL, 0x6211E6B5L, 0x66D0FB02L,
-    0x5E9F46BFL, 0x5A5E5B08L, 0x571D7DD1L, 0x53DC6066L,
-    0x4D9B3063L, 0x495A2DD4L, 0x44190B0DL, 0x40D816BAL,
-    0xACA5C697L, 0xA864DB20L, 0xA527FDF9L, 0xA1E6E04EL,
-    0xBFA1B04BL, 0xBB60ADFCL, 0xB6238B25L, 0xB2E29692L,
-    0x8AAD2B2FL, 0x8E6C3698L, 0x832F1041L, 0x87EE0DF6L,
-    0x99A95DF3L, 0x9D684044L, 0x902B669DL, 0x94EA7B2AL,
-    0xE0B41DE7L, 0xE4750050L, 0xE9362689L, 0xEDF73B3EL,
-    0xF3B06B3BL, 0xF771768CL, 0xFA325055L, 0xFEF34DE2L,
-    0xC6BCF05FL, 0xC27DEDE8L, 0xCF3ECB31L, 0xCBFFD686L,
-    0xD5B88683L, 0xD1799B34L, 0xDC3ABDEDL, 0xD8FBA05AL,
-    0x690CE0EEL, 0x6DCDFD59L, 0x608EDB80L, 0x644FC637L,
-    0x7A089632L, 0x7EC98B85L, 0x738AAD5CL, 0x774BB0EBL,
-    0x4F040D56L, 0x4BC510E1L, 0x46863638L, 0x42472B8FL,
-    0x5C007B8AL, 0x58C1663DL, 0x558240E4L, 0x51435D53L,
-    0x251D3B9EL, 0x21DC2629L, 0x2C9F00F0L, 0x285E1D47L,
-    0x36194D42L, 0x32D850F5L, 0x3F9B762CL, 0x3B5A6B9BL,
-    0x0315D626L, 0x07D4CB91L, 0x0A97ED48L, 0x0E56F0FFL,
-    0x1011A0FAL, 0x14D0BD4DL, 0x19939B94L, 0x1D528623L,
-    0xF12F560EL, 0xF5EE4BB9L, 0xF8AD6D60L, 0xFC6C70D7L,
-    0xE22B20D2L, 0xE6EA3D65L, 0xEBA91BBCL, 0xEF68060BL,
-    0xD727BBB6L, 0xD3E6A601L, 0xDEA580D8L, 0xDA649D6FL,
-    0xC423CD6AL, 0xC0E2D0DDL, 0xCDA1F604L, 0xC960EBB3L,
-    0xBD3E8D7EL, 0xB9FF90C9L, 0xB4BCB610L, 0xB07DABA7L,
-    0xAE3AFBA2L, 0xAAFBE615L, 0xA7B8C0CCL, 0xA379DD7BL,
-    0x9B3660C6L, 0x9FF77D71L, 0x92B45BA8L, 0x9675461FL,
-    0x8832161AL, 0x8CF30BADL, 0x81B02D74L, 0x857130C3L,
-    0x5D8A9099L, 0x594B8D2EL, 0x5408ABF7L, 0x50C9B640L,
-    0x4E8EE645L, 0x4A4FFBF2L, 0x470CDD2BL, 0x43CDC09CL,
-    0x7B827D21L, 0x7F436096L, 0x7200464FL, 0x76C15BF8L,
-    0x68860BFDL, 0x6C47164AL, 0x61043093L, 0x65C52D24L,
-    0x119B4BE9L, 0x155A565EL, 0x18197087L, 0x1CD86D30L,
-    0x029F3D35L, 0x065E2082L, 0x0B1D065BL, 0x0FDC1BECL,
-    0x3793A651L, 0x3352BBE6L, 0x3E119D3FL, 0x3AD08088L,
-    0x2497D08DL, 0x2056CD3AL, 0x2D15EBE3L, 0x29D4F654L,
-    0xC5A92679L, 0xC1683BCEL, 0xCC2B1D17L, 0xC8EA00A0L,
-    0xD6AD50A5L, 0xD26C4D12L, 0xDF2F6BCBL, 0xDBEE767CL,
-    0xE3A1CBC1L, 0xE760D676L, 0xEA23F0AFL, 0xEEE2ED18L,
-    0xF0A5BD1DL, 0xF464A0AAL, 0xF9278673L, 0xFDE69BC4L,
-    0x89B8FD09L, 0x8D79E0BEL, 0x803AC667L, 0x84FBDBD0L,
-    0x9ABC8BD5L, 0x9E7D9662L, 0x933EB0BBL, 0x97FFAD0CL,
-    0xAFB010B1L, 0xAB710D06L, 0xA6322BDFL, 0xA2F33668L,
-    0xBCB4666DL, 0xB8757BDAL, 0xB5365D03L, 0xB1F740B4L
-};
-#endif
-
-static DRFLAC_INLINE drflac_uint32 drflac_crc32_byte(drflac_uint32 crc32, drflac_uint8 data)
-{
-#ifndef DR_FLAC_NO_CRC
-    return (crc32 << 8) ^ drflac__crc32_table[(drflac_uint8)((crc32 >> 24) & 0xFF) ^ data];
-#else
-    (void)data;
-    return crc32;
-#endif
-}
-
-#if 0
-static DRFLAC_INLINE drflac_uint32 drflac_crc32_uint32(drflac_uint32 crc32, drflac_uint32 data)
-{
-    crc32 = drflac_crc32_byte(crc32, (drflac_uint8)((data >> 24) & 0xFF));
-    crc32 = drflac_crc32_byte(crc32, (drflac_uint8)((data >> 16) & 0xFF));
-    crc32 = drflac_crc32_byte(crc32, (drflac_uint8)((data >>  8) & 0xFF));
-    crc32 = drflac_crc32_byte(crc32, (drflac_uint8)((data >>  0) & 0xFF));
-    return crc32;
-}
-
-static DRFLAC_INLINE drflac_uint32 drflac_crc32_uint64(drflac_uint32 crc32, drflac_uint64 data)
-{
-    crc32 = drflac_crc32_uint32(crc32, (drflac_uint32)((data >> 32) & 0xFFFFFFFF));
-    crc32 = drflac_crc32_uint32(crc32, (drflac_uint32)((data >>  0) & 0xFFFFFFFF));
-    return crc32;
-}
-#endif
-
-static DRFLAC_INLINE drflac_uint32 drflac_crc32_buffer(drflac_uint32 crc32, drflac_uint8* pData, drflac_uint32 dataSize)
-{
-    /* This can be optimized. */
-    drflac_uint32 i;
-    for (i = 0; i < dataSize; ++i) {
-        crc32 = drflac_crc32_byte(crc32, pData[i]);
-    }
-    return crc32;
-}
-
-
-static DRFLAC_INLINE drflac_bool32 drflac_ogg__is_capture_pattern(drflac_uint8 pattern[4])
-{
-    return pattern[0] == 'O' && pattern[1] == 'g' && pattern[2] == 'g' && pattern[3] == 'S';
-}
-
-static DRFLAC_INLINE drflac_uint32 drflac_ogg__get_page_header_size(drflac_ogg_page_header* pHeader)
-{
-    return 27 + pHeader->segmentCount;
-}
-
-static DRFLAC_INLINE drflac_uint32 drflac_ogg__get_page_body_size(drflac_ogg_page_header* pHeader)
-{
-    drflac_uint32 pageBodySize = 0;
-    int i;
-
-    for (i = 0; i < pHeader->segmentCount; ++i) {
-        pageBodySize += pHeader->segmentTable[i];
-    }
-
-    return pageBodySize;
-}
-
-static drflac_result drflac_ogg__read_page_header_after_capture_pattern(drflac_read_proc onRead, void* pUserData, drflac_ogg_page_header* pHeader, drflac_uint32* pBytesRead, drflac_uint32* pCRC32)
-{
-    drflac_uint8 data[23];
-    drflac_uint32 i;
-
-    DRFLAC_ASSERT(*pCRC32 == DRFLAC_OGG_CAPTURE_PATTERN_CRC32);
-
-    if (onRead(pUserData, data, 23) != 23) {
-        return DRFLAC_AT_END;
-    }
-    *pBytesRead += 23;
-
-    /*
-    It's not actually used, but set the capture pattern to 'OggS' for completeness. Not doing this will cause static analysers to complain about
-    us trying to access uninitialized data. We could alternatively just comment out this member of the drflac_ogg_page_header structure, but I
-    like to have it map to the structure of the underlying data.
-    */
-    pHeader->capturePattern[0] = 'O';
-    pHeader->capturePattern[1] = 'g';
-    pHeader->capturePattern[2] = 'g';
-    pHeader->capturePattern[3] = 'S';
-
-    pHeader->structureVersion = data[0];
-    pHeader->headerType       = data[1];
-    DRFLAC_COPY_MEMORY(&pHeader->granulePosition, &data[ 2], 8);
-    DRFLAC_COPY_MEMORY(&pHeader->serialNumber,    &data[10], 4);
-    DRFLAC_COPY_MEMORY(&pHeader->sequenceNumber,  &data[14], 4);
-    DRFLAC_COPY_MEMORY(&pHeader->checksum,        &data[18], 4);
-    pHeader->segmentCount     = data[22];
-
-    /* Calculate the CRC. Note that for the calculation the checksum part of the page needs to be set to 0. */
-    data[18] = 0;
-    data[19] = 0;
-    data[20] = 0;
-    data[21] = 0;
-
-    for (i = 0; i < 23; ++i) {
-        *pCRC32 = drflac_crc32_byte(*pCRC32, data[i]);
-    }
-
-
-    if (onRead(pUserData, pHeader->segmentTable, pHeader->segmentCount) != pHeader->segmentCount) {
-        return DRFLAC_AT_END;
-    }
-    *pBytesRead += pHeader->segmentCount;
-
-    for (i = 0; i < pHeader->segmentCount; ++i) {
-        *pCRC32 = drflac_crc32_byte(*pCRC32, pHeader->segmentTable[i]);
-    }
-
-    return DRFLAC_SUCCESS;
-}
-
-static drflac_result drflac_ogg__read_page_header(drflac_read_proc onRead, void* pUserData, drflac_ogg_page_header* pHeader, drflac_uint32* pBytesRead, drflac_uint32* pCRC32)
-{
-    drflac_uint8 id[4];
-
-    *pBytesRead = 0;
-
-    if (onRead(pUserData, id, 4) != 4) {
-        return DRFLAC_AT_END;
-    }
-    *pBytesRead += 4;
-
-    /* We need to read byte-by-byte until we find the OggS capture pattern. */
-    for (;;) {
-        if (drflac_ogg__is_capture_pattern(id)) {
-            drflac_result result;
-
-            *pCRC32 = DRFLAC_OGG_CAPTURE_PATTERN_CRC32;
-
-            result = drflac_ogg__read_page_header_after_capture_pattern(onRead, pUserData, pHeader, pBytesRead, pCRC32);
-            if (result == DRFLAC_SUCCESS) {
-                return DRFLAC_SUCCESS;
-            } else {
-                if (result == DRFLAC_CRC_MISMATCH) {
-                    continue;
-                } else {
-                    return result;
-                }
-            }
-        } else {
-            /* The first 4 bytes did not equal the capture pattern. Read the next byte and try again. */
-            id[0] = id[1];
-            id[1] = id[2];
-            id[2] = id[3];
-            if (onRead(pUserData, &id[3], 1) != 1) {
-                return DRFLAC_AT_END;
-            }
-            *pBytesRead += 1;
-        }
-    }
-}
-
-
-/*
-The main part of the Ogg encapsulation is the conversion from the physical Ogg bitstream to the native FLAC bitstream. It works
-in three general stages: Ogg Physical Bitstream -> Ogg/FLAC Logical Bitstream -> FLAC Native Bitstream. dr_flac is designed
-in such a way that the core sections assume everything is delivered in native format. Therefore, for each encapsulation type
-dr_flac is supporting there needs to be a layer sitting on top of the onRead and onSeek callbacks that ensures the bits read from
-the physical Ogg bitstream are converted and delivered in native FLAC format.
-*/
-typedef struct
-{
-    drflac_read_proc onRead;                /* The original onRead callback from drflac_open() and family. */
-    drflac_seek_proc onSeek;                /* The original onSeek callback from drflac_open() and family. */
-    void* pUserData;                        /* The user data passed on onRead and onSeek. This is the user data that was passed on drflac_open() and family. */
-    drflac_uint64 currentBytePos;           /* The position of the byte we are sitting on in the physical byte stream. Used for efficient seeking. */
-    drflac_uint64 firstBytePos;             /* The position of the first byte in the physical bitstream. Points to the start of the "OggS" identifier of the FLAC bos page. */
-    drflac_uint32 serialNumber;             /* The serial number of the FLAC audio pages. This is determined by the initial header page that was read during initialization. */
-    drflac_ogg_page_header bosPageHeader;   /* Used for seeking. */
-    drflac_ogg_page_header currentPageHeader;
-    drflac_uint32 bytesRemainingInPage;
-    drflac_uint32 pageDataSize;
-    drflac_uint8 pageData[DRFLAC_OGG_MAX_PAGE_SIZE];
-} drflac_oggbs; /* oggbs = Ogg Bitstream */
-
-static size_t drflac_oggbs__read_physical(drflac_oggbs* oggbs, void* bufferOut, size_t bytesToRead)
-{
-    size_t bytesActuallyRead = oggbs->onRead(oggbs->pUserData, bufferOut, bytesToRead);
-    oggbs->currentBytePos += bytesActuallyRead;
-
-    return bytesActuallyRead;
-}
-
-static drflac_bool32 drflac_oggbs__seek_physical(drflac_oggbs* oggbs, drflac_uint64 offset, drflac_seek_origin origin)
-{
-    if (origin == drflac_seek_origin_start) {
-        if (offset <= 0x7FFFFFFF) {
-            if (!oggbs->onSeek(oggbs->pUserData, (int)offset, drflac_seek_origin_start)) {
-                return DRFLAC_FALSE;
-            }
-            oggbs->currentBytePos = offset;
-
-            return DRFLAC_TRUE;
-        } else {
-            if (!oggbs->onSeek(oggbs->pUserData, 0x7FFFFFFF, drflac_seek_origin_start)) {
-                return DRFLAC_FALSE;
-            }
-            oggbs->currentBytePos = offset;
-
-            return drflac_oggbs__seek_physical(oggbs, offset - 0x7FFFFFFF, drflac_seek_origin_current);
-        }
-    } else {
-        while (offset > 0x7FFFFFFF) {
-            if (!oggbs->onSeek(oggbs->pUserData, 0x7FFFFFFF, drflac_seek_origin_current)) {
-                return DRFLAC_FALSE;
-            }
-            oggbs->currentBytePos += 0x7FFFFFFF;
-            offset -= 0x7FFFFFFF;
-        }
-
-        if (!oggbs->onSeek(oggbs->pUserData, (int)offset, drflac_seek_origin_current)) {    /* <-- Safe cast thanks to the loop above. */
-            return DRFLAC_FALSE;
-        }
-        oggbs->currentBytePos += offset;
-
-        return DRFLAC_TRUE;
-    }
-}
-
-static drflac_bool32 drflac_oggbs__goto_next_page(drflac_oggbs* oggbs, drflac_ogg_crc_mismatch_recovery recoveryMethod)
-{
-    drflac_ogg_page_header header;
-    for (;;) {
-        drflac_uint32 crc32 = 0;
-        drflac_uint32 bytesRead;
-        drflac_uint32 pageBodySize;
-#ifndef DR_FLAC_NO_CRC
-        drflac_uint32 actualCRC32;
-#endif
-
-        if (drflac_ogg__read_page_header(oggbs->onRead, oggbs->pUserData, &header, &bytesRead, &crc32) != DRFLAC_SUCCESS) {
-            return DRFLAC_FALSE;
-        }
-        oggbs->currentBytePos += bytesRead;
-
-        pageBodySize = drflac_ogg__get_page_body_size(&header);
-        if (pageBodySize > DRFLAC_OGG_MAX_PAGE_SIZE) {
-            continue;   /* Invalid page size. Assume it's corrupted and just move to the next page. */
-        }
-
-        if (header.serialNumber != oggbs->serialNumber) {
-            /* It's not a FLAC page. Skip it. */
-            if (pageBodySize > 0 && !drflac_oggbs__seek_physical(oggbs, pageBodySize, drflac_seek_origin_current)) {
-                return DRFLAC_FALSE;
-            }
-            continue;
-        }
-
-
-        /* We need to read the entire page and then do a CRC check on it. If there's a CRC mismatch we need to skip this page. */
-        if (drflac_oggbs__read_physical(oggbs, oggbs->pageData, pageBodySize) != pageBodySize) {
-            return DRFLAC_FALSE;
-        }
-        oggbs->pageDataSize = pageBodySize;
-
-#ifndef DR_FLAC_NO_CRC
-        actualCRC32 = drflac_crc32_buffer(crc32, oggbs->pageData, oggbs->pageDataSize);
-        if (actualCRC32 != header.checksum) {
-            if (recoveryMethod == drflac_ogg_recover_on_crc_mismatch) {
-                continue;   /* CRC mismatch. Skip this page. */
-            } else {
-                /*
-                Even though we are failing on a CRC mismatch, we still want our stream to be in a good state. Therefore we
-                go to the next valid page to ensure we're in a good state, but return false to let the caller know that the
-                seek did not fully complete.
-                */
-                drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch);
-                return DRFLAC_FALSE;
-            }
-        }
-#else
-        (void)recoveryMethod;   /* <-- Silence a warning. */
-#endif
-
-        oggbs->currentPageHeader = header;
-        oggbs->bytesRemainingInPage = pageBodySize;
-        return DRFLAC_TRUE;
-    }
-}
-
-/* Function below is unused at the moment, but I might be re-adding it later. */
-#if 0
-static drflac_uint8 drflac_oggbs__get_current_segment_index(drflac_oggbs* oggbs, drflac_uint8* pBytesRemainingInSeg)
-{
-    drflac_uint32 bytesConsumedInPage = drflac_ogg__get_page_body_size(&oggbs->currentPageHeader) - oggbs->bytesRemainingInPage;
-    drflac_uint8 iSeg = 0;
-    drflac_uint32 iByte = 0;
-    while (iByte < bytesConsumedInPage) {
-        drflac_uint8 segmentSize = oggbs->currentPageHeader.segmentTable[iSeg];
-        if (iByte + segmentSize > bytesConsumedInPage) {
-            break;
-        } else {
-            iSeg += 1;
-            iByte += segmentSize;
-        }
-    }
-
-    *pBytesRemainingInSeg = oggbs->currentPageHeader.segmentTable[iSeg] - (drflac_uint8)(bytesConsumedInPage - iByte);
-    return iSeg;
-}
-
-static drflac_bool32 drflac_oggbs__seek_to_next_packet(drflac_oggbs* oggbs)
-{
-    /* The current packet ends when we get to the segment with a lacing value of < 255 which is not at the end of a page. */
-    for (;;) {
-        drflac_bool32 atEndOfPage = DRFLAC_FALSE;
-
-        drflac_uint8 bytesRemainingInSeg;
-        drflac_uint8 iFirstSeg = drflac_oggbs__get_current_segment_index(oggbs, &bytesRemainingInSeg);
-
-        drflac_uint32 bytesToEndOfPacketOrPage = bytesRemainingInSeg;
-        for (drflac_uint8 iSeg = iFirstSeg; iSeg < oggbs->currentPageHeader.segmentCount; ++iSeg) {
-            drflac_uint8 segmentSize = oggbs->currentPageHeader.segmentTable[iSeg];
-            if (segmentSize < 255) {
-                if (iSeg == oggbs->currentPageHeader.segmentCount-1) {
-                    atEndOfPage = DRFLAC_TRUE;
-                }
-
-                break;
-            }
-
-            bytesToEndOfPacketOrPage += segmentSize;
-        }
-
-        /*
-        At this point we will have found either the packet or the end of the page. If were at the end of the page we'll
-        want to load the next page and keep searching for the end of the packet.
-        */
-        drflac_oggbs__seek_physical(oggbs, bytesToEndOfPacketOrPage, drflac_seek_origin_current);
-        oggbs->bytesRemainingInPage -= bytesToEndOfPacketOrPage;
-
-        if (atEndOfPage) {
-            /*
-            We're potentially at the next packet, but we need to check the next page first to be sure because the packet may
-            straddle pages.
-            */
-            if (!drflac_oggbs__goto_next_page(oggbs)) {
-                return DRFLAC_FALSE;
-            }
-
-            /* If it's a fresh packet it most likely means we're at the next packet. */
-            if ((oggbs->currentPageHeader.headerType & 0x01) == 0) {
-                return DRFLAC_TRUE;
-            }
-        } else {
-            /* We're at the next packet. */
-            return DRFLAC_TRUE;
-        }
-    }
-}
-
-static drflac_bool32 drflac_oggbs__seek_to_next_frame(drflac_oggbs* oggbs)
-{
-    /* The bitstream should be sitting on the first byte just after the header of the frame. */
-
-    /* What we're actually doing here is seeking to the start of the next packet. */
-    return drflac_oggbs__seek_to_next_packet(oggbs);
-}
-#endif
-
-static size_t drflac__on_read_ogg(void* pUserData, void* bufferOut, size_t bytesToRead)
-{
-    drflac_oggbs* oggbs = (drflac_oggbs*)pUserData;
-    drflac_uint8* pRunningBufferOut = (drflac_uint8*)bufferOut;
-    size_t bytesRead = 0;
-
-    DRFLAC_ASSERT(oggbs != NULL);
-    DRFLAC_ASSERT(pRunningBufferOut != NULL);
-
-    /* Reading is done page-by-page. If we've run out of bytes in the page we need to move to the next one. */
-    while (bytesRead < bytesToRead) {
-        size_t bytesRemainingToRead = bytesToRead - bytesRead;
-
-        if (oggbs->bytesRemainingInPage >= bytesRemainingToRead) {
-            DRFLAC_COPY_MEMORY(pRunningBufferOut, oggbs->pageData + (oggbs->pageDataSize - oggbs->bytesRemainingInPage), bytesRemainingToRead);
-            bytesRead += bytesRemainingToRead;
-            oggbs->bytesRemainingInPage -= (drflac_uint32)bytesRemainingToRead;
-            break;
-        }
-
-        /* If we get here it means some of the requested data is contained in the next pages. */
-        if (oggbs->bytesRemainingInPage > 0) {
-            DRFLAC_COPY_MEMORY(pRunningBufferOut, oggbs->pageData + (oggbs->pageDataSize - oggbs->bytesRemainingInPage), oggbs->bytesRemainingInPage);
-            bytesRead += oggbs->bytesRemainingInPage;
-            pRunningBufferOut += oggbs->bytesRemainingInPage;
-            oggbs->bytesRemainingInPage = 0;
-        }
-
-        DRFLAC_ASSERT(bytesRemainingToRead > 0);
-        if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch)) {
-            break;  /* Failed to go to the next page. Might have simply hit the end of the stream. */
-        }
-    }
-
-    return bytesRead;
-}
-
-static drflac_bool32 drflac__on_seek_ogg(void* pUserData, int offset, drflac_seek_origin origin)
-{
-    drflac_oggbs* oggbs = (drflac_oggbs*)pUserData;
-    int bytesSeeked = 0;
-
-    DRFLAC_ASSERT(oggbs != NULL);
-    DRFLAC_ASSERT(offset >= 0);  /* <-- Never seek backwards. */
-
-    /* Seeking is always forward which makes things a lot simpler. */
-    if (origin == drflac_seek_origin_start) {
-        if (!drflac_oggbs__seek_physical(oggbs, (int)oggbs->firstBytePos, drflac_seek_origin_start)) {
-            return DRFLAC_FALSE;
-        }
-
-        if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_fail_on_crc_mismatch)) {
-            return DRFLAC_FALSE;
-        }
-
-        return drflac__on_seek_ogg(pUserData, offset, drflac_seek_origin_current);
-    }
-
-    DRFLAC_ASSERT(origin == drflac_seek_origin_current);
-
-    while (bytesSeeked < offset) {
-        int bytesRemainingToSeek = offset - bytesSeeked;
-        DRFLAC_ASSERT(bytesRemainingToSeek >= 0);
-
-        if (oggbs->bytesRemainingInPage >= (size_t)bytesRemainingToSeek) {
-            bytesSeeked += bytesRemainingToSeek;
-            (void)bytesSeeked;  /* <-- Silence a dead store warning emitted by Clang Static Analyzer. */
-            oggbs->bytesRemainingInPage -= bytesRemainingToSeek;
-            break;
-        }
-
-        /* If we get here it means some of the requested data is contained in the next pages. */
-        if (oggbs->bytesRemainingInPage > 0) {
-            bytesSeeked += (int)oggbs->bytesRemainingInPage;
-            oggbs->bytesRemainingInPage = 0;
-        }
-
-        DRFLAC_ASSERT(bytesRemainingToSeek > 0);
-        if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_fail_on_crc_mismatch)) {
-            /* Failed to go to the next page. We either hit the end of the stream or had a CRC mismatch. */
-            return DRFLAC_FALSE;
-        }
-    }
-
-    return DRFLAC_TRUE;
-}
-
-
-static drflac_bool32 drflac_ogg__seek_to_pcm_frame(drflac* pFlac, drflac_uint64 pcmFrameIndex)
-{
-    drflac_oggbs* oggbs = (drflac_oggbs*)pFlac->_oggbs;
-    drflac_uint64 originalBytePos;
-    drflac_uint64 runningGranulePosition;
-    drflac_uint64 runningFrameBytePos;
-    drflac_uint64 runningPCMFrameCount;
-
-    DRFLAC_ASSERT(oggbs != NULL);
-
-    originalBytePos = oggbs->currentBytePos;   /* For recovery. Points to the OggS identifier. */
-
-    /* First seek to the first frame. */
-    if (!drflac__seek_to_byte(&pFlac->bs, pFlac->firstFLACFramePosInBytes)) {
-        return DRFLAC_FALSE;
-    }
-    oggbs->bytesRemainingInPage = 0;
-
-    runningGranulePosition = 0;
-    for (;;) {
-        if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch)) {
-            drflac_oggbs__seek_physical(oggbs, originalBytePos, drflac_seek_origin_start);
-            return DRFLAC_FALSE;   /* Never did find that sample... */
-        }
-
-        runningFrameBytePos = oggbs->currentBytePos - drflac_ogg__get_page_header_size(&oggbs->currentPageHeader) - oggbs->pageDataSize;
-        if (oggbs->currentPageHeader.granulePosition >= pcmFrameIndex) {
-            break; /* The sample is somewhere in the previous page. */
-        }
-
-        /*
-        At this point we know the sample is not in the previous page. It could possibly be in this page. For simplicity we
-        disregard any pages that do not begin a fresh packet.
-        */
-        if ((oggbs->currentPageHeader.headerType & 0x01) == 0) {    /* <-- Is it a fresh page? */
-            if (oggbs->currentPageHeader.segmentTable[0] >= 2) {
-                drflac_uint8 firstBytesInPage[2];
-                firstBytesInPage[0] = oggbs->pageData[0];
-                firstBytesInPage[1] = oggbs->pageData[1];
-
-                if ((firstBytesInPage[0] == 0xFF) && (firstBytesInPage[1] & 0xFC) == 0xF8) {    /* <-- Does the page begin with a frame's sync code? */
-                    runningGranulePosition = oggbs->currentPageHeader.granulePosition;
-                }
-
-                continue;
-            }
-        }
-    }
-
-    /*
-    We found the page that that is closest to the sample, so now we need to find it. The first thing to do is seek to the
-    start of that page. In the loop above we checked that it was a fresh page which means this page is also the start of
-    a new frame. This property means that after we've seeked to the page we can immediately start looping over frames until
-    we find the one containing the target sample.
-    */
-    if (!drflac_oggbs__seek_physical(oggbs, runningFrameBytePos, drflac_seek_origin_start)) {
-        return DRFLAC_FALSE;
-    }
-    if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch)) {
-        return DRFLAC_FALSE;
-    }
-
-    /*
-    At this point we'll be sitting on the first byte of the frame header of the first frame in the page. We just keep
-    looping over these frames until we find the one containing the sample we're after.
-    */
-    runningPCMFrameCount = runningGranulePosition;
-    for (;;) {
-        /*
-        There are two ways to find the sample and seek past irrelevant frames:
-          1) Use the native FLAC decoder.
-          2) Use Ogg's framing system.
-
-        Both of these options have their own pros and cons. Using the native FLAC decoder is slower because it needs to
-        do a full decode of the frame. Using Ogg's framing system is faster, but more complicated and involves some code
-        duplication for the decoding of frame headers.
-
-        Another thing to consider is that using the Ogg framing system will perform direct seeking of the physical Ogg
-        bitstream. This is important to consider because it means we cannot read data from the drflac_bs object using the
-        standard drflac__*() APIs because that will read in extra data for its own internal caching which in turn breaks
-        the positioning of the read pointer of the physical Ogg bitstream. Therefore, anything that would normally be read
-        using the native FLAC decoding APIs, such as drflac__read_next_flac_frame_header(), need to be re-implemented so as to
-        avoid the use of the drflac_bs object.
-
-        Considering these issues, I have decided to use the slower native FLAC decoding method for the following reasons:
-          1) Seeking is already partially accelerated using Ogg's paging system in the code block above.
-          2) Seeking in an Ogg encapsulated FLAC stream is probably quite uncommon.
-          3) Simplicity.
-        */
-        drflac_uint64 firstPCMFrameInFLACFrame = 0;
-        drflac_uint64 lastPCMFrameInFLACFrame = 0;
-        drflac_uint64 pcmFrameCountInThisFrame;
-
-        if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
-            return DRFLAC_FALSE;
-        }
-
-        drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &firstPCMFrameInFLACFrame, &lastPCMFrameInFLACFrame);
-
-        pcmFrameCountInThisFrame = (lastPCMFrameInFLACFrame - firstPCMFrameInFLACFrame) + 1;
-
-        /* If we are seeking to the end of the file and we've just hit it, we're done. */
-        if (pcmFrameIndex == pFlac->totalPCMFrameCount && (runningPCMFrameCount + pcmFrameCountInThisFrame) == pFlac->totalPCMFrameCount) {
-            drflac_result result = drflac__decode_flac_frame(pFlac);
-            if (result == DRFLAC_SUCCESS) {
-                pFlac->currentPCMFrame = pcmFrameIndex;
-                pFlac->currentFLACFrame.pcmFramesRemaining = 0;
-                return DRFLAC_TRUE;
-            } else {
-                return DRFLAC_FALSE;
-            }
-        }
-
-        if (pcmFrameIndex < (runningPCMFrameCount + pcmFrameCountInThisFrame)) {
-            /*
-            The sample should be in this FLAC frame. We need to fully decode it, however if it's an invalid frame (a CRC mismatch), we need to pretend
-            it never existed and keep iterating.
-            */
-            drflac_result result = drflac__decode_flac_frame(pFlac);
-            if (result == DRFLAC_SUCCESS) {
-                /* The frame is valid. We just need to skip over some samples to ensure it's sample-exact. */
-                drflac_uint64 pcmFramesToDecode = (size_t)(pcmFrameIndex - runningPCMFrameCount);    /* <-- Safe cast because the maximum number of samples in a frame is 65535. */
-                if (pcmFramesToDecode == 0) {
-                    return DRFLAC_TRUE;
-                }
-
-                pFlac->currentPCMFrame = runningPCMFrameCount;
-
-                return drflac__seek_forward_by_pcm_frames(pFlac, pcmFramesToDecode) == pcmFramesToDecode;  /* <-- If this fails, something bad has happened (it should never fail). */
-            } else {
-                if (result == DRFLAC_CRC_MISMATCH) {
-                    continue;   /* CRC mismatch. Pretend this frame never existed. */
-                } else {
-                    return DRFLAC_FALSE;
-                }
-            }
-        } else {
-            /*
-            It's not in this frame. We need to seek past the frame, but check if there was a CRC mismatch. If so, we pretend this
-            frame never existed and leave the running sample count untouched.
-            */
-            drflac_result result = drflac__seek_to_next_flac_frame(pFlac);
-            if (result == DRFLAC_SUCCESS) {
-                runningPCMFrameCount += pcmFrameCountInThisFrame;
-            } else {
-                if (result == DRFLAC_CRC_MISMATCH) {
-                    continue;   /* CRC mismatch. Pretend this frame never existed. */
-                } else {
-                    return DRFLAC_FALSE;
-                }
-            }
-        }
-    }
-}
-
-
-
-static drflac_bool32 drflac__init_private__ogg(drflac_init_info* pInit, drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, void* pUserDataMD, drflac_bool32 relaxed)
-{
-    drflac_ogg_page_header header;
-    drflac_uint32 crc32 = DRFLAC_OGG_CAPTURE_PATTERN_CRC32;
-    drflac_uint32 bytesRead = 0;
-
-    /* Pre Condition: The bit stream should be sitting just past the 4-byte OggS capture pattern. */
-    (void)relaxed;
-
-    pInit->container = drflac_container_ogg;
-    pInit->oggFirstBytePos = 0;
-
-    /*
-    We'll get here if the first 4 bytes of the stream were the OggS capture pattern, however it doesn't necessarily mean the
-    stream includes FLAC encoded audio. To check for this we need to scan the beginning-of-stream page markers and check if
-    any match the FLAC specification. Important to keep in mind that the stream may be multiplexed.
-    */
-    if (drflac_ogg__read_page_header_after_capture_pattern(onRead, pUserData, &header, &bytesRead, &crc32) != DRFLAC_SUCCESS) {
-        return DRFLAC_FALSE;
-    }
-    pInit->runningFilePos += bytesRead;
-
-    for (;;) {
-        int pageBodySize;
-
-        /* Break if we're past the beginning of stream page. */
-        if ((header.headerType & 0x02) == 0) {
-            return DRFLAC_FALSE;
-        }
-
-        /* Check if it's a FLAC header. */
-        pageBodySize = drflac_ogg__get_page_body_size(&header);
-        if (pageBodySize == 51) {   /* 51 = the lacing value of the FLAC header packet. */
-            /* It could be a FLAC page... */
-            drflac_uint32 bytesRemainingInPage = pageBodySize;
-            drflac_uint8 packetType;
-
-            if (onRead(pUserData, &packetType, 1) != 1) {
-                return DRFLAC_FALSE;
-            }
-
-            bytesRemainingInPage -= 1;
-            if (packetType == 0x7F) {
-                /* Increasingly more likely to be a FLAC page... */
-                drflac_uint8 sig[4];
-                if (onRead(pUserData, sig, 4) != 4) {
-                    return DRFLAC_FALSE;
-                }
-
-                bytesRemainingInPage -= 4;
-                if (sig[0] == 'F' && sig[1] == 'L' && sig[2] == 'A' && sig[3] == 'C') {
-                    /* Almost certainly a FLAC page... */
-                    drflac_uint8 mappingVersion[2];
-                    if (onRead(pUserData, mappingVersion, 2) != 2) {
-                        return DRFLAC_FALSE;
-                    }
-
-                    if (mappingVersion[0] != 1) {
-                        return DRFLAC_FALSE;   /* Only supporting version 1.x of the Ogg mapping. */
-                    }
-
-                    /*
-                    The next 2 bytes are the non-audio packets, not including this one. We don't care about this because we're going to
-                    be handling it in a generic way based on the serial number and packet types.
-                    */
-                    if (!onSeek(pUserData, 2, drflac_seek_origin_current)) {
-                        return DRFLAC_FALSE;
-                    }
-
-                    /* Expecting the native FLAC signature "fLaC". */
-                    if (onRead(pUserData, sig, 4) != 4) {
-                        return DRFLAC_FALSE;
-                    }
-
-                    if (sig[0] == 'f' && sig[1] == 'L' && sig[2] == 'a' && sig[3] == 'C') {
-                        /* The remaining data in the page should be the STREAMINFO block. */
-                        drflac_streaminfo streaminfo;
-                        drflac_uint8 isLastBlock;
-                        drflac_uint8 blockType;
-                        drflac_uint32 blockSize;
-                        if (!drflac__read_and_decode_block_header(onRead, pUserData, &isLastBlock, &blockType, &blockSize)) {
-                            return DRFLAC_FALSE;
-                        }
-
-                        if (blockType != DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO || blockSize != 34) {
-                            return DRFLAC_FALSE;    /* Invalid block type. First block must be the STREAMINFO block. */
-                        }
-
-                        if (drflac__read_streaminfo(onRead, pUserData, &streaminfo)) {
-                            /* Success! */
-                            pInit->hasStreamInfoBlock      = DRFLAC_TRUE;
-                            pInit->sampleRate              = streaminfo.sampleRate;
-                            pInit->channels                = streaminfo.channels;
-                            pInit->bitsPerSample           = streaminfo.bitsPerSample;
-                            pInit->totalPCMFrameCount      = streaminfo.totalPCMFrameCount;
-                            pInit->maxBlockSizeInPCMFrames = streaminfo.maxBlockSizeInPCMFrames;
-                            pInit->hasMetadataBlocks       = !isLastBlock;
-
-                            if (onMeta) {
-                                drflac_metadata metadata;
-                                metadata.type = DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO;
-                                metadata.pRawData = NULL;
-                                metadata.rawDataSize = 0;
-                                metadata.data.streaminfo = streaminfo;
-                                onMeta(pUserDataMD, &metadata);
-                            }
-
-                            pInit->runningFilePos  += pageBodySize;
-                            pInit->oggFirstBytePos  = pInit->runningFilePos - 79;   /* Subtracting 79 will place us right on top of the "OggS" identifier of the FLAC bos page. */
-                            pInit->oggSerial        = header.serialNumber;
-                            pInit->oggBosHeader     = header;
-                            break;
-                        } else {
-                            /* Failed to read STREAMINFO block. Aww, so close... */
-                            return DRFLAC_FALSE;
-                        }
-                    } else {
-                        /* Invalid file. */
-                        return DRFLAC_FALSE;
-                    }
-                } else {
-                    /* Not a FLAC header. Skip it. */
-                    if (!onSeek(pUserData, bytesRemainingInPage, drflac_seek_origin_current)) {
-                        return DRFLAC_FALSE;
-                    }
-                }
-            } else {
-                /* Not a FLAC header. Seek past the entire page and move on to the next. */
-                if (!onSeek(pUserData, bytesRemainingInPage, drflac_seek_origin_current)) {
-                    return DRFLAC_FALSE;
-                }
-            }
-        } else {
-            if (!onSeek(pUserData, pageBodySize, drflac_seek_origin_current)) {
-                return DRFLAC_FALSE;
-            }
-        }
-
-        pInit->runningFilePos += pageBodySize;
-
-
-        /* Read the header of the next page. */
-        if (drflac_ogg__read_page_header(onRead, pUserData, &header, &bytesRead, &crc32) != DRFLAC_SUCCESS) {
-            return DRFLAC_FALSE;
-        }
-        pInit->runningFilePos += bytesRead;
-    }
-
-    /*
-    If we get here it means we found a FLAC audio stream. We should be sitting on the first byte of the header of the next page. The next
-    packets in the FLAC logical stream contain the metadata. The only thing left to do in the initialization phase for Ogg is to create the
-    Ogg bistream object.
-    */
-    pInit->hasMetadataBlocks = DRFLAC_TRUE;    /* <-- Always have at least VORBIS_COMMENT metadata block. */
-    return DRFLAC_TRUE;
-}
-#endif
-
-static drflac_bool32 drflac__init_private(drflac_init_info* pInit, drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData, void* pUserDataMD)
-{
-    drflac_bool32 relaxed;
-    drflac_uint8 id[4];
-
-    if (pInit == NULL || onRead == NULL || onSeek == NULL) {
-        return DRFLAC_FALSE;
-    }
-
-    DRFLAC_ZERO_MEMORY(pInit, sizeof(*pInit));
-    pInit->onRead       = onRead;
-    pInit->onSeek       = onSeek;
-    pInit->onMeta       = onMeta;
-    pInit->container    = container;
-    pInit->pUserData    = pUserData;
-    pInit->pUserDataMD  = pUserDataMD;
-
-    pInit->bs.onRead    = onRead;
-    pInit->bs.onSeek    = onSeek;
-    pInit->bs.pUserData = pUserData;
-    drflac__reset_cache(&pInit->bs);
-
-
-    /* If the container is explicitly defined then we can try opening in relaxed mode. */
-    relaxed = container != drflac_container_unknown;
-
-    /* Skip over any ID3 tags. */
-    for (;;) {
-        if (onRead(pUserData, id, 4) != 4) {
-            return DRFLAC_FALSE;    /* Ran out of data. */
-        }
-        pInit->runningFilePos += 4;
-
-        if (id[0] == 'I' && id[1] == 'D' && id[2] == '3') {
-            drflac_uint8 header[6];
-            drflac_uint8 flags;
-            drflac_uint32 headerSize;
-
-            if (onRead(pUserData, header, 6) != 6) {
-                return DRFLAC_FALSE;    /* Ran out of data. */
-            }
-            pInit->runningFilePos += 6;
-
-            flags = header[1];
-
-            DRFLAC_COPY_MEMORY(&headerSize, header+2, 4);
-            headerSize = drflac__unsynchsafe_32(drflac__be2host_32(headerSize));
-            if (flags & 0x10) {
-                headerSize += 10;
-            }
-
-            if (!onSeek(pUserData, headerSize, drflac_seek_origin_current)) {
-                return DRFLAC_FALSE;    /* Failed to seek past the tag. */
-            }
-            pInit->runningFilePos += headerSize;
-        } else {
-            break;
-        }
-    }
-
-    if (id[0] == 'f' && id[1] == 'L' && id[2] == 'a' && id[3] == 'C') {
-        return drflac__init_private__native(pInit, onRead, onSeek, onMeta, pUserData, pUserDataMD, relaxed);
-    }
-#ifndef DR_FLAC_NO_OGG
-    if (id[0] == 'O' && id[1] == 'g' && id[2] == 'g' && id[3] == 'S') {
-        return drflac__init_private__ogg(pInit, onRead, onSeek, onMeta, pUserData, pUserDataMD, relaxed);
-    }
-#endif
-
-    /* If we get here it means we likely don't have a header. Try opening in relaxed mode, if applicable. */
-    if (relaxed) {
-        if (container == drflac_container_native) {
-            return drflac__init_private__native(pInit, onRead, onSeek, onMeta, pUserData, pUserDataMD, relaxed);
-        }
-#ifndef DR_FLAC_NO_OGG
-        if (container == drflac_container_ogg) {
-            return drflac__init_private__ogg(pInit, onRead, onSeek, onMeta, pUserData, pUserDataMD, relaxed);
-        }
-#endif
-    }
-
-    /* Unsupported container. */
-    return DRFLAC_FALSE;
-}
-
-static void drflac__init_from_info(drflac* pFlac, const drflac_init_info* pInit)
-{
-    DRFLAC_ASSERT(pFlac != NULL);
-    DRFLAC_ASSERT(pInit != NULL);
-
-    DRFLAC_ZERO_MEMORY(pFlac, sizeof(*pFlac));
-    pFlac->bs                      = pInit->bs;
-    pFlac->onMeta                  = pInit->onMeta;
-    pFlac->pUserDataMD             = pInit->pUserDataMD;
-    pFlac->maxBlockSizeInPCMFrames = pInit->maxBlockSizeInPCMFrames;
-    pFlac->sampleRate              = pInit->sampleRate;
-    pFlac->channels                = (drflac_uint8)pInit->channels;
-    pFlac->bitsPerSample           = (drflac_uint8)pInit->bitsPerSample;
-    pFlac->totalPCMFrameCount      = pInit->totalPCMFrameCount;
-    pFlac->container               = pInit->container;
-}
-
-
-static drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData, void* pUserDataMD, const drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    static drflac_init_info init;
-    drflac_uint32 allocationSize;
-    drflac_uint32 wholeSIMDVectorCountPerChannel;
-    drflac_uint32 decodedSamplesAllocationSize;
-#ifndef DR_FLAC_NO_OGG
-    static drflac_oggbs oggbs;
-#endif
-    drflac_uint64 firstFramePos;
-    drflac_uint64 seektablePos;
-    drflac_uint32 seektableSize;
-    drflac_allocation_callbacks allocationCallbacks;
-    drflac* pFlac;
-
-    /* CPU support first. */
-    drflac__init_cpu_caps();
-
-    if (!drflac__init_private(&init, onRead, onSeek, onMeta, container, pUserData, pUserDataMD)) {
-        return NULL;
-    }
-
-    if (pAllocationCallbacks != NULL) {
-        allocationCallbacks = *pAllocationCallbacks;
-        if (allocationCallbacks.onFree == NULL || (allocationCallbacks.onMalloc == NULL && allocationCallbacks.onRealloc == NULL)) {
-            return NULL;    /* Invalid allocation callbacks. */
-        }
-    } else {
-        allocationCallbacks.pUserData = NULL;
-        allocationCallbacks.onMalloc  = drflac__malloc_default;
-        allocationCallbacks.onRealloc = drflac__realloc_default;
-        allocationCallbacks.onFree    = drflac__free_default;
-    }
-
-
-    /*
-    The size of the allocation for the drflac object needs to be large enough to fit the following:
-      1) The main members of the drflac structure
-      2) A block of memory large enough to store the decoded samples of the largest frame in the stream
-      3) If the container is Ogg, a drflac_oggbs object
-
-    The complicated part of the allocation is making sure there's enough room the decoded samples, taking into consideration
-    the different SIMD instruction sets.
-    */
-    allocationSize = sizeof(drflac);
-
-    /*
-    The allocation size for decoded frames depends on the number of 32-bit integers that fit inside the largest SIMD vector
-    we are supporting.
-    */
-    if ((init.maxBlockSizeInPCMFrames % (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32))) == 0) {
-        wholeSIMDVectorCountPerChannel = (init.maxBlockSizeInPCMFrames / (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32)));
-    } else {
-        wholeSIMDVectorCountPerChannel = (init.maxBlockSizeInPCMFrames / (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32))) + 1;
-    }
-
-    decodedSamplesAllocationSize = wholeSIMDVectorCountPerChannel * DRFLAC_MAX_SIMD_VECTOR_SIZE * init.channels;
-
-    allocationSize += decodedSamplesAllocationSize;
-    allocationSize += DRFLAC_MAX_SIMD_VECTOR_SIZE;  /* Allocate extra bytes to ensure we have enough for alignment. */
-
-#ifndef DR_FLAC_NO_OGG
-    /* There's additional data required for Ogg streams. */
-    if (init.container == drflac_container_ogg) {
-        allocationSize += sizeof(drflac_oggbs);
-    }
-
-    DRFLAC_ZERO_MEMORY(&oggbs, sizeof(oggbs));
-    if (init.container == drflac_container_ogg) {
-        oggbs.onRead = onRead;
-        oggbs.onSeek = onSeek;
-        oggbs.pUserData = pUserData;
-        oggbs.currentBytePos = init.oggFirstBytePos;
-        oggbs.firstBytePos = init.oggFirstBytePos;
-        oggbs.serialNumber = init.oggSerial;
-        oggbs.bosPageHeader = init.oggBosHeader;
-        oggbs.bytesRemainingInPage = 0;
-    }
-#endif
-
-    /*
-    This part is a bit awkward. We need to load the seektable so that it can be referenced in-memory, but I want the drflac object to
-    consist of only a single heap allocation. To this, the size of the seek table needs to be known, which we determine when reading
-    and decoding the metadata.
-    */
-    firstFramePos = 42;   /* <-- We know we are at byte 42 at this point. */
-    seektablePos  = 0;
-    seektableSize = 0;
-    if (init.hasMetadataBlocks) {
-        drflac_read_proc onReadOverride = onRead;
-        drflac_seek_proc onSeekOverride = onSeek;
-        void* pUserDataOverride = pUserData;
-
-#ifndef DR_FLAC_NO_OGG
-        if (init.container == drflac_container_ogg) {
-            onReadOverride = drflac__on_read_ogg;
-            onSeekOverride = drflac__on_seek_ogg;
-            pUserDataOverride = (void*)&oggbs;
-        }
-#endif
-
-        if (!drflac__read_and_decode_metadata(onReadOverride, onSeekOverride, onMeta, pUserDataOverride, pUserDataMD, &firstFramePos, &seektablePos, &seektableSize, &allocationCallbacks)) {
-            return NULL;
-        }
-
-        allocationSize += seektableSize;
-    }
-
-
-    pFlac = (drflac*)drflac__malloc_from_callbacks(allocationSize, &allocationCallbacks);
-    if (pFlac == NULL) {
-        return NULL;
-    }
-
-    drflac__init_from_info(pFlac, &init);
-    pFlac->allocationCallbacks = allocationCallbacks;
-    pFlac->pDecodedSamples = (drflac_int32*)drflac_align((size_t)pFlac->pExtraData, DRFLAC_MAX_SIMD_VECTOR_SIZE);
-
-#ifndef DR_FLAC_NO_OGG
-    if (init.container == drflac_container_ogg) {
-        drflac_oggbs* pInternalOggbs = (drflac_oggbs*)((drflac_uint8*)pFlac->pDecodedSamples + decodedSamplesAllocationSize + seektableSize);
-        *pInternalOggbs = oggbs;
-
-        /* The Ogg bistream needs to be layered on top of the original bitstream. */
-        pFlac->bs.onRead = drflac__on_read_ogg;
-        pFlac->bs.onSeek = drflac__on_seek_ogg;
-        pFlac->bs.pUserData = (void*)pInternalOggbs;
-        pFlac->_oggbs = (void*)pInternalOggbs;
-    }
-#endif
-
-    pFlac->firstFLACFramePosInBytes = firstFramePos;
-
-    /* NOTE: Seektables are not currently compatible with Ogg encapsulation (Ogg has its own accelerated seeking system). I may change this later, so I'm leaving this here for now. */
-#ifndef DR_FLAC_NO_OGG
-    if (init.container == drflac_container_ogg)
-    {
-        pFlac->pSeekpoints = NULL;
-        pFlac->seekpointCount = 0;
-    }
-    else
-#endif
-    {
-        /* If we have a seektable we need to load it now, making sure we move back to where we were previously. */
-        if (seektablePos != 0) {
-            pFlac->seekpointCount = seektableSize / sizeof(*pFlac->pSeekpoints);
-            pFlac->pSeekpoints = (drflac_seekpoint*)((drflac_uint8*)pFlac->pDecodedSamples + decodedSamplesAllocationSize);
-
-            DRFLAC_ASSERT(pFlac->bs.onSeek != NULL);
-            DRFLAC_ASSERT(pFlac->bs.onRead != NULL);
-
-            /* Seek to the seektable, then just read directly into our seektable buffer. */
-            if (pFlac->bs.onSeek(pFlac->bs.pUserData, (int)seektablePos, drflac_seek_origin_start)) {
-                if (pFlac->bs.onRead(pFlac->bs.pUserData, pFlac->pSeekpoints, seektableSize) == seektableSize) {
-                    /* Endian swap. */
-                    drflac_uint32 iSeekpoint;
-                    for (iSeekpoint = 0; iSeekpoint < pFlac->seekpointCount; ++iSeekpoint) {
-                        pFlac->pSeekpoints[iSeekpoint].firstPCMFrame   = drflac__be2host_64(pFlac->pSeekpoints[iSeekpoint].firstPCMFrame);
-                        pFlac->pSeekpoints[iSeekpoint].flacFrameOffset = drflac__be2host_64(pFlac->pSeekpoints[iSeekpoint].flacFrameOffset);
-                        pFlac->pSeekpoints[iSeekpoint].pcmFrameCount   = drflac__be2host_16(pFlac->pSeekpoints[iSeekpoint].pcmFrameCount);
-                    }
-                } else {
-                    /* Failed to read the seektable. Pretend we don't have one. */
-                    pFlac->pSeekpoints = NULL;
-                    pFlac->seekpointCount = 0;
-                }
-
-                /* We need to seek back to where we were. If this fails it's a critical error. */
-                if (!pFlac->bs.onSeek(pFlac->bs.pUserData, (int)pFlac->firstFLACFramePosInBytes, drflac_seek_origin_start)) {
-                    drflac__free_from_callbacks(pFlac, &allocationCallbacks);
-                    return NULL;
-                }
-            } else {
-                /* Failed to seek to the seektable. Ominous sign, but for now we can just pretend we don't have one. */
-                pFlac->pSeekpoints = NULL;
-                pFlac->seekpointCount = 0;
-            }
-        }
-    }
-
-
-    /*
-    If we get here, but don't have a STREAMINFO block, it means we've opened the stream in relaxed mode and need to decode
-    the first frame.
-    */
-    if (!init.hasStreamInfoBlock) {
-        pFlac->currentFLACFrame.header = init.firstFrameHeader;
-        for (;;) {
-            drflac_result result = drflac__decode_flac_frame(pFlac);
-            if (result == DRFLAC_SUCCESS) {
-                break;
-            } else {
-                if (result == DRFLAC_CRC_MISMATCH) {
-                    if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
-                        drflac__free_from_callbacks(pFlac, &allocationCallbacks);
-                        return NULL;
-                    }
-                    continue;
-                } else {
-                    drflac__free_from_callbacks(pFlac, &allocationCallbacks);
-                    return NULL;
-                }
-            }
-        }
-    }
-
-    return pFlac;
-}
-
-
-
-#ifndef DR_FLAC_NO_STDIO
-#include <stdio.h>
-#include <wchar.h>      /* For wcslen(), wcsrtombs() */
-
-/* drflac_result_from_errno() is only used for fopen() and wfopen() so putting it inside DR_WAV_NO_STDIO for now. If something else needs this later we can move it out. */
-#include <errno.h>
-static drflac_result drflac_result_from_errno(int e)
-{
-    switch (e)
-    {
-        case 0: return DRFLAC_SUCCESS;
-    #ifdef EPERM
-        case EPERM: return DRFLAC_INVALID_OPERATION;
-    #endif
-    #ifdef ENOENT
-        case ENOENT: return DRFLAC_DOES_NOT_EXIST;
-    #endif
-    #ifdef ESRCH
-        case ESRCH: return DRFLAC_DOES_NOT_EXIST;
-    #endif
-    #ifdef EINTR
-        case EINTR: return DRFLAC_INTERRUPT;
-    #endif
-    #ifdef EIO
-        case EIO: return DRFLAC_IO_ERROR;
-    #endif
-    #ifdef ENXIO
-        case ENXIO: return DRFLAC_DOES_NOT_EXIST;
-    #endif
-    #ifdef E2BIG
-        case E2BIG: return DRFLAC_INVALID_ARGS;
-    #endif
-    #ifdef ENOEXEC
-        case ENOEXEC: return DRFLAC_INVALID_FILE;
-    #endif
-    #ifdef EBADF
-        case EBADF: return DRFLAC_INVALID_FILE;
-    #endif
-    #ifdef ECHILD
-        case ECHILD: return DRFLAC_ERROR;
-    #endif
-    #ifdef EAGAIN
-        case EAGAIN: return DRFLAC_UNAVAILABLE;
-    #endif
-    #ifdef ENOMEM
-        case ENOMEM: return DRFLAC_OUT_OF_MEMORY;
-    #endif
-    #ifdef EACCES
-        case EACCES: return DRFLAC_ACCESS_DENIED;
-    #endif
-    #ifdef EFAULT
-        case EFAULT: return DRFLAC_BAD_ADDRESS;
-    #endif
-    #ifdef ENOTBLK
-        case ENOTBLK: return DRFLAC_ERROR;
-    #endif
-    #ifdef EBUSY
-        case EBUSY: return DRFLAC_BUSY;
-    #endif
-    #ifdef EEXIST
-        case EEXIST: return DRFLAC_ALREADY_EXISTS;
-    #endif
-    #ifdef EXDEV
-        case EXDEV: return DRFLAC_ERROR;
-    #endif
-    #ifdef ENODEV
-        case ENODEV: return DRFLAC_DOES_NOT_EXIST;
-    #endif
-    #ifdef ENOTDIR
-        case ENOTDIR: return DRFLAC_NOT_DIRECTORY;
-    #endif
-    #ifdef EISDIR
-        case EISDIR: return DRFLAC_IS_DIRECTORY;
-    #endif
-    #ifdef EINVAL
-        case EINVAL: return DRFLAC_INVALID_ARGS;
-    #endif
-    #ifdef ENFILE
-        case ENFILE: return DRFLAC_TOO_MANY_OPEN_FILES;
-    #endif
-    #ifdef EMFILE
-        case EMFILE: return DRFLAC_TOO_MANY_OPEN_FILES;
-    #endif
-    #ifdef ENOTTY
-        case ENOTTY: return DRFLAC_INVALID_OPERATION;
-    #endif
-    #ifdef ETXTBSY
-        case ETXTBSY: return DRFLAC_BUSY;
-    #endif
-    #ifdef EFBIG
-        case EFBIG: return DRFLAC_TOO_BIG;
-    #endif
-    #ifdef ENOSPC
-        case ENOSPC: return DRFLAC_NO_SPACE;
-    #endif
-    #ifdef ESPIPE
-        case ESPIPE: return DRFLAC_BAD_SEEK;
-    #endif
-    #ifdef EROFS
-        case EROFS: return DRFLAC_ACCESS_DENIED;
-    #endif
-    #ifdef EMLINK
-        case EMLINK: return DRFLAC_TOO_MANY_LINKS;
-    #endif
-    #ifdef EPIPE
-        case EPIPE: return DRFLAC_BAD_PIPE;
-    #endif
-    #ifdef EDOM
-        case EDOM: return DRFLAC_OUT_OF_RANGE;
-    #endif
-    #ifdef ERANGE
-        case ERANGE: return DRFLAC_OUT_OF_RANGE;
-    #endif
-    #ifdef EDEADLK
-        case EDEADLK: return DRFLAC_DEADLOCK;
-    #endif
-    #ifdef ENAMETOOLONG
-        case ENAMETOOLONG: return DRFLAC_PATH_TOO_LONG;
-    #endif
-    #ifdef ENOLCK
-        case ENOLCK: return DRFLAC_ERROR;
-    #endif
-    #ifdef ENOSYS
-        case ENOSYS: return DRFLAC_NOT_IMPLEMENTED;
-    #endif
-    #ifdef ENOTEMPTY
-        case ENOTEMPTY: return DRFLAC_DIRECTORY_NOT_EMPTY;
-    #endif
-    #ifdef ELOOP
-        case ELOOP: return DRFLAC_TOO_MANY_LINKS;
-    #endif
-    #ifdef ENOMSG
-        case ENOMSG: return DRFLAC_NO_MESSAGE;
-    #endif
-    #ifdef EIDRM
-        case EIDRM: return DRFLAC_ERROR;
-    #endif
-    #ifdef ECHRNG
-        case ECHRNG: return DRFLAC_ERROR;
-    #endif
-    #ifdef EL2NSYNC
-        case EL2NSYNC: return DRFLAC_ERROR;
-    #endif
-    #ifdef EL3HLT
-        case EL3HLT: return DRFLAC_ERROR;
-    #endif
-    #ifdef EL3RST
-        case EL3RST: return DRFLAC_ERROR;
-    #endif
-    #ifdef ELNRNG
-        case ELNRNG: return DRFLAC_OUT_OF_RANGE;
-    #endif
-    #ifdef EUNATCH
-        case EUNATCH: return DRFLAC_ERROR;
-    #endif
-    #ifdef ENOCSI
-        case ENOCSI: return DRFLAC_ERROR;
-    #endif
-    #ifdef EL2HLT
-        case EL2HLT: return DRFLAC_ERROR;
-    #endif
-    #ifdef EBADE
-        case EBADE: return DRFLAC_ERROR;
-    #endif
-    #ifdef EBADR
-        case EBADR: return DRFLAC_ERROR;
-    #endif
-    #ifdef EXFULL
-        case EXFULL: return DRFLAC_ERROR;
-    #endif
-    #ifdef ENOANO
-        case ENOANO: return DRFLAC_ERROR;
-    #endif
-    #ifdef EBADRQC
-        case EBADRQC: return DRFLAC_ERROR;
-    #endif
-    #ifdef EBADSLT
-        case EBADSLT: return DRFLAC_ERROR;
-    #endif
-    #ifdef EBFONT
-        case EBFONT: return DRFLAC_INVALID_FILE;
-    #endif
-    #ifdef ENOSTR
-        case ENOSTR: return DRFLAC_ERROR;
-    #endif
-    #ifdef ENODATA
-        case ENODATA: return DRFLAC_NO_DATA_AVAILABLE;
-    #endif
-    #ifdef ETIME
-        case ETIME: return DRFLAC_TIMEOUT;
-    #endif
-    #ifdef ENOSR
-        case ENOSR: return DRFLAC_NO_DATA_AVAILABLE;
-    #endif
-    #ifdef ENONET
-        case ENONET: return DRFLAC_NO_NETWORK;
-    #endif
-    #ifdef ENOPKG
-        case ENOPKG: return DRFLAC_ERROR;
-    #endif
-    #ifdef EREMOTE
-        case EREMOTE: return DRFLAC_ERROR;
-    #endif
-    #ifdef ENOLINK
-        case ENOLINK: return DRFLAC_ERROR;
-    #endif
-    #ifdef EADV
-        case EADV: return DRFLAC_ERROR;
-    #endif
-    #ifdef ESRMNT
-        case ESRMNT: return DRFLAC_ERROR;
-    #endif
-    #ifdef ECOMM
-        case ECOMM: return DRFLAC_ERROR;
-    #endif
-    #ifdef EPROTO
-        case EPROTO: return DRFLAC_ERROR;
-    #endif
-    #ifdef EMULTIHOP
-        case EMULTIHOP: return DRFLAC_ERROR;
-    #endif
-    #ifdef EDOTDOT
-        case EDOTDOT: return DRFLAC_ERROR;
-    #endif
-    #ifdef EBADMSG
-        case EBADMSG: return DRFLAC_BAD_MESSAGE;
-    #endif
-    #ifdef EOVERFLOW
-        case EOVERFLOW: return DRFLAC_TOO_BIG;
-    #endif
-    #ifdef ENOTUNIQ
-        case ENOTUNIQ: return DRFLAC_NOT_UNIQUE;
-    #endif
-    #ifdef EBADFD
-        case EBADFD: return DRFLAC_ERROR;
-    #endif
-    #ifdef EREMCHG
-        case EREMCHG: return DRFLAC_ERROR;
-    #endif
-    #ifdef ELIBACC
-        case ELIBACC: return DRFLAC_ACCESS_DENIED;
-    #endif
-    #ifdef ELIBBAD
-        case ELIBBAD: return DRFLAC_INVALID_FILE;
-    #endif
-    #ifdef ELIBSCN
-        case ELIBSCN: return DRFLAC_INVALID_FILE;
-    #endif
-    #ifdef ELIBMAX
-        case ELIBMAX: return DRFLAC_ERROR;
-    #endif
-    #ifdef ELIBEXEC
-        case ELIBEXEC: return DRFLAC_ERROR;
-    #endif
-    #ifdef EILSEQ
-        case EILSEQ: return DRFLAC_INVALID_DATA;
-    #endif
-    #ifdef ERESTART
-        case ERESTART: return DRFLAC_ERROR;
-    #endif
-    #ifdef ESTRPIPE
-        case ESTRPIPE: return DRFLAC_ERROR;
-    #endif
-    #ifdef EUSERS
-        case EUSERS: return DRFLAC_ERROR;
-    #endif
-    #ifdef ENOTSOCK
-        case ENOTSOCK: return DRFLAC_NOT_SOCKET;
-    #endif
-    #ifdef EDESTADDRREQ
-        case EDESTADDRREQ: return DRFLAC_NO_ADDRESS;
-    #endif
-    #ifdef EMSGSIZE
-        case EMSGSIZE: return DRFLAC_TOO_BIG;
-    #endif
-    #ifdef EPROTOTYPE
-        case EPROTOTYPE: return DRFLAC_BAD_PROTOCOL;
-    #endif
-    #ifdef ENOPROTOOPT
-        case ENOPROTOOPT: return DRFLAC_PROTOCOL_UNAVAILABLE;
-    #endif
-    #ifdef EPROTONOSUPPORT
-        case EPROTONOSUPPORT: return DRFLAC_PROTOCOL_NOT_SUPPORTED;
-    #endif
-    #ifdef ESOCKTNOSUPPORT
-        case ESOCKTNOSUPPORT: return DRFLAC_SOCKET_NOT_SUPPORTED;
-    #endif
-    #ifdef EOPNOTSUPP
-        case EOPNOTSUPP: return DRFLAC_INVALID_OPERATION;
-    #endif
-    #ifdef EPFNOSUPPORT
-        case EPFNOSUPPORT: return DRFLAC_PROTOCOL_FAMILY_NOT_SUPPORTED;
-    #endif
-    #ifdef EAFNOSUPPORT
-        case EAFNOSUPPORT: return DRFLAC_ADDRESS_FAMILY_NOT_SUPPORTED;
-    #endif
-    #ifdef EADDRINUSE
-        case EADDRINUSE: return DRFLAC_ALREADY_IN_USE;
-    #endif
-    #ifdef EADDRNOTAVAIL
-        case EADDRNOTAVAIL: return DRFLAC_ERROR;
-    #endif
-    #ifdef ENETDOWN
-        case ENETDOWN: return DRFLAC_NO_NETWORK;
-    #endif
-    #ifdef ENETUNREACH
-        case ENETUNREACH: return DRFLAC_NO_NETWORK;
-    #endif
-    #ifdef ENETRESET
-        case ENETRESET: return DRFLAC_NO_NETWORK;
-    #endif
-    #ifdef ECONNABORTED
-        case ECONNABORTED: return DRFLAC_NO_NETWORK;
-    #endif
-    #ifdef ECONNRESET
-        case ECONNRESET: return DRFLAC_CONNECTION_RESET;
-    #endif
-    #ifdef ENOBUFS
-        case ENOBUFS: return DRFLAC_NO_SPACE;
-    #endif
-    #ifdef EISCONN
-        case EISCONN: return DRFLAC_ALREADY_CONNECTED;
-    #endif
-    #ifdef ENOTCONN
-        case ENOTCONN: return DRFLAC_NOT_CONNECTED;
-    #endif
-    #ifdef ESHUTDOWN
-        case ESHUTDOWN: return DRFLAC_ERROR;
-    #endif
-    #ifdef ETOOMANYREFS
-        case ETOOMANYREFS: return DRFLAC_ERROR;
-    #endif
-    #ifdef ETIMEDOUT
-        case ETIMEDOUT: return DRFLAC_TIMEOUT;
-    #endif
-    #ifdef ECONNREFUSED
-        case ECONNREFUSED: return DRFLAC_CONNECTION_REFUSED;
-    #endif
-    #ifdef EHOSTDOWN
-        case EHOSTDOWN: return DRFLAC_NO_HOST;
-    #endif
-    #ifdef EHOSTUNREACH
-        case EHOSTUNREACH: return DRFLAC_NO_HOST;
-    #endif
-    #ifdef EALREADY
-        case EALREADY: return DRFLAC_IN_PROGRESS;
-    #endif
-    #ifdef EINPROGRESS
-        case EINPROGRESS: return DRFLAC_IN_PROGRESS;
-    #endif
-    #ifdef ESTALE
-        case ESTALE: return DRFLAC_INVALID_FILE;
-    #endif
-    #ifdef EUCLEAN
-        case EUCLEAN: return DRFLAC_ERROR;
-    #endif
-    #ifdef ENOTNAM
-        case ENOTNAM: return DRFLAC_ERROR;
-    #endif
-    #ifdef ENAVAIL
-        case ENAVAIL: return DRFLAC_ERROR;
-    #endif
-    #ifdef EISNAM
-        case EISNAM: return DRFLAC_ERROR;
-    #endif
-    #ifdef EREMOTEIO
-        case EREMOTEIO: return DRFLAC_IO_ERROR;
-    #endif
-    #ifdef EDQUOT
-        case EDQUOT: return DRFLAC_NO_SPACE;
-    #endif
-    #ifdef ENOMEDIUM
-        case ENOMEDIUM: return DRFLAC_DOES_NOT_EXIST;
-    #endif
-    #ifdef EMEDIUMTYPE
-        case EMEDIUMTYPE: return DRFLAC_ERROR;
-    #endif
-    #ifdef ECANCELED
-        case ECANCELED: return DRFLAC_CANCELLED;
-    #endif
-    #ifdef ENOKEY
-        case ENOKEY: return DRFLAC_ERROR;
-    #endif
-    #ifdef EKEYEXPIRED
-        case EKEYEXPIRED: return DRFLAC_ERROR;
-    #endif
-    #ifdef EKEYREVOKED
-        case EKEYREVOKED: return DRFLAC_ERROR;
-    #endif
-    #ifdef EKEYREJECTED
-        case EKEYREJECTED: return DRFLAC_ERROR;
-    #endif
-    #ifdef EOWNERDEAD
-        case EOWNERDEAD: return DRFLAC_ERROR;
-    #endif
-    #ifdef ENOTRECOVERABLE
-        case ENOTRECOVERABLE: return DRFLAC_ERROR;
-    #endif
-    #ifdef ERFKILL
-        case ERFKILL: return DRFLAC_ERROR;
-    #endif
-    #ifdef EHWPOISON
-        case EHWPOISON: return DRFLAC_ERROR;
-    #endif
-        default: return DRFLAC_ERROR;
-    }
-}
-
-static drflac_result drflac_fopen(FILE** ppFile, const char* pFilePath, const char* pOpenMode)
-{
-#if _MSC_VER && _MSC_VER >= 1400
-    errno_t err;
-#endif
-
-    if (ppFile != NULL) {
-        *ppFile = NULL;  /* Safety. */
-    }
-
-    if (pFilePath == NULL || pOpenMode == NULL || ppFile == NULL) {
-        return DRFLAC_INVALID_ARGS;
-    }
-
-#if _MSC_VER && _MSC_VER >= 1400
-    err = fopen_s(ppFile, pFilePath, pOpenMode);
-    if (err != 0) {
-        return drflac_result_from_errno(err);
-    }
-#else
-#if defined(_WIN32) || defined(__APPLE__)
-    *ppFile = fopen(pFilePath, pOpenMode);
-#else
-    #if defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS == 64 && defined(_LARGEFILE64_SOURCE)
-        *ppFile = fopen64(pFilePath, pOpenMode);
-    #else
-        *ppFile = fopen(pFilePath, pOpenMode);
-    #endif
-#endif
-    if (*ppFile == NULL) {
-        drflac_result result = drflac_result_from_errno(errno);
-        if (result == DRFLAC_SUCCESS) {
-            result = DRFLAC_ERROR;   /* Just a safety check to make sure we never ever return success when pFile == NULL. */
-        }
-
-        return result;
-    }
-#endif
-
-    return DRFLAC_SUCCESS;
-}
-
-/*
-_wfopen() isn't always available in all compilation environments.
-
-    * Windows only.
-    * MSVC seems to support it universally as far back as VC6 from what I can tell (haven't checked further back).
-    * MinGW-64 (both 32- and 64-bit) seems to support it.
-    * MinGW wraps it in !defined(__STRICT_ANSI__).
-
-This can be reviewed as compatibility issues arise. The preference is to use _wfopen_s() and _wfopen() as opposed to the wcsrtombs()
-fallback, so if you notice your compiler not detecting this properly I'm happy to look at adding support.
-*/
-#if defined(_WIN32)
-    #if defined(_MSC_VER) || defined(__MINGW64__) || !defined(__STRICT_ANSI__)
-        #define DRFLAC_HAS_WFOPEN
-    #endif
-#endif
-
-static drflac_result drflac_wfopen(FILE** ppFile, const wchar_t* pFilePath, const wchar_t* pOpenMode, const drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    if (ppFile != NULL) {
-        *ppFile = NULL;  /* Safety. */
-    }
-
-    if (pFilePath == NULL || pOpenMode == NULL || ppFile == NULL) {
-        return DRFLAC_INVALID_ARGS;
-    }
-
-#if defined(DRFLAC_HAS_WFOPEN)
-    {
-        /* Use _wfopen() on Windows. */
-    #if defined(_MSC_VER) && _MSC_VER >= 1400
-        errno_t err = _wfopen_s(ppFile, pFilePath, pOpenMode);
-        if (err != 0) {
-            return drflac_result_from_errno(err);
-        }
-    #else
-        *ppFile = _wfopen(pFilePath, pOpenMode);
-        if (*ppFile == NULL) {
-            return drflac_result_from_errno(errno);
-        }
-    #endif
-        (void)pAllocationCallbacks;
-    }
-#else
-    /*
-    Use fopen() on anything other than Windows. Requires a conversion. This is annoying because fopen() is locale specific. The only real way I can
-    think of to do this is with wcsrtombs(). Note that wcstombs() is apparently not thread-safe because it uses a static global mbstate_t object for
-    maintaining state. I've checked this with -std=c89 and it works, but if somebody get's a compiler error I'll look into improving compatibility.
-    */
-    {
-        mbstate_t mbs;
-        size_t lenMB;
-        const wchar_t* pFilePathTemp = pFilePath;
-        char* pFilePathMB = NULL;
-        char pOpenModeMB[32] = {0};
-
-        /* Get the length first. */
-        DRFLAC_ZERO_OBJECT(&mbs);
-        lenMB = wcsrtombs(NULL, &pFilePathTemp, 0, &mbs);
-        if (lenMB == (size_t)-1) {
-            return drflac_result_from_errno(errno);
-        }
-
-        pFilePathMB = (char*)drflac__malloc_from_callbacks(lenMB + 1, pAllocationCallbacks);
-        if (pFilePathMB == NULL) {
-            return DRFLAC_OUT_OF_MEMORY;
-        }
-
-        pFilePathTemp = pFilePath;
-        DRFLAC_ZERO_OBJECT(&mbs);
-        wcsrtombs(pFilePathMB, &pFilePathTemp, lenMB + 1, &mbs);
-
-        /* The open mode should always consist of ASCII characters so we should be able to do a trivial conversion. */
-        {
-            size_t i = 0;
-            for (;;) {
-                if (pOpenMode[i] == 0) {
-                    pOpenModeMB[i] = '\0';
-                    break;
-                }
-
-                pOpenModeMB[i] = (char)pOpenMode[i];
-                i += 1;
-            }
-        }
-
-        *ppFile = fopen(pFilePathMB, pOpenModeMB);
-
-        drflac__free_from_callbacks(pFilePathMB, pAllocationCallbacks);
-    }
-
-    if (*ppFile == NULL) {
-        return DRFLAC_ERROR;
-    }
-#endif
-
-    return DRFLAC_SUCCESS;
-}
-
-static size_t drflac__on_read_stdio(void* pUserData, void* bufferOut, size_t bytesToRead)
-{
-    return fread(bufferOut, 1, bytesToRead, (FILE*)pUserData);
-}
-
-static drflac_bool32 drflac__on_seek_stdio(void* pUserData, int offset, drflac_seek_origin origin)
-{
-    DRFLAC_ASSERT(offset >= 0);  /* <-- Never seek backwards. */
-
-    return fseek((FILE*)pUserData, offset, (origin == drflac_seek_origin_current) ? SEEK_CUR : SEEK_SET) == 0;
-}
-
-
-DRFLAC_API drflac* drflac_open_file(const char* pFileName, const drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    drflac* pFlac;
-    FILE* pFile;
-
-    if (drflac_fopen(&pFile, pFileName, "rb") != DRFLAC_SUCCESS) {
-        return NULL;
-    }
-
-    pFlac = drflac_open(drflac__on_read_stdio, drflac__on_seek_stdio, (void*)pFile, pAllocationCallbacks);
-    if (pFlac == NULL) {
-        fclose(pFile);
-        return NULL;
-    }
-
-    return pFlac;
-}
-
-DRFLAC_API drflac* drflac_open_file_w(const wchar_t* pFileName, const drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    drflac* pFlac;
-    FILE* pFile;
-
-    if (drflac_wfopen(&pFile, pFileName, L"rb", pAllocationCallbacks) != DRFLAC_SUCCESS) {
-        return NULL;
-    }
-
-    pFlac = drflac_open(drflac__on_read_stdio, drflac__on_seek_stdio, (void*)pFile, pAllocationCallbacks);
-    if (pFlac == NULL) {
-        fclose(pFile);
-        return NULL;
-    }
-
-    return pFlac;
-}
-
-DRFLAC_API drflac* drflac_open_file_with_metadata(const char* pFileName, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    drflac* pFlac;
-    FILE* pFile;
-
-    if (drflac_fopen(&pFile, pFileName, "rb") != DRFLAC_SUCCESS) {
-        return NULL;
-    }
-
-    pFlac = drflac_open_with_metadata_private(drflac__on_read_stdio, drflac__on_seek_stdio, onMeta, drflac_container_unknown, (void*)pFile, pUserData, pAllocationCallbacks);
-    if (pFlac == NULL) {
-        fclose(pFile);
-        return pFlac;
-    }
-
-    return pFlac;
-}
-
-DRFLAC_API drflac* drflac_open_file_with_metadata_w(const wchar_t* pFileName, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    drflac* pFlac;
-    FILE* pFile;
-
-    if (drflac_wfopen(&pFile, pFileName, L"rb", pAllocationCallbacks) != DRFLAC_SUCCESS) {
-        return NULL;
-    }
-
-    pFlac = drflac_open_with_metadata_private(drflac__on_read_stdio, drflac__on_seek_stdio, onMeta, drflac_container_unknown, (void*)pFile, pUserData, pAllocationCallbacks);
-    if (pFlac == NULL) {
-        fclose(pFile);
-        return pFlac;
-    }
-
-    return pFlac;
-}
-#endif  /* DR_FLAC_NO_STDIO */
-
-static size_t drflac__on_read_memory(void* pUserData, void* bufferOut, size_t bytesToRead)
-{
-    drflac__memory_stream* memoryStream = (drflac__memory_stream*)pUserData;
-    size_t bytesRemaining;
-
-    DRFLAC_ASSERT(memoryStream != NULL);
-    DRFLAC_ASSERT(memoryStream->dataSize >= memoryStream->currentReadPos);
-
-    bytesRemaining = memoryStream->dataSize - memoryStream->currentReadPos;
-    if (bytesToRead > bytesRemaining) {
-        bytesToRead = bytesRemaining;
-    }
-
-    if (bytesToRead > 0) {
-        DRFLAC_COPY_MEMORY(bufferOut, memoryStream->data + memoryStream->currentReadPos, bytesToRead);
-        memoryStream->currentReadPos += bytesToRead;
-    }
-
-    return bytesToRead;
-}
-
-static drflac_bool32 drflac__on_seek_memory(void* pUserData, int offset, drflac_seek_origin origin)
-{
-    drflac__memory_stream* memoryStream = (drflac__memory_stream*)pUserData;
-
-    DRFLAC_ASSERT(memoryStream != NULL);
-    DRFLAC_ASSERT(offset >= 0); /* <-- Never seek backwards. */
-
-    if (offset > (drflac_int64)memoryStream->dataSize) {
-        return DRFLAC_FALSE;
-    }
-
-    if (origin == drflac_seek_origin_current) {
-        if (memoryStream->currentReadPos + offset <= memoryStream->dataSize) {
-            memoryStream->currentReadPos += offset;
-        } else {
-            return DRFLAC_FALSE;  /* Trying to seek too far forward. */
-        }
-    } else {
-        if ((drflac_uint32)offset <= memoryStream->dataSize) {
-            memoryStream->currentReadPos = offset;
-        } else {
-            return DRFLAC_FALSE;  /* Trying to seek too far forward. */
-        }
-    }
-
-    return DRFLAC_TRUE;
-}
-
-DRFLAC_API drflac* drflac_open_memory(const void* pData, size_t dataSize, const drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    drflac__memory_stream memoryStream;
-    drflac* pFlac;
-
-    memoryStream.data = (const drflac_uint8*)pData;
-    memoryStream.dataSize = dataSize;
-    memoryStream.currentReadPos = 0;
-    pFlac = drflac_open(drflac__on_read_memory, drflac__on_seek_memory, &memoryStream, pAllocationCallbacks);
-    if (pFlac == NULL) {
-        return NULL;
-    }
-
-    pFlac->memoryStream = memoryStream;
-
-    /* This is an awful hack... */
-#ifndef DR_FLAC_NO_OGG
-    if (pFlac->container == drflac_container_ogg)
-    {
-        drflac_oggbs* oggbs = (drflac_oggbs*)pFlac->_oggbs;
-        oggbs->pUserData = &pFlac->memoryStream;
-    }
-    else
-#endif
-    {
-        pFlac->bs.pUserData = &pFlac->memoryStream;
-    }
-
-    return pFlac;
-}
-
-DRFLAC_API drflac* drflac_open_memory_with_metadata(const void* pData, size_t dataSize, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    drflac__memory_stream memoryStream;
-    drflac* pFlac;
-
-    memoryStream.data = (const drflac_uint8*)pData;
-    memoryStream.dataSize = dataSize;
-    memoryStream.currentReadPos = 0;
-    pFlac = drflac_open_with_metadata_private(drflac__on_read_memory, drflac__on_seek_memory, onMeta, drflac_container_unknown, &memoryStream, pUserData, pAllocationCallbacks);
-    if (pFlac == NULL) {
-        return NULL;
-    }
-
-    pFlac->memoryStream = memoryStream;
-
-    /* This is an awful hack... */
-#ifndef DR_FLAC_NO_OGG
-    if (pFlac->container == drflac_container_ogg)
-    {
-        drflac_oggbs* oggbs = (drflac_oggbs*)pFlac->_oggbs;
-        oggbs->pUserData = &pFlac->memoryStream;
-    }
-    else
-#endif
-    {
-        pFlac->bs.pUserData = &pFlac->memoryStream;
-    }
-
-    return pFlac;
-}
-
-
-
-DRFLAC_API drflac* drflac_open(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    return drflac_open_with_metadata_private(onRead, onSeek, NULL, drflac_container_unknown, pUserData, pUserData, pAllocationCallbacks);
-}
-DRFLAC_API drflac* drflac_open_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_container container, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    return drflac_open_with_metadata_private(onRead, onSeek, NULL, container, pUserData, pUserData, pAllocationCallbacks);
-}
-
-DRFLAC_API drflac* drflac_open_with_metadata(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    return drflac_open_with_metadata_private(onRead, onSeek, onMeta, drflac_container_unknown, pUserData, pUserData, pAllocationCallbacks);
-}
-DRFLAC_API drflac* drflac_open_with_metadata_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    return drflac_open_with_metadata_private(onRead, onSeek, onMeta, container, pUserData, pUserData, pAllocationCallbacks);
-}
-
-DRFLAC_API void drflac_close(drflac* pFlac)
-{
-    if (pFlac == NULL) {
-        return;
-    }
-
-#ifndef DR_FLAC_NO_STDIO
-    /*
-    If we opened the file with drflac_open_file() we will want to close the file handle. We can know whether or not drflac_open_file()
-    was used by looking at the callbacks.
-    */
-    if (pFlac->bs.onRead == drflac__on_read_stdio) {
-        fclose((FILE*)pFlac->bs.pUserData);
-    }
-
-#ifndef DR_FLAC_NO_OGG
-    /* Need to clean up Ogg streams a bit differently due to the way the bit streaming is chained. */
-    if (pFlac->container == drflac_container_ogg) {
-        drflac_oggbs* oggbs = (drflac_oggbs*)pFlac->_oggbs;
-        DRFLAC_ASSERT(pFlac->bs.onRead == drflac__on_read_ogg);
-
-        if (oggbs->onRead == drflac__on_read_stdio) {
-            fclose((FILE*)oggbs->pUserData);
-        }
-    }
-#endif
-#endif
-
-    drflac__free_from_callbacks(pFlac, &pFlac->allocationCallbacks);
-}
-
-
-#if 0
-static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_left_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
-{
-    drflac_uint64 i;
-    for (i = 0; i < frameCount; ++i) {
-        drflac_uint32 left  = (drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
-        drflac_uint32 side  = (drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
-        drflac_uint32 right = left - side;
-
-        pOutputSamples[i*2+0] = (drflac_int32)left;
-        pOutputSamples[i*2+1] = (drflac_int32)right;
-    }
-}
-#endif
-
-static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_left_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-    for (i = 0; i < frameCount4; ++i) {
-        drflac_uint32 left0 = pInputSamples0U32[i*4+0] << shift0;
-        drflac_uint32 left1 = pInputSamples0U32[i*4+1] << shift0;
-        drflac_uint32 left2 = pInputSamples0U32[i*4+2] << shift0;
-        drflac_uint32 left3 = pInputSamples0U32[i*4+3] << shift0;
-
-        drflac_uint32 side0 = pInputSamples1U32[i*4+0] << shift1;
-        drflac_uint32 side1 = pInputSamples1U32[i*4+1] << shift1;
-        drflac_uint32 side2 = pInputSamples1U32[i*4+2] << shift1;
-        drflac_uint32 side3 = pInputSamples1U32[i*4+3] << shift1;
-
-        drflac_uint32 right0 = left0 - side0;
-        drflac_uint32 right1 = left1 - side1;
-        drflac_uint32 right2 = left2 - side2;
-        drflac_uint32 right3 = left3 - side3;
-
-        pOutputSamples[i*8+0] = (drflac_int32)left0;
-        pOutputSamples[i*8+1] = (drflac_int32)right0;
-        pOutputSamples[i*8+2] = (drflac_int32)left1;
-        pOutputSamples[i*8+3] = (drflac_int32)right1;
-        pOutputSamples[i*8+4] = (drflac_int32)left2;
-        pOutputSamples[i*8+5] = (drflac_int32)right2;
-        pOutputSamples[i*8+6] = (drflac_int32)left3;
-        pOutputSamples[i*8+7] = (drflac_int32)right3;
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
-        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
-        drflac_uint32 right = left - side;
-
-        pOutputSamples[i*2+0] = (drflac_int32)left;
-        pOutputSamples[i*2+1] = (drflac_int32)right;
-    }
-}
-
-#if defined(DRFLAC_SUPPORT_SSE2)
-static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_left_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
-
-    for (i = 0; i < frameCount4; ++i) {
-        __m128i left  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
-        __m128i side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
-        __m128i right = _mm_sub_epi32(left, side);
-
-        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right));
-        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right));
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
-        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
-        drflac_uint32 right = left - side;
-
-        pOutputSamples[i*2+0] = (drflac_int32)left;
-        pOutputSamples[i*2+1] = (drflac_int32)right;
-    }
-}
-#endif
-
-#if defined(DRFLAC_SUPPORT_NEON)
-static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_left_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-    int32x4_t shift0_4;
-    int32x4_t shift1_4;
-
-    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
-
-    shift0_4 = vdupq_n_s32(shift0);
-    shift1_4 = vdupq_n_s32(shift1);
-
-    for (i = 0; i < frameCount4; ++i) {
-        uint32x4_t left;
-        uint32x4_t side;
-        uint32x4_t right;
-
-        left  = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4);
-        side  = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4);
-        right = vsubq_u32(left, side);
-
-        drflac__vst2q_u32((drflac_uint32*)pOutputSamples + i*8, vzipq_u32(left, right));
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
-        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
-        drflac_uint32 right = left - side;
-
-        pOutputSamples[i*2+0] = (drflac_int32)left;
-        pOutputSamples[i*2+1] = (drflac_int32)right;
-    }
-}
-#endif
-
-static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_left_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
-{
-#if defined(DRFLAC_SUPPORT_SSE2)
-    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
-        drflac_read_pcm_frames_s32__decode_left_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-    } else
-#elif defined(DRFLAC_SUPPORT_NEON)
-    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
-        drflac_read_pcm_frames_s32__decode_left_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-    } else
-#endif
-    {
-        /* Scalar fallback. */
-#if 0
-        drflac_read_pcm_frames_s32__decode_left_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-#else
-        drflac_read_pcm_frames_s32__decode_left_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-#endif
-    }
-}
-
-
-#if 0
-static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_right_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
-{
-    drflac_uint64 i;
-    for (i = 0; i < frameCount; ++i) {
-        drflac_uint32 side  = (drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
-        drflac_uint32 right = (drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
-        drflac_uint32 left  = right + side;
-
-        pOutputSamples[i*2+0] = (drflac_int32)left;
-        pOutputSamples[i*2+1] = (drflac_int32)right;
-    }
-}
-#endif
-
-static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_right_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-    for (i = 0; i < frameCount4; ++i) {
-        drflac_uint32 side0  = pInputSamples0U32[i*4+0] << shift0;
-        drflac_uint32 side1  = pInputSamples0U32[i*4+1] << shift0;
-        drflac_uint32 side2  = pInputSamples0U32[i*4+2] << shift0;
-        drflac_uint32 side3  = pInputSamples0U32[i*4+3] << shift0;
-
-        drflac_uint32 right0 = pInputSamples1U32[i*4+0] << shift1;
-        drflac_uint32 right1 = pInputSamples1U32[i*4+1] << shift1;
-        drflac_uint32 right2 = pInputSamples1U32[i*4+2] << shift1;
-        drflac_uint32 right3 = pInputSamples1U32[i*4+3] << shift1;
-
-        drflac_uint32 left0 = right0 + side0;
-        drflac_uint32 left1 = right1 + side1;
-        drflac_uint32 left2 = right2 + side2;
-        drflac_uint32 left3 = right3 + side3;
-
-        pOutputSamples[i*8+0] = (drflac_int32)left0;
-        pOutputSamples[i*8+1] = (drflac_int32)right0;
-        pOutputSamples[i*8+2] = (drflac_int32)left1;
-        pOutputSamples[i*8+3] = (drflac_int32)right1;
-        pOutputSamples[i*8+4] = (drflac_int32)left2;
-        pOutputSamples[i*8+5] = (drflac_int32)right2;
-        pOutputSamples[i*8+6] = (drflac_int32)left3;
-        pOutputSamples[i*8+7] = (drflac_int32)right3;
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
-        drflac_uint32 right = pInputSamples1U32[i] << shift1;
-        drflac_uint32 left  = right + side;
-
-        pOutputSamples[i*2+0] = (drflac_int32)left;
-        pOutputSamples[i*2+1] = (drflac_int32)right;
-    }
-}
-
-#if defined(DRFLAC_SUPPORT_SSE2)
-static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_right_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
-
-    for (i = 0; i < frameCount4; ++i) {
-        __m128i side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
-        __m128i right = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
-        __m128i left  = _mm_add_epi32(right, side);
-
-        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right));
-        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right));
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
-        drflac_uint32 right = pInputSamples1U32[i] << shift1;
-        drflac_uint32 left  = right + side;
-
-        pOutputSamples[i*2+0] = (drflac_int32)left;
-        pOutputSamples[i*2+1] = (drflac_int32)right;
-    }
-}
-#endif
-
-#if defined(DRFLAC_SUPPORT_NEON)
-static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_right_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-    int32x4_t shift0_4;
-    int32x4_t shift1_4;
-
-    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
-
-    shift0_4 = vdupq_n_s32(shift0);
-    shift1_4 = vdupq_n_s32(shift1);
-
-    for (i = 0; i < frameCount4; ++i) {
-        uint32x4_t side;
-        uint32x4_t right;
-        uint32x4_t left;
-
-        side  = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4);
-        right = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4);
-        left  = vaddq_u32(right, side);
-
-        drflac__vst2q_u32((drflac_uint32*)pOutputSamples + i*8, vzipq_u32(left, right));
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
-        drflac_uint32 right = pInputSamples1U32[i] << shift1;
-        drflac_uint32 left  = right + side;
-
-        pOutputSamples[i*2+0] = (drflac_int32)left;
-        pOutputSamples[i*2+1] = (drflac_int32)right;
-    }
-}
-#endif
-
-static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_right_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
-{
-#if defined(DRFLAC_SUPPORT_SSE2)
-    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
-        drflac_read_pcm_frames_s32__decode_right_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-    } else
-#elif defined(DRFLAC_SUPPORT_NEON)
-    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
-        drflac_read_pcm_frames_s32__decode_right_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-    } else
-#endif
-    {
-        /* Scalar fallback. */
-#if 0
-        drflac_read_pcm_frames_s32__decode_right_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-#else
-        drflac_read_pcm_frames_s32__decode_right_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-#endif
-    }
-}
-
-
-#if 0
-static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_mid_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
-{
-    for (drflac_uint64 i = 0; i < frameCount; ++i) {
-        drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-        drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-        mid = (mid << 1) | (side & 0x01);
-
-        pOutputSamples[i*2+0] = (drflac_int32)((drflac_uint32)((drflac_int32)(mid + side) >> 1) << unusedBitsPerSample);
-        pOutputSamples[i*2+1] = (drflac_int32)((drflac_uint32)((drflac_int32)(mid - side) >> 1) << unusedBitsPerSample);
-    }
-}
-#endif
-
-static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_mid_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_int32 shift = unusedBitsPerSample;
-
-    if (shift > 0) {
-        shift -= 1;
-        for (i = 0; i < frameCount4; ++i) {
-            drflac_uint32 temp0L;
-            drflac_uint32 temp1L;
-            drflac_uint32 temp2L;
-            drflac_uint32 temp3L;
-            drflac_uint32 temp0R;
-            drflac_uint32 temp1R;
-            drflac_uint32 temp2R;
-            drflac_uint32 temp3R;
-
-            drflac_uint32 mid0  = pInputSamples0U32[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 mid1  = pInputSamples0U32[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 mid2  = pInputSamples0U32[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 mid3  = pInputSamples0U32[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-
-            drflac_uint32 side0 = pInputSamples1U32[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-            drflac_uint32 side1 = pInputSamples1U32[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-            drflac_uint32 side2 = pInputSamples1U32[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-            drflac_uint32 side3 = pInputSamples1U32[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-            mid0 = (mid0 << 1) | (side0 & 0x01);
-            mid1 = (mid1 << 1) | (side1 & 0x01);
-            mid2 = (mid2 << 1) | (side2 & 0x01);
-            mid3 = (mid3 << 1) | (side3 & 0x01);
-
-            temp0L = (mid0 + side0) << shift;
-            temp1L = (mid1 + side1) << shift;
-            temp2L = (mid2 + side2) << shift;
-            temp3L = (mid3 + side3) << shift;
-
-            temp0R = (mid0 - side0) << shift;
-            temp1R = (mid1 - side1) << shift;
-            temp2R = (mid2 - side2) << shift;
-            temp3R = (mid3 - side3) << shift;
-
-            pOutputSamples[i*8+0] = (drflac_int32)temp0L;
-            pOutputSamples[i*8+1] = (drflac_int32)temp0R;
-            pOutputSamples[i*8+2] = (drflac_int32)temp1L;
-            pOutputSamples[i*8+3] = (drflac_int32)temp1R;
-            pOutputSamples[i*8+4] = (drflac_int32)temp2L;
-            pOutputSamples[i*8+5] = (drflac_int32)temp2R;
-            pOutputSamples[i*8+6] = (drflac_int32)temp3L;
-            pOutputSamples[i*8+7] = (drflac_int32)temp3R;
-        }
-    } else {
-        for (i = 0; i < frameCount4; ++i) {
-            drflac_uint32 temp0L;
-            drflac_uint32 temp1L;
-            drflac_uint32 temp2L;
-            drflac_uint32 temp3L;
-            drflac_uint32 temp0R;
-            drflac_uint32 temp1R;
-            drflac_uint32 temp2R;
-            drflac_uint32 temp3R;
-
-            drflac_uint32 mid0  = pInputSamples0U32[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 mid1  = pInputSamples0U32[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 mid2  = pInputSamples0U32[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 mid3  = pInputSamples0U32[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-
-            drflac_uint32 side0 = pInputSamples1U32[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-            drflac_uint32 side1 = pInputSamples1U32[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-            drflac_uint32 side2 = pInputSamples1U32[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-            drflac_uint32 side3 = pInputSamples1U32[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-            mid0 = (mid0 << 1) | (side0 & 0x01);
-            mid1 = (mid1 << 1) | (side1 & 0x01);
-            mid2 = (mid2 << 1) | (side2 & 0x01);
-            mid3 = (mid3 << 1) | (side3 & 0x01);
-
-            temp0L = (drflac_uint32)((drflac_int32)(mid0 + side0) >> 1);
-            temp1L = (drflac_uint32)((drflac_int32)(mid1 + side1) >> 1);
-            temp2L = (drflac_uint32)((drflac_int32)(mid2 + side2) >> 1);
-            temp3L = (drflac_uint32)((drflac_int32)(mid3 + side3) >> 1);
-
-            temp0R = (drflac_uint32)((drflac_int32)(mid0 - side0) >> 1);
-            temp1R = (drflac_uint32)((drflac_int32)(mid1 - side1) >> 1);
-            temp2R = (drflac_uint32)((drflac_int32)(mid2 - side2) >> 1);
-            temp3R = (drflac_uint32)((drflac_int32)(mid3 - side3) >> 1);
-
-            pOutputSamples[i*8+0] = (drflac_int32)temp0L;
-            pOutputSamples[i*8+1] = (drflac_int32)temp0R;
-            pOutputSamples[i*8+2] = (drflac_int32)temp1L;
-            pOutputSamples[i*8+3] = (drflac_int32)temp1R;
-            pOutputSamples[i*8+4] = (drflac_int32)temp2L;
-            pOutputSamples[i*8+5] = (drflac_int32)temp2R;
-            pOutputSamples[i*8+6] = (drflac_int32)temp3L;
-            pOutputSamples[i*8+7] = (drflac_int32)temp3R;
-        }
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-        drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-        mid = (mid << 1) | (side & 0x01);
-
-        pOutputSamples[i*2+0] = (drflac_int32)((drflac_uint32)((drflac_int32)(mid + side) >> 1) << unusedBitsPerSample);
-        pOutputSamples[i*2+1] = (drflac_int32)((drflac_uint32)((drflac_int32)(mid - side) >> 1) << unusedBitsPerSample);
-    }
-}
-
-#if defined(DRFLAC_SUPPORT_SSE2)
-static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_mid_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_int32 shift = unusedBitsPerSample;
-
-    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
-
-    if (shift == 0) {
-        for (i = 0; i < frameCount4; ++i) {
-            __m128i mid;
-            __m128i side;
-            __m128i left;
-            __m128i right;
-
-            mid   = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
-            side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
-
-            mid   = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01)));
-
-            left  = _mm_srai_epi32(_mm_add_epi32(mid, side), 1);
-            right = _mm_srai_epi32(_mm_sub_epi32(mid, side), 1);
-
-            _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right));
-            _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right));
-        }
-
-        for (i = (frameCount4 << 2); i < frameCount; ++i) {
-            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-            mid = (mid << 1) | (side & 0x01);
-
-            pOutputSamples[i*2+0] = (drflac_int32)(mid + side) >> 1;
-            pOutputSamples[i*2+1] = (drflac_int32)(mid - side) >> 1;
-        }
-    } else {
-        shift -= 1;
-        for (i = 0; i < frameCount4; ++i) {
-            __m128i mid;
-            __m128i side;
-            __m128i left;
-            __m128i right;
-
-            mid   = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
-            side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
-
-            mid   = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01)));
-
-            left  = _mm_slli_epi32(_mm_add_epi32(mid, side), shift);
-            right = _mm_slli_epi32(_mm_sub_epi32(mid, side), shift);
-
-            _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right));
-            _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right));
-        }
-
-        for (i = (frameCount4 << 2); i < frameCount; ++i) {
-            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-            mid = (mid << 1) | (side & 0x01);
-
-            pOutputSamples[i*2+0] = (drflac_int32)((mid + side) << shift);
-            pOutputSamples[i*2+1] = (drflac_int32)((mid - side) << shift);
-        }
-    }
-}
-#endif
-
-#if defined(DRFLAC_SUPPORT_NEON)
-static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_mid_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_int32 shift = unusedBitsPerSample;
-    int32x4_t  wbpsShift0_4; /* wbps = Wasted Bits Per Sample */
-    int32x4_t  wbpsShift1_4; /* wbps = Wasted Bits Per Sample */
-    uint32x4_t one4;
-
-    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
-
-    wbpsShift0_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
-    wbpsShift1_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
-    one4         = vdupq_n_u32(1);
-
-    if (shift == 0) {
-        for (i = 0; i < frameCount4; ++i) {
-            uint32x4_t mid;
-            uint32x4_t side;
-            int32x4_t left;
-            int32x4_t right;
-
-            mid   = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbpsShift0_4);
-            side  = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbpsShift1_4);
-
-            mid   = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, one4));
-
-            left  = vshrq_n_s32(vreinterpretq_s32_u32(vaddq_u32(mid, side)), 1);
-            right = vshrq_n_s32(vreinterpretq_s32_u32(vsubq_u32(mid, side)), 1);
-
-            drflac__vst2q_s32(pOutputSamples + i*8, vzipq_s32(left, right));
-        }
-
-        for (i = (frameCount4 << 2); i < frameCount; ++i) {
-            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-            mid = (mid << 1) | (side & 0x01);
-
-            pOutputSamples[i*2+0] = (drflac_int32)(mid + side) >> 1;
-            pOutputSamples[i*2+1] = (drflac_int32)(mid - side) >> 1;
-        }
-    } else {
-        int32x4_t shift4;
-
-        shift -= 1;
-        shift4 = vdupq_n_s32(shift);
-
-        for (i = 0; i < frameCount4; ++i) {
-            uint32x4_t mid;
-            uint32x4_t side;
-            int32x4_t left;
-            int32x4_t right;
-
-            mid   = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbpsShift0_4);
-            side  = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbpsShift1_4);
-
-            mid   = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, one4));
-
-            left  = vreinterpretq_s32_u32(vshlq_u32(vaddq_u32(mid, side), shift4));
-            right = vreinterpretq_s32_u32(vshlq_u32(vsubq_u32(mid, side), shift4));
-
-            drflac__vst2q_s32(pOutputSamples + i*8, vzipq_s32(left, right));
-        }
-
-        for (i = (frameCount4 << 2); i < frameCount; ++i) {
-            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-            mid = (mid << 1) | (side & 0x01);
-
-            pOutputSamples[i*2+0] = (drflac_int32)((mid + side) << shift);
-            pOutputSamples[i*2+1] = (drflac_int32)((mid - side) << shift);
-        }
-    }
-}
-#endif
-
-static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_mid_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
-{
-#if defined(DRFLAC_SUPPORT_SSE2)
-    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
-        drflac_read_pcm_frames_s32__decode_mid_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-    } else
-#elif defined(DRFLAC_SUPPORT_NEON)
-    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
-        drflac_read_pcm_frames_s32__decode_mid_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-    } else
-#endif
-    {
-        /* Scalar fallback. */
-#if 0
-        drflac_read_pcm_frames_s32__decode_mid_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-#else
-        drflac_read_pcm_frames_s32__decode_mid_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-#endif
-    }
-}
-
-
-#if 0
-static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_independent_stereo__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
-{
-    for (drflac_uint64 i = 0; i < frameCount; ++i) {
-        pOutputSamples[i*2+0] = (drflac_int32)((drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample));
-        pOutputSamples[i*2+1] = (drflac_int32)((drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample));
-    }
-}
-#endif
-
-static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_independent_stereo__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-    for (i = 0; i < frameCount4; ++i) {
-        drflac_uint32 tempL0 = pInputSamples0U32[i*4+0] << shift0;
-        drflac_uint32 tempL1 = pInputSamples0U32[i*4+1] << shift0;
-        drflac_uint32 tempL2 = pInputSamples0U32[i*4+2] << shift0;
-        drflac_uint32 tempL3 = pInputSamples0U32[i*4+3] << shift0;
-
-        drflac_uint32 tempR0 = pInputSamples1U32[i*4+0] << shift1;
-        drflac_uint32 tempR1 = pInputSamples1U32[i*4+1] << shift1;
-        drflac_uint32 tempR2 = pInputSamples1U32[i*4+2] << shift1;
-        drflac_uint32 tempR3 = pInputSamples1U32[i*4+3] << shift1;
-
-        pOutputSamples[i*8+0] = (drflac_int32)tempL0;
-        pOutputSamples[i*8+1] = (drflac_int32)tempR0;
-        pOutputSamples[i*8+2] = (drflac_int32)tempL1;
-        pOutputSamples[i*8+3] = (drflac_int32)tempR1;
-        pOutputSamples[i*8+4] = (drflac_int32)tempL2;
-        pOutputSamples[i*8+5] = (drflac_int32)tempR2;
-        pOutputSamples[i*8+6] = (drflac_int32)tempL3;
-        pOutputSamples[i*8+7] = (drflac_int32)tempR3;
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        pOutputSamples[i*2+0] = (drflac_int32)(pInputSamples0U32[i] << shift0);
-        pOutputSamples[i*2+1] = (drflac_int32)(pInputSamples1U32[i] << shift1);
-    }
-}
-
-#if defined(DRFLAC_SUPPORT_SSE2)
-static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_independent_stereo__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-    for (i = 0; i < frameCount4; ++i) {
-        __m128i left  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
-        __m128i right = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
-
-        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right));
-        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right));
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        pOutputSamples[i*2+0] = (drflac_int32)(pInputSamples0U32[i] << shift0);
-        pOutputSamples[i*2+1] = (drflac_int32)(pInputSamples1U32[i] << shift1);
-    }
-}
-#endif
-
-#if defined(DRFLAC_SUPPORT_NEON)
-static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_independent_stereo__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-    int32x4_t shift4_0 = vdupq_n_s32(shift0);
-    int32x4_t shift4_1 = vdupq_n_s32(shift1);
-
-    for (i = 0; i < frameCount4; ++i) {
-        int32x4_t left;
-        int32x4_t right;
-
-        left  = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift4_0));
-        right = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift4_1));
-
-        drflac__vst2q_s32(pOutputSamples + i*8, vzipq_s32(left, right));
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        pOutputSamples[i*2+0] = (drflac_int32)(pInputSamples0U32[i] << shift0);
-        pOutputSamples[i*2+1] = (drflac_int32)(pInputSamples1U32[i] << shift1);
-    }
-}
-#endif
-
-static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_independent_stereo(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
-{
-#if defined(DRFLAC_SUPPORT_SSE2)
-    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
-        drflac_read_pcm_frames_s32__decode_independent_stereo__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-    } else
-#elif defined(DRFLAC_SUPPORT_NEON)
-    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
-        drflac_read_pcm_frames_s32__decode_independent_stereo__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-    } else
-#endif
-    {
-        /* Scalar fallback. */
-#if 0
-        drflac_read_pcm_frames_s32__decode_independent_stereo__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-#else
-        drflac_read_pcm_frames_s32__decode_independent_stereo__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-#endif
-    }
-}
-
-
-DRFLAC_API drflac_uint64 drflac_read_pcm_frames_s32(drflac* pFlac, drflac_uint64 framesToRead, drflac_int32* pBufferOut)
-{
-    drflac_uint64 framesRead;
-    drflac_uint32 unusedBitsPerSample;
-
-    if (pFlac == NULL || framesToRead == 0) {
-        return 0;
-    }
-
-    if (pBufferOut == NULL) {
-        return drflac__seek_forward_by_pcm_frames(pFlac, framesToRead);
-    }
-
-    DRFLAC_ASSERT(pFlac->bitsPerSample <= 32);
-    unusedBitsPerSample = 32 - pFlac->bitsPerSample;
-
-    framesRead = 0;
-    while (framesToRead > 0) {
-        /* If we've run out of samples in this frame, go to the next. */
-        if (pFlac->currentFLACFrame.pcmFramesRemaining == 0) {
-            if (!drflac__read_and_decode_next_flac_frame(pFlac)) {
-                break;  /* Couldn't read the next frame, so just break from the loop and return. */
-            }
-        } else {
-            unsigned int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFLACFrame.header.channelAssignment);
-            drflac_uint64 iFirstPCMFrame = pFlac->currentFLACFrame.header.blockSizeInPCMFrames - pFlac->currentFLACFrame.pcmFramesRemaining;
-            drflac_uint64 frameCountThisIteration = framesToRead;
-
-            if (frameCountThisIteration > pFlac->currentFLACFrame.pcmFramesRemaining) {
-                frameCountThisIteration = pFlac->currentFLACFrame.pcmFramesRemaining;
-            }
-
-            if (channelCount == 2) {
-                const drflac_int32* pDecodedSamples0 = pFlac->currentFLACFrame.subframes[0].pSamplesS32 + iFirstPCMFrame;
-                const drflac_int32* pDecodedSamples1 = pFlac->currentFLACFrame.subframes[1].pSamplesS32 + iFirstPCMFrame;
-
-                switch (pFlac->currentFLACFrame.header.channelAssignment)
-                {
-                    case DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE:
-                    {
-                        drflac_read_pcm_frames_s32__decode_left_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
-                    } break;
-
-                    case DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE:
-                    {
-                        drflac_read_pcm_frames_s32__decode_right_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
-                    } break;
-
-                    case DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE:
-                    {
-                        drflac_read_pcm_frames_s32__decode_mid_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
-                    } break;
-
-                    case DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT:
-                    default:
-                    {
-                        drflac_read_pcm_frames_s32__decode_independent_stereo(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
-                    } break;
-                }
-            } else {
-                /* Generic interleaving. */
-                drflac_uint64 i;
-                for (i = 0; i < frameCountThisIteration; ++i) {
-                    unsigned int j;
-                    for (j = 0; j < channelCount; ++j) {
-                        pBufferOut[(i*channelCount)+j] = (drflac_int32)((drflac_uint32)(pFlac->currentFLACFrame.subframes[j].pSamplesS32[iFirstPCMFrame + i]) << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[j].wastedBitsPerSample));
-                    }
-                }
-            }
-
-            framesRead                += frameCountThisIteration;
-            pBufferOut                += frameCountThisIteration * channelCount;
-            framesToRead              -= frameCountThisIteration;
-            pFlac->currentPCMFrame    += frameCountThisIteration;
-            pFlac->currentFLACFrame.pcmFramesRemaining -= (drflac_uint32)frameCountThisIteration;
-        }
-    }
-
-    return framesRead;
-}
-
-
-#if 0
-static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_left_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
-{
-    drflac_uint64 i;
-    for (i = 0; i < frameCount; ++i) {
-        drflac_uint32 left  = (drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
-        drflac_uint32 side  = (drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
-        drflac_uint32 right = left - side;
-
-        left  >>= 16;
-        right >>= 16;
-
-        pOutputSamples[i*2+0] = (drflac_int16)left;
-        pOutputSamples[i*2+1] = (drflac_int16)right;
-    }
-}
-#endif
-
-static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_left_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-    for (i = 0; i < frameCount4; ++i) {
-        drflac_uint32 left0 = pInputSamples0U32[i*4+0] << shift0;
-        drflac_uint32 left1 = pInputSamples0U32[i*4+1] << shift0;
-        drflac_uint32 left2 = pInputSamples0U32[i*4+2] << shift0;
-        drflac_uint32 left3 = pInputSamples0U32[i*4+3] << shift0;
-
-        drflac_uint32 side0 = pInputSamples1U32[i*4+0] << shift1;
-        drflac_uint32 side1 = pInputSamples1U32[i*4+1] << shift1;
-        drflac_uint32 side2 = pInputSamples1U32[i*4+2] << shift1;
-        drflac_uint32 side3 = pInputSamples1U32[i*4+3] << shift1;
-
-        drflac_uint32 right0 = left0 - side0;
-        drflac_uint32 right1 = left1 - side1;
-        drflac_uint32 right2 = left2 - side2;
-        drflac_uint32 right3 = left3 - side3;
-
-        left0  >>= 16;
-        left1  >>= 16;
-        left2  >>= 16;
-        left3  >>= 16;
-
-        right0 >>= 16;
-        right1 >>= 16;
-        right2 >>= 16;
-        right3 >>= 16;
-
-        pOutputSamples[i*8+0] = (drflac_int16)left0;
-        pOutputSamples[i*8+1] = (drflac_int16)right0;
-        pOutputSamples[i*8+2] = (drflac_int16)left1;
-        pOutputSamples[i*8+3] = (drflac_int16)right1;
-        pOutputSamples[i*8+4] = (drflac_int16)left2;
-        pOutputSamples[i*8+5] = (drflac_int16)right2;
-        pOutputSamples[i*8+6] = (drflac_int16)left3;
-        pOutputSamples[i*8+7] = (drflac_int16)right3;
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
-        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
-        drflac_uint32 right = left - side;
-
-        left  >>= 16;
-        right >>= 16;
-
-        pOutputSamples[i*2+0] = (drflac_int16)left;
-        pOutputSamples[i*2+1] = (drflac_int16)right;
-    }
-}
-
-#if defined(DRFLAC_SUPPORT_SSE2)
-static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_left_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
-
-    for (i = 0; i < frameCount4; ++i) {
-        __m128i left  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
-        __m128i side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
-        __m128i right = _mm_sub_epi32(left, side);
-
-        left  = _mm_srai_epi32(left,  16);
-        right = _mm_srai_epi32(right, 16);
-
-        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right));
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
-        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
-        drflac_uint32 right = left - side;
-
-        left  >>= 16;
-        right >>= 16;
-
-        pOutputSamples[i*2+0] = (drflac_int16)left;
-        pOutputSamples[i*2+1] = (drflac_int16)right;
-    }
-}
-#endif
-
-#if defined(DRFLAC_SUPPORT_NEON)
-static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_left_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-    int32x4_t shift0_4;
-    int32x4_t shift1_4;
-
-    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
-
-    shift0_4 = vdupq_n_s32(shift0);
-    shift1_4 = vdupq_n_s32(shift1);
-
-    for (i = 0; i < frameCount4; ++i) {
-        uint32x4_t left;
-        uint32x4_t side;
-        uint32x4_t right;
-
-        left  = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4);
-        side  = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4);
-        right = vsubq_u32(left, side);
-
-        left  = vshrq_n_u32(left,  16);
-        right = vshrq_n_u32(right, 16);
-
-        drflac__vst2q_u16((drflac_uint16*)pOutputSamples + i*8, vzip_u16(vmovn_u32(left), vmovn_u32(right)));
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
-        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
-        drflac_uint32 right = left - side;
-
-        left  >>= 16;
-        right >>= 16;
-
-        pOutputSamples[i*2+0] = (drflac_int16)left;
-        pOutputSamples[i*2+1] = (drflac_int16)right;
-    }
-}
-#endif
-
-static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_left_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
-{
-#if defined(DRFLAC_SUPPORT_SSE2)
-    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
-        drflac_read_pcm_frames_s16__decode_left_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-    } else
-#elif defined(DRFLAC_SUPPORT_NEON)
-    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
-        drflac_read_pcm_frames_s16__decode_left_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-    } else
-#endif
-    {
-        /* Scalar fallback. */
-#if 0
-        drflac_read_pcm_frames_s16__decode_left_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-#else
-        drflac_read_pcm_frames_s16__decode_left_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-#endif
-    }
-}
-
-
-#if 0
-static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_right_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
-{
-    drflac_uint64 i;
-    for (i = 0; i < frameCount; ++i) {
-        drflac_uint32 side  = (drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
-        drflac_uint32 right = (drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
-        drflac_uint32 left  = right + side;
-
-        left  >>= 16;
-        right >>= 16;
-
-        pOutputSamples[i*2+0] = (drflac_int16)left;
-        pOutputSamples[i*2+1] = (drflac_int16)right;
-    }
-}
-#endif
-
-static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_right_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-    for (i = 0; i < frameCount4; ++i) {
-        drflac_uint32 side0  = pInputSamples0U32[i*4+0] << shift0;
-        drflac_uint32 side1  = pInputSamples0U32[i*4+1] << shift0;
-        drflac_uint32 side2  = pInputSamples0U32[i*4+2] << shift0;
-        drflac_uint32 side3  = pInputSamples0U32[i*4+3] << shift0;
-
-        drflac_uint32 right0 = pInputSamples1U32[i*4+0] << shift1;
-        drflac_uint32 right1 = pInputSamples1U32[i*4+1] << shift1;
-        drflac_uint32 right2 = pInputSamples1U32[i*4+2] << shift1;
-        drflac_uint32 right3 = pInputSamples1U32[i*4+3] << shift1;
-
-        drflac_uint32 left0 = right0 + side0;
-        drflac_uint32 left1 = right1 + side1;
-        drflac_uint32 left2 = right2 + side2;
-        drflac_uint32 left3 = right3 + side3;
-
-        left0  >>= 16;
-        left1  >>= 16;
-        left2  >>= 16;
-        left3  >>= 16;
-
-        right0 >>= 16;
-        right1 >>= 16;
-        right2 >>= 16;
-        right3 >>= 16;
-
-        pOutputSamples[i*8+0] = (drflac_int16)left0;
-        pOutputSamples[i*8+1] = (drflac_int16)right0;
-        pOutputSamples[i*8+2] = (drflac_int16)left1;
-        pOutputSamples[i*8+3] = (drflac_int16)right1;
-        pOutputSamples[i*8+4] = (drflac_int16)left2;
-        pOutputSamples[i*8+5] = (drflac_int16)right2;
-        pOutputSamples[i*8+6] = (drflac_int16)left3;
-        pOutputSamples[i*8+7] = (drflac_int16)right3;
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
-        drflac_uint32 right = pInputSamples1U32[i] << shift1;
-        drflac_uint32 left  = right + side;
-
-        left  >>= 16;
-        right >>= 16;
-
-        pOutputSamples[i*2+0] = (drflac_int16)left;
-        pOutputSamples[i*2+1] = (drflac_int16)right;
-    }
-}
-
-#if defined(DRFLAC_SUPPORT_SSE2)
-static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_right_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
-
-    for (i = 0; i < frameCount4; ++i) {
-        __m128i side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
-        __m128i right = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
-        __m128i left  = _mm_add_epi32(right, side);
-
-        left  = _mm_srai_epi32(left,  16);
-        right = _mm_srai_epi32(right, 16);
-
-        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right));
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
-        drflac_uint32 right = pInputSamples1U32[i] << shift1;
-        drflac_uint32 left  = right + side;
-
-        left  >>= 16;
-        right >>= 16;
-
-        pOutputSamples[i*2+0] = (drflac_int16)left;
-        pOutputSamples[i*2+1] = (drflac_int16)right;
-    }
-}
-#endif
-
-#if defined(DRFLAC_SUPPORT_NEON)
-static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_right_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-    int32x4_t shift0_4;
-    int32x4_t shift1_4;
-
-    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
-
-    shift0_4 = vdupq_n_s32(shift0);
-    shift1_4 = vdupq_n_s32(shift1);
-
-    for (i = 0; i < frameCount4; ++i) {
-        uint32x4_t side;
-        uint32x4_t right;
-        uint32x4_t left;
-
-        side  = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4);
-        right = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4);
-        left  = vaddq_u32(right, side);
-
-        left  = vshrq_n_u32(left,  16);
-        right = vshrq_n_u32(right, 16);
-
-        drflac__vst2q_u16((drflac_uint16*)pOutputSamples + i*8, vzip_u16(vmovn_u32(left), vmovn_u32(right)));
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
-        drflac_uint32 right = pInputSamples1U32[i] << shift1;
-        drflac_uint32 left  = right + side;
-
-        left  >>= 16;
-        right >>= 16;
-
-        pOutputSamples[i*2+0] = (drflac_int16)left;
-        pOutputSamples[i*2+1] = (drflac_int16)right;
-    }
-}
-#endif
-
-static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_right_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
-{
-#if defined(DRFLAC_SUPPORT_SSE2)
-    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
-        drflac_read_pcm_frames_s16__decode_right_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-    } else
-#elif defined(DRFLAC_SUPPORT_NEON)
-    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
-        drflac_read_pcm_frames_s16__decode_right_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-    } else
-#endif
-    {
-        /* Scalar fallback. */
-#if 0
-        drflac_read_pcm_frames_s16__decode_right_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-#else
-        drflac_read_pcm_frames_s16__decode_right_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-#endif
-    }
-}
-
-
-#if 0
-static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_mid_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
-{
-    for (drflac_uint64 i = 0; i < frameCount; ++i) {
-        drflac_uint32 mid  = (drflac_uint32)pInputSamples0[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-        drflac_uint32 side = (drflac_uint32)pInputSamples1[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-        mid = (mid << 1) | (side & 0x01);
-
-        pOutputSamples[i*2+0] = (drflac_int16)(((drflac_uint32)((drflac_int32)(mid + side) >> 1) << unusedBitsPerSample) >> 16);
-        pOutputSamples[i*2+1] = (drflac_int16)(((drflac_uint32)((drflac_int32)(mid - side) >> 1) << unusedBitsPerSample) >> 16);
-    }
-}
-#endif
-
-static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_mid_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift = unusedBitsPerSample;
-
-    if (shift > 0) {
-        shift -= 1;
-        for (i = 0; i < frameCount4; ++i) {
-            drflac_uint32 temp0L;
-            drflac_uint32 temp1L;
-            drflac_uint32 temp2L;
-            drflac_uint32 temp3L;
-            drflac_uint32 temp0R;
-            drflac_uint32 temp1R;
-            drflac_uint32 temp2R;
-            drflac_uint32 temp3R;
-
-            drflac_uint32 mid0  = pInputSamples0U32[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 mid1  = pInputSamples0U32[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 mid2  = pInputSamples0U32[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 mid3  = pInputSamples0U32[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-
-            drflac_uint32 side0 = pInputSamples1U32[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-            drflac_uint32 side1 = pInputSamples1U32[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-            drflac_uint32 side2 = pInputSamples1U32[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-            drflac_uint32 side3 = pInputSamples1U32[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-            mid0 = (mid0 << 1) | (side0 & 0x01);
-            mid1 = (mid1 << 1) | (side1 & 0x01);
-            mid2 = (mid2 << 1) | (side2 & 0x01);
-            mid3 = (mid3 << 1) | (side3 & 0x01);
-
-            temp0L = (mid0 + side0) << shift;
-            temp1L = (mid1 + side1) << shift;
-            temp2L = (mid2 + side2) << shift;
-            temp3L = (mid3 + side3) << shift;
-
-            temp0R = (mid0 - side0) << shift;
-            temp1R = (mid1 - side1) << shift;
-            temp2R = (mid2 - side2) << shift;
-            temp3R = (mid3 - side3) << shift;
-
-            temp0L >>= 16;
-            temp1L >>= 16;
-            temp2L >>= 16;
-            temp3L >>= 16;
-
-            temp0R >>= 16;
-            temp1R >>= 16;
-            temp2R >>= 16;
-            temp3R >>= 16;
-
-            pOutputSamples[i*8+0] = (drflac_int16)temp0L;
-            pOutputSamples[i*8+1] = (drflac_int16)temp0R;
-            pOutputSamples[i*8+2] = (drflac_int16)temp1L;
-            pOutputSamples[i*8+3] = (drflac_int16)temp1R;
-            pOutputSamples[i*8+4] = (drflac_int16)temp2L;
-            pOutputSamples[i*8+5] = (drflac_int16)temp2R;
-            pOutputSamples[i*8+6] = (drflac_int16)temp3L;
-            pOutputSamples[i*8+7] = (drflac_int16)temp3R;
-        }
-    } else {
-        for (i = 0; i < frameCount4; ++i) {
-            drflac_uint32 temp0L;
-            drflac_uint32 temp1L;
-            drflac_uint32 temp2L;
-            drflac_uint32 temp3L;
-            drflac_uint32 temp0R;
-            drflac_uint32 temp1R;
-            drflac_uint32 temp2R;
-            drflac_uint32 temp3R;
-
-            drflac_uint32 mid0  = pInputSamples0U32[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 mid1  = pInputSamples0U32[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 mid2  = pInputSamples0U32[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 mid3  = pInputSamples0U32[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-
-            drflac_uint32 side0 = pInputSamples1U32[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-            drflac_uint32 side1 = pInputSamples1U32[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-            drflac_uint32 side2 = pInputSamples1U32[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-            drflac_uint32 side3 = pInputSamples1U32[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-            mid0 = (mid0 << 1) | (side0 & 0x01);
-            mid1 = (mid1 << 1) | (side1 & 0x01);
-            mid2 = (mid2 << 1) | (side2 & 0x01);
-            mid3 = (mid3 << 1) | (side3 & 0x01);
-
-            temp0L = ((drflac_int32)(mid0 + side0) >> 1);
-            temp1L = ((drflac_int32)(mid1 + side1) >> 1);
-            temp2L = ((drflac_int32)(mid2 + side2) >> 1);
-            temp3L = ((drflac_int32)(mid3 + side3) >> 1);
-
-            temp0R = ((drflac_int32)(mid0 - side0) >> 1);
-            temp1R = ((drflac_int32)(mid1 - side1) >> 1);
-            temp2R = ((drflac_int32)(mid2 - side2) >> 1);
-            temp3R = ((drflac_int32)(mid3 - side3) >> 1);
-
-            temp0L >>= 16;
-            temp1L >>= 16;
-            temp2L >>= 16;
-            temp3L >>= 16;
-
-            temp0R >>= 16;
-            temp1R >>= 16;
-            temp2R >>= 16;
-            temp3R >>= 16;
-
-            pOutputSamples[i*8+0] = (drflac_int16)temp0L;
-            pOutputSamples[i*8+1] = (drflac_int16)temp0R;
-            pOutputSamples[i*8+2] = (drflac_int16)temp1L;
-            pOutputSamples[i*8+3] = (drflac_int16)temp1R;
-            pOutputSamples[i*8+4] = (drflac_int16)temp2L;
-            pOutputSamples[i*8+5] = (drflac_int16)temp2R;
-            pOutputSamples[i*8+6] = (drflac_int16)temp3L;
-            pOutputSamples[i*8+7] = (drflac_int16)temp3R;
-        }
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-        drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-        mid = (mid << 1) | (side & 0x01);
-
-        pOutputSamples[i*2+0] = (drflac_int16)(((drflac_uint32)((drflac_int32)(mid + side) >> 1) << unusedBitsPerSample) >> 16);
-        pOutputSamples[i*2+1] = (drflac_int16)(((drflac_uint32)((drflac_int32)(mid - side) >> 1) << unusedBitsPerSample) >> 16);
-    }
-}
-
-#if defined(DRFLAC_SUPPORT_SSE2)
-static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_mid_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift = unusedBitsPerSample;
-
-    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
-
-    if (shift == 0) {
-        for (i = 0; i < frameCount4; ++i) {
-            __m128i mid;
-            __m128i side;
-            __m128i left;
-            __m128i right;
-
-            mid   = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
-            side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
-
-            mid   = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01)));
-
-            left  = _mm_srai_epi32(_mm_add_epi32(mid, side), 1);
-            right = _mm_srai_epi32(_mm_sub_epi32(mid, side), 1);
-
-            left  = _mm_srai_epi32(left,  16);
-            right = _mm_srai_epi32(right, 16);
-
-            _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right));
-        }
-
-        for (i = (frameCount4 << 2); i < frameCount; ++i) {
-            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-            mid = (mid << 1) | (side & 0x01);
-
-            pOutputSamples[i*2+0] = (drflac_int16)(((drflac_int32)(mid + side) >> 1) >> 16);
-            pOutputSamples[i*2+1] = (drflac_int16)(((drflac_int32)(mid - side) >> 1) >> 16);
-        }
-    } else {
-        shift -= 1;
-        for (i = 0; i < frameCount4; ++i) {
-            __m128i mid;
-            __m128i side;
-            __m128i left;
-            __m128i right;
-
-            mid   = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
-            side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
-
-            mid   = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01)));
-
-            left  = _mm_slli_epi32(_mm_add_epi32(mid, side), shift);
-            right = _mm_slli_epi32(_mm_sub_epi32(mid, side), shift);
-
-            left  = _mm_srai_epi32(left,  16);
-            right = _mm_srai_epi32(right, 16);
-
-            _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right));
-        }
-
-        for (i = (frameCount4 << 2); i < frameCount; ++i) {
-            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-            mid = (mid << 1) | (side & 0x01);
-
-            pOutputSamples[i*2+0] = (drflac_int16)(((mid + side) << shift) >> 16);
-            pOutputSamples[i*2+1] = (drflac_int16)(((mid - side) << shift) >> 16);
-        }
-    }
-}
-#endif
-
-#if defined(DRFLAC_SUPPORT_NEON)
-static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_mid_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift = unusedBitsPerSample;
-    int32x4_t wbpsShift0_4; /* wbps = Wasted Bits Per Sample */
-    int32x4_t wbpsShift1_4; /* wbps = Wasted Bits Per Sample */
-
-    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
-
-    wbpsShift0_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
-    wbpsShift1_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
-
-    if (shift == 0) {
-        for (i = 0; i < frameCount4; ++i) {
-            uint32x4_t mid;
-            uint32x4_t side;
-            int32x4_t left;
-            int32x4_t right;
-
-            mid   = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbpsShift0_4);
-            side  = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbpsShift1_4);
-
-            mid   = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, vdupq_n_u32(1)));
-
-            left  = vshrq_n_s32(vreinterpretq_s32_u32(vaddq_u32(mid, side)), 1);
-            right = vshrq_n_s32(vreinterpretq_s32_u32(vsubq_u32(mid, side)), 1);
-
-            left  = vshrq_n_s32(left,  16);
-            right = vshrq_n_s32(right, 16);
-
-            drflac__vst2q_s16(pOutputSamples + i*8, vzip_s16(vmovn_s32(left), vmovn_s32(right)));
-        }
-
-        for (i = (frameCount4 << 2); i < frameCount; ++i) {
-            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-            mid = (mid << 1) | (side & 0x01);
-
-            pOutputSamples[i*2+0] = (drflac_int16)(((drflac_int32)(mid + side) >> 1) >> 16);
-            pOutputSamples[i*2+1] = (drflac_int16)(((drflac_int32)(mid - side) >> 1) >> 16);
-        }
-    } else {
-        int32x4_t shift4;
-
-        shift -= 1;
-        shift4 = vdupq_n_s32(shift);
-
-        for (i = 0; i < frameCount4; ++i) {
-            uint32x4_t mid;
-            uint32x4_t side;
-            int32x4_t left;
-            int32x4_t right;
-
-            mid   = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbpsShift0_4);
-            side  = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbpsShift1_4);
-
-            mid   = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, vdupq_n_u32(1)));
-
-            left  = vreinterpretq_s32_u32(vshlq_u32(vaddq_u32(mid, side), shift4));
-            right = vreinterpretq_s32_u32(vshlq_u32(vsubq_u32(mid, side), shift4));
-
-            left  = vshrq_n_s32(left,  16);
-            right = vshrq_n_s32(right, 16);
-
-            drflac__vst2q_s16(pOutputSamples + i*8, vzip_s16(vmovn_s32(left), vmovn_s32(right)));
-        }
-
-        for (i = (frameCount4 << 2); i < frameCount; ++i) {
-            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-            mid = (mid << 1) | (side & 0x01);
-
-            pOutputSamples[i*2+0] = (drflac_int16)(((mid + side) << shift) >> 16);
-            pOutputSamples[i*2+1] = (drflac_int16)(((mid - side) << shift) >> 16);
-        }
-    }
-}
-#endif
-
-static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_mid_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
-{
-#if defined(DRFLAC_SUPPORT_SSE2)
-    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
-        drflac_read_pcm_frames_s16__decode_mid_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-    } else
-#elif defined(DRFLAC_SUPPORT_NEON)
-    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
-        drflac_read_pcm_frames_s16__decode_mid_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-    } else
-#endif
-    {
-        /* Scalar fallback. */
-#if 0
-        drflac_read_pcm_frames_s16__decode_mid_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-#else
-        drflac_read_pcm_frames_s16__decode_mid_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-#endif
-    }
-}
-
-
-#if 0
-static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_independent_stereo__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
-{
-    for (drflac_uint64 i = 0; i < frameCount; ++i) {
-        pOutputSamples[i*2+0] = (drflac_int16)((drflac_int32)((drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample)) >> 16);
-        pOutputSamples[i*2+1] = (drflac_int16)((drflac_int32)((drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample)) >> 16);
-    }
-}
-#endif
-
-static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_independent_stereo__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-    for (i = 0; i < frameCount4; ++i) {
-        drflac_uint32 tempL0 = pInputSamples0U32[i*4+0] << shift0;
-        drflac_uint32 tempL1 = pInputSamples0U32[i*4+1] << shift0;
-        drflac_uint32 tempL2 = pInputSamples0U32[i*4+2] << shift0;
-        drflac_uint32 tempL3 = pInputSamples0U32[i*4+3] << shift0;
-
-        drflac_uint32 tempR0 = pInputSamples1U32[i*4+0] << shift1;
-        drflac_uint32 tempR1 = pInputSamples1U32[i*4+1] << shift1;
-        drflac_uint32 tempR2 = pInputSamples1U32[i*4+2] << shift1;
-        drflac_uint32 tempR3 = pInputSamples1U32[i*4+3] << shift1;
-
-        tempL0 >>= 16;
-        tempL1 >>= 16;
-        tempL2 >>= 16;
-        tempL3 >>= 16;
-
-        tempR0 >>= 16;
-        tempR1 >>= 16;
-        tempR2 >>= 16;
-        tempR3 >>= 16;
-
-        pOutputSamples[i*8+0] = (drflac_int16)tempL0;
-        pOutputSamples[i*8+1] = (drflac_int16)tempR0;
-        pOutputSamples[i*8+2] = (drflac_int16)tempL1;
-        pOutputSamples[i*8+3] = (drflac_int16)tempR1;
-        pOutputSamples[i*8+4] = (drflac_int16)tempL2;
-        pOutputSamples[i*8+5] = (drflac_int16)tempR2;
-        pOutputSamples[i*8+6] = (drflac_int16)tempL3;
-        pOutputSamples[i*8+7] = (drflac_int16)tempR3;
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        pOutputSamples[i*2+0] = (drflac_int16)((pInputSamples0U32[i] << shift0) >> 16);
-        pOutputSamples[i*2+1] = (drflac_int16)((pInputSamples1U32[i] << shift1) >> 16);
-    }
-}
-
-#if defined(DRFLAC_SUPPORT_SSE2)
-static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_independent_stereo__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-    for (i = 0; i < frameCount4; ++i) {
-        __m128i left  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
-        __m128i right = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
-
-        left  = _mm_srai_epi32(left,  16);
-        right = _mm_srai_epi32(right, 16);
-
-        /* At this point we have results. We can now pack and interleave these into a single __m128i object and then store the in the output buffer. */
-        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right));
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        pOutputSamples[i*2+0] = (drflac_int16)((pInputSamples0U32[i] << shift0) >> 16);
-        pOutputSamples[i*2+1] = (drflac_int16)((pInputSamples1U32[i] << shift1) >> 16);
-    }
-}
-#endif
-
-#if defined(DRFLAC_SUPPORT_NEON)
-static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_independent_stereo__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-    int32x4_t shift0_4 = vdupq_n_s32(shift0);
-    int32x4_t shift1_4 = vdupq_n_s32(shift1);
-
-    for (i = 0; i < frameCount4; ++i) {
-        int32x4_t left;
-        int32x4_t right;
-
-        left  = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4));
-        right = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4));
-
-        left  = vshrq_n_s32(left,  16);
-        right = vshrq_n_s32(right, 16);
-
-        drflac__vst2q_s16(pOutputSamples + i*8, vzip_s16(vmovn_s32(left), vmovn_s32(right)));
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        pOutputSamples[i*2+0] = (drflac_int16)((pInputSamples0U32[i] << shift0) >> 16);
-        pOutputSamples[i*2+1] = (drflac_int16)((pInputSamples1U32[i] << shift1) >> 16);
-    }
-}
-#endif
-
-static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_independent_stereo(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
-{
-#if defined(DRFLAC_SUPPORT_SSE2)
-    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
-        drflac_read_pcm_frames_s16__decode_independent_stereo__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-    } else
-#elif defined(DRFLAC_SUPPORT_NEON)
-    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
-        drflac_read_pcm_frames_s16__decode_independent_stereo__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-    } else
-#endif
-    {
-        /* Scalar fallback. */
-#if 0
-        drflac_read_pcm_frames_s16__decode_independent_stereo__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-#else
-        drflac_read_pcm_frames_s16__decode_independent_stereo__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-#endif
-    }
-}
-
-DRFLAC_API drflac_uint64 drflac_read_pcm_frames_s16(drflac* pFlac, drflac_uint64 framesToRead, drflac_int16* pBufferOut)
-{
-    drflac_uint64 framesRead;
-    drflac_uint32 unusedBitsPerSample;
-
-    if (pFlac == NULL || framesToRead == 0) {
-        return 0;
-    }
-
-    if (pBufferOut == NULL) {
-        return drflac__seek_forward_by_pcm_frames(pFlac, framesToRead);
-    }
-
-    DRFLAC_ASSERT(pFlac->bitsPerSample <= 32);
-    unusedBitsPerSample = 32 - pFlac->bitsPerSample;
-
-    framesRead = 0;
-    while (framesToRead > 0) {
-        /* If we've run out of samples in this frame, go to the next. */
-        if (pFlac->currentFLACFrame.pcmFramesRemaining == 0) {
-            if (!drflac__read_and_decode_next_flac_frame(pFlac)) {
-                break;  /* Couldn't read the next frame, so just break from the loop and return. */
-            }
-        } else {
-            unsigned int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFLACFrame.header.channelAssignment);
-            drflac_uint64 iFirstPCMFrame = pFlac->currentFLACFrame.header.blockSizeInPCMFrames - pFlac->currentFLACFrame.pcmFramesRemaining;
-            drflac_uint64 frameCountThisIteration = framesToRead;
-
-            if (frameCountThisIteration > pFlac->currentFLACFrame.pcmFramesRemaining) {
-                frameCountThisIteration = pFlac->currentFLACFrame.pcmFramesRemaining;
-            }
-
-            if (channelCount == 2) {
-                const drflac_int32* pDecodedSamples0 = pFlac->currentFLACFrame.subframes[0].pSamplesS32 + iFirstPCMFrame;
-                const drflac_int32* pDecodedSamples1 = pFlac->currentFLACFrame.subframes[1].pSamplesS32 + iFirstPCMFrame;
-
-                switch (pFlac->currentFLACFrame.header.channelAssignment)
-                {
-                    case DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE:
-                    {
-                        drflac_read_pcm_frames_s16__decode_left_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
-                    } break;
-
-                    case DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE:
-                    {
-                        drflac_read_pcm_frames_s16__decode_right_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
-                    } break;
-
-                    case DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE:
-                    {
-                        drflac_read_pcm_frames_s16__decode_mid_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
-                    } break;
-
-                    case DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT:
-                    default:
-                    {
-                        drflac_read_pcm_frames_s16__decode_independent_stereo(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
-                    } break;
-                }
-            } else {
-                /* Generic interleaving. */
-                drflac_uint64 i;
-                for (i = 0; i < frameCountThisIteration; ++i) {
-                    unsigned int j;
-                    for (j = 0; j < channelCount; ++j) {
-                        drflac_int32 sampleS32 = (drflac_int32)((drflac_uint32)(pFlac->currentFLACFrame.subframes[j].pSamplesS32[iFirstPCMFrame + i]) << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[j].wastedBitsPerSample));
-                        pBufferOut[(i*channelCount)+j] = (drflac_int16)(sampleS32 >> 16);
-                    }
-                }
-            }
-
-            framesRead                += frameCountThisIteration;
-            pBufferOut                += frameCountThisIteration * channelCount;
-            framesToRead              -= frameCountThisIteration;
-            pFlac->currentPCMFrame    += frameCountThisIteration;
-            pFlac->currentFLACFrame.pcmFramesRemaining -= (drflac_uint32)frameCountThisIteration;
-        }
-    }
-
-    return framesRead;
-}
-
-
-#if 0
-static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, double* pOutputSamples)
-{
-    drflac_uint64 i;
-    for (i = 0; i < frameCount; ++i) {
-        drflac_uint32 left  = (drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
-        drflac_uint32 side  = (drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
-        drflac_uint32 right = left - side;
-
-        pOutputSamples[i*2+0] = (double)((drflac_int32)left  / 2147483648.0);
-        pOutputSamples[i*2+1] = (double)((drflac_int32)right / 2147483648.0);
-    }
-}
-#endif
-
-static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, double* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-    double factor = 1 / 2147483648.0;
-
-    for (i = 0; i < frameCount4; ++i) {
-        drflac_uint32 left0 = pInputSamples0U32[i*4+0] << shift0;
-        drflac_uint32 left1 = pInputSamples0U32[i*4+1] << shift0;
-        drflac_uint32 left2 = pInputSamples0U32[i*4+2] << shift0;
-        drflac_uint32 left3 = pInputSamples0U32[i*4+3] << shift0;
-
-        drflac_uint32 side0 = pInputSamples1U32[i*4+0] << shift1;
-        drflac_uint32 side1 = pInputSamples1U32[i*4+1] << shift1;
-        drflac_uint32 side2 = pInputSamples1U32[i*4+2] << shift1;
-        drflac_uint32 side3 = pInputSamples1U32[i*4+3] << shift1;
-
-        drflac_uint32 right0 = left0 - side0;
-        drflac_uint32 right1 = left1 - side1;
-        drflac_uint32 right2 = left2 - side2;
-        drflac_uint32 right3 = left3 - side3;
-
-        pOutputSamples[i*8+0] = (drflac_int32)left0  * factor;
-        pOutputSamples[i*8+1] = (drflac_int32)right0 * factor;
-        pOutputSamples[i*8+2] = (drflac_int32)left1  * factor;
-        pOutputSamples[i*8+3] = (drflac_int32)right1 * factor;
-        pOutputSamples[i*8+4] = (drflac_int32)left2  * factor;
-        pOutputSamples[i*8+5] = (drflac_int32)right2 * factor;
-        pOutputSamples[i*8+6] = (drflac_int32)left3  * factor;
-        pOutputSamples[i*8+7] = (drflac_int32)right3 * factor;
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
-        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
-        drflac_uint32 right = left - side;
-
-        pOutputSamples[i*2+0] = (drflac_int32)left  * factor;
-        pOutputSamples[i*2+1] = (drflac_int32)right * factor;
-    }
-}
-
-#if defined(DRFLAC_SUPPORT_SSE2)
-static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, double* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8;
-    drflac_uint32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8;
-    __m128 factor;
-
-    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
-
-    factor = _mm_set1_ps(1.0f / 8388608.0f);
-
-    for (i = 0; i < frameCount4; ++i) {
-        __m128i left  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
-        __m128i side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
-        __m128i right = _mm_sub_epi32(left, side);
-        __m128 leftf  = _mm_mul_ps(_mm_cvtepi32_ps(left),  factor);
-        __m128 rightf = _mm_mul_ps(_mm_cvtepi32_ps(right), factor);
-
-        _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf));
-        _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf));
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
-        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
-        drflac_uint32 right = left - side;
-
-        pOutputSamples[i*2+0] = (drflac_int32)left  / 8388608.0f;
-        pOutputSamples[i*2+1] = (drflac_int32)right / 8388608.0f;
-    }
-}
-#endif
-
-#if defined(DRFLAC_SUPPORT_NEON)
-static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, double* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8;
-    drflac_uint32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8;
-    float32x4_t factor4;
-    int32x4_t shift0_4;
-    int32x4_t shift1_4;
-
-    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
-
-    factor4  = vdupq_n_f32(1.0f / 8388608.0f);
-    shift0_4 = vdupq_n_s32(shift0);
-    shift1_4 = vdupq_n_s32(shift1);
-
-    for (i = 0; i < frameCount4; ++i) {
-        uint32x4_t left;
-        uint32x4_t side;
-        uint32x4_t right;
-        float32x4_t leftf;
-        float32x4_t rightf;
-
-        left   = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4);
-        side   = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4);
-        right  = vsubq_u32(left, side);
-        leftf  = vmulq_f32(vcvtq_f32_s32(vreinterpretq_s32_u32(left)),  factor4);
-        rightf = vmulq_f32(vcvtq_f32_s32(vreinterpretq_s32_u32(right)), factor4);
-
-        drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf));
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
-        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
-        drflac_uint32 right = left - side;
-
-        pOutputSamples[i*2+0] = (drflac_int32)left  / 8388608.0f;
-        pOutputSamples[i*2+1] = (drflac_int32)right / 8388608.0f;
-    }
-}
-#endif
-
-static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, double* pOutputSamples)
-{
-#if defined(DRFLAC_SUPPORT_SSE2)
-    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
-        drflac_read_pcm_frames_f32__decode_left_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-    } else
-#elif defined(DRFLAC_SUPPORT_NEON)
-    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
-        drflac_read_pcm_frames_f32__decode_left_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-    } else
-#endif
-    {
-        /* Scalar fallback. */
-#if 0
-        drflac_read_pcm_frames_f32__decode_left_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-#else
-        drflac_read_pcm_frames_f32__decode_left_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-#endif
-    }
-}
-
-
-#if 0
-static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, double* pOutputSamples)
-{
-    drflac_uint64 i;
-    for (i = 0; i < frameCount; ++i) {
-        drflac_uint32 side  = (drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
-        drflac_uint32 right = (drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
-        drflac_uint32 left  = right + side;
-
-        pOutputSamples[i*2+0] = (double)((drflac_int32)left  / 2147483648.0);
-        pOutputSamples[i*2+1] = (double)((drflac_int32)right / 2147483648.0);
-    }
-}
-#endif
-
-static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, double* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-    double factor = 1 / 2147483648.0;
-
-    for (i = 0; i < frameCount4; ++i) {
-        drflac_uint32 side0  = pInputSamples0U32[i*4+0] << shift0;
-        drflac_uint32 side1  = pInputSamples0U32[i*4+1] << shift0;
-        drflac_uint32 side2  = pInputSamples0U32[i*4+2] << shift0;
-        drflac_uint32 side3  = pInputSamples0U32[i*4+3] << shift0;
-
-        drflac_uint32 right0 = pInputSamples1U32[i*4+0] << shift1;
-        drflac_uint32 right1 = pInputSamples1U32[i*4+1] << shift1;
-        drflac_uint32 right2 = pInputSamples1U32[i*4+2] << shift1;
-        drflac_uint32 right3 = pInputSamples1U32[i*4+3] << shift1;
-
-        drflac_uint32 left0 = right0 + side0;
-        drflac_uint32 left1 = right1 + side1;
-        drflac_uint32 left2 = right2 + side2;
-        drflac_uint32 left3 = right3 + side3;
-
-        pOutputSamples[i*8+0] = (drflac_int32)left0  * factor;
-        pOutputSamples[i*8+1] = (drflac_int32)right0 * factor;
-        pOutputSamples[i*8+2] = (drflac_int32)left1  * factor;
-        pOutputSamples[i*8+3] = (drflac_int32)right1 * factor;
-        pOutputSamples[i*8+4] = (drflac_int32)left2  * factor;
-        pOutputSamples[i*8+5] = (drflac_int32)right2 * factor;
-        pOutputSamples[i*8+6] = (drflac_int32)left3  * factor;
-        pOutputSamples[i*8+7] = (drflac_int32)right3 * factor;
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
-        drflac_uint32 right = pInputSamples1U32[i] << shift1;
-        drflac_uint32 left  = right + side;
-
-        pOutputSamples[i*2+0] = (drflac_int32)left  * factor;
-        pOutputSamples[i*2+1] = (drflac_int32)right * factor;
-    }
-}
-
-#if defined(DRFLAC_SUPPORT_SSE2)
-static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, double* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8;
-    drflac_uint32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8;
-    __m128 factor;
-
-    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
-
-    factor = _mm_set1_ps(1.0f / 8388608.0f);
-
-    for (i = 0; i < frameCount4; ++i) {
-        __m128i side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
-        __m128i right = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
-        __m128i left  = _mm_add_epi32(right, side);
-        __m128 leftf  = _mm_mul_ps(_mm_cvtepi32_ps(left),  factor);
-        __m128 rightf = _mm_mul_ps(_mm_cvtepi32_ps(right), factor);
-
-        _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf));
-        _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf));
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
-        drflac_uint32 right = pInputSamples1U32[i] << shift1;
-        drflac_uint32 left  = right + side;
-
-        pOutputSamples[i*2+0] = (drflac_int32)left  / 8388608.0f;
-        pOutputSamples[i*2+1] = (drflac_int32)right / 8388608.0f;
-    }
-}
-#endif
-
-#if defined(DRFLAC_SUPPORT_NEON)
-static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, double* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8;
-    drflac_uint32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8;
-    float32x4_t factor4;
-    int32x4_t shift0_4;
-    int32x4_t shift1_4;
-
-    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
-
-    factor4  = vdupq_n_f32(1.0f / 8388608.0f);
-    shift0_4 = vdupq_n_s32(shift0);
-    shift1_4 = vdupq_n_s32(shift1);
-
-    for (i = 0; i < frameCount4; ++i) {
-        uint32x4_t side;
-        uint32x4_t right;
-        uint32x4_t left;
-        float32x4_t leftf;
-        float32x4_t rightf;
-
-        side   = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4);
-        right  = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4);
-        left   = vaddq_u32(right, side);
-        leftf  = vmulq_f32(vcvtq_f32_s32(vreinterpretq_s32_u32(left)),  factor4);
-        rightf = vmulq_f32(vcvtq_f32_s32(vreinterpretq_s32_u32(right)), factor4);
-
-        drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf));
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
-        drflac_uint32 right = pInputSamples1U32[i] << shift1;
-        drflac_uint32 left  = right + side;
-
-        pOutputSamples[i*2+0] = (drflac_int32)left  / 8388608.0f;
-        pOutputSamples[i*2+1] = (drflac_int32)right / 8388608.0f;
-    }
-}
-#endif
-
-static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, double* pOutputSamples)
-{
-#if defined(DRFLAC_SUPPORT_SSE2)
-    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
-        drflac_read_pcm_frames_f32__decode_right_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-    } else
-#elif defined(DRFLAC_SUPPORT_NEON)
-    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
-        drflac_read_pcm_frames_f32__decode_right_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-    } else
-#endif
-    {
-        /* Scalar fallback. */
-#if 0
-        drflac_read_pcm_frames_f32__decode_right_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-#else
-        drflac_read_pcm_frames_f32__decode_right_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-#endif
-    }
-}
-
-
-#if 0
-static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, double* pOutputSamples)
-{
-    for (drflac_uint64 i = 0; i < frameCount; ++i) {
-        drflac_uint32 mid  = (drflac_uint32)pInputSamples0[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-        drflac_uint32 side = (drflac_uint32)pInputSamples1[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-        mid = (mid << 1) | (side & 0x01);
-
-        pOutputSamples[i*2+0] = (double)((((drflac_int32)(mid + side) >> 1) << (unusedBitsPerSample)) / 2147483648.0);
-        pOutputSamples[i*2+1] = (double)((((drflac_int32)(mid - side) >> 1) << (unusedBitsPerSample)) / 2147483648.0);
-    }
-}
-#endif
-
-static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, double* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift = unusedBitsPerSample;
-    double factor = 1 / 2147483648.0;
-
-    if (shift > 0) {
-        shift -= 1;
-        for (i = 0; i < frameCount4; ++i) {
-            drflac_uint32 temp0L;
-            drflac_uint32 temp1L;
-            drflac_uint32 temp2L;
-            drflac_uint32 temp3L;
-            drflac_uint32 temp0R;
-            drflac_uint32 temp1R;
-            drflac_uint32 temp2R;
-            drflac_uint32 temp3R;
-
-            drflac_uint32 mid0  = pInputSamples0U32[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 mid1  = pInputSamples0U32[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 mid2  = pInputSamples0U32[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 mid3  = pInputSamples0U32[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-
-            drflac_uint32 side0 = pInputSamples1U32[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-            drflac_uint32 side1 = pInputSamples1U32[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-            drflac_uint32 side2 = pInputSamples1U32[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-            drflac_uint32 side3 = pInputSamples1U32[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-            mid0 = (mid0 << 1) | (side0 & 0x01);
-            mid1 = (mid1 << 1) | (side1 & 0x01);
-            mid2 = (mid2 << 1) | (side2 & 0x01);
-            mid3 = (mid3 << 1) | (side3 & 0x01);
-
-            temp0L = (mid0 + side0) << shift;
-            temp1L = (mid1 + side1) << shift;
-            temp2L = (mid2 + side2) << shift;
-            temp3L = (mid3 + side3) << shift;
-
-            temp0R = (mid0 - side0) << shift;
-            temp1R = (mid1 - side1) << shift;
-            temp2R = (mid2 - side2) << shift;
-            temp3R = (mid3 - side3) << shift;
-
-            pOutputSamples[i*8+0] = (drflac_int32)temp0L * factor;
-            pOutputSamples[i*8+1] = (drflac_int32)temp0R * factor;
-            pOutputSamples[i*8+2] = (drflac_int32)temp1L * factor;
-            pOutputSamples[i*8+3] = (drflac_int32)temp1R * factor;
-            pOutputSamples[i*8+4] = (drflac_int32)temp2L * factor;
-            pOutputSamples[i*8+5] = (drflac_int32)temp2R * factor;
-            pOutputSamples[i*8+6] = (drflac_int32)temp3L * factor;
-            pOutputSamples[i*8+7] = (drflac_int32)temp3R * factor;
-        }
-    } else {
-        for (i = 0; i < frameCount4; ++i) {
-            drflac_uint32 temp0L;
-            drflac_uint32 temp1L;
-            drflac_uint32 temp2L;
-            drflac_uint32 temp3L;
-            drflac_uint32 temp0R;
-            drflac_uint32 temp1R;
-            drflac_uint32 temp2R;
-            drflac_uint32 temp3R;
-
-            drflac_uint32 mid0  = pInputSamples0U32[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 mid1  = pInputSamples0U32[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 mid2  = pInputSamples0U32[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 mid3  = pInputSamples0U32[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-
-            drflac_uint32 side0 = pInputSamples1U32[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-            drflac_uint32 side1 = pInputSamples1U32[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-            drflac_uint32 side2 = pInputSamples1U32[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-            drflac_uint32 side3 = pInputSamples1U32[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-            mid0 = (mid0 << 1) | (side0 & 0x01);
-            mid1 = (mid1 << 1) | (side1 & 0x01);
-            mid2 = (mid2 << 1) | (side2 & 0x01);
-            mid3 = (mid3 << 1) | (side3 & 0x01);
-
-            temp0L = (drflac_uint32)((drflac_int32)(mid0 + side0) >> 1);
-            temp1L = (drflac_uint32)((drflac_int32)(mid1 + side1) >> 1);
-            temp2L = (drflac_uint32)((drflac_int32)(mid2 + side2) >> 1);
-            temp3L = (drflac_uint32)((drflac_int32)(mid3 + side3) >> 1);
-
-            temp0R = (drflac_uint32)((drflac_int32)(mid0 - side0) >> 1);
-            temp1R = (drflac_uint32)((drflac_int32)(mid1 - side1) >> 1);
-            temp2R = (drflac_uint32)((drflac_int32)(mid2 - side2) >> 1);
-            temp3R = (drflac_uint32)((drflac_int32)(mid3 - side3) >> 1);
-
-            pOutputSamples[i*8+0] = (drflac_int32)temp0L * factor;
-            pOutputSamples[i*8+1] = (drflac_int32)temp0R * factor;
-            pOutputSamples[i*8+2] = (drflac_int32)temp1L * factor;
-            pOutputSamples[i*8+3] = (drflac_int32)temp1R * factor;
-            pOutputSamples[i*8+4] = (drflac_int32)temp2L * factor;
-            pOutputSamples[i*8+5] = (drflac_int32)temp2R * factor;
-            pOutputSamples[i*8+6] = (drflac_int32)temp3L * factor;
-            pOutputSamples[i*8+7] = (drflac_int32)temp3R * factor;
-        }
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-        drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-        mid = (mid << 1) | (side & 0x01);
-
-        pOutputSamples[i*2+0] = (drflac_int32)((drflac_uint32)((drflac_int32)(mid + side) >> 1) << unusedBitsPerSample) * factor;
-        pOutputSamples[i*2+1] = (drflac_int32)((drflac_uint32)((drflac_int32)(mid - side) >> 1) << unusedBitsPerSample) * factor;
-    }
-}
-
-#if defined(DRFLAC_SUPPORT_SSE2)
-static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, double* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift = unusedBitsPerSample - 8;
-    double factor;
-    __m128 factor128;
-
-    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
-
-    factor = 1.0f / 8388608.0f;
-    factor128 = _mm_set1_ps(factor);
-
-    if (shift == 0) {
-        for (i = 0; i < frameCount4; ++i) {
-            __m128i mid;
-            __m128i side;
-            __m128i tempL;
-            __m128i tempR;
-            __m128  leftf;
-            __m128  rightf;
-
-            mid    = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
-            side   = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
-
-            mid    = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01)));
-
-            tempL  = _mm_srai_epi32(_mm_add_epi32(mid, side), 1);
-            tempR  = _mm_srai_epi32(_mm_sub_epi32(mid, side), 1);
-
-            leftf  = _mm_mul_ps(_mm_cvtepi32_ps(tempL), factor128);
-            rightf = _mm_mul_ps(_mm_cvtepi32_ps(tempR), factor128);
-
-            _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf));
-            _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf));
-        }
-
-        for (i = (frameCount4 << 2); i < frameCount; ++i) {
-            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-            mid = (mid << 1) | (side & 0x01);
-
-            pOutputSamples[i*2+0] = ((drflac_int32)(mid + side) >> 1) * factor;
-            pOutputSamples[i*2+1] = ((drflac_int32)(mid - side) >> 1) * factor;
-        }
-    } else {
-        shift -= 1;
-        for (i = 0; i < frameCount4; ++i) {
-            __m128i mid;
-            __m128i side;
-            __m128i tempL;
-            __m128i tempR;
-            __m128 leftf;
-            __m128 rightf;
-
-            mid    = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
-            side   = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
-
-            mid    = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01)));
-
-            tempL  = _mm_slli_epi32(_mm_add_epi32(mid, side), shift);
-            tempR  = _mm_slli_epi32(_mm_sub_epi32(mid, side), shift);
-
-            leftf  = _mm_mul_ps(_mm_cvtepi32_ps(tempL), factor128);
-            rightf = _mm_mul_ps(_mm_cvtepi32_ps(tempR), factor128);
-
-            _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf));
-            _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf));
-        }
-
-        for (i = (frameCount4 << 2); i < frameCount; ++i) {
-            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-            mid = (mid << 1) | (side & 0x01);
-
-            pOutputSamples[i*2+0] = (drflac_int32)((mid + side) << shift) * factor;
-            pOutputSamples[i*2+1] = (drflac_int32)((mid - side) << shift) * factor;
-        }
-    }
-}
-#endif
-
-#if defined(DRFLAC_SUPPORT_NEON)
-static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, double* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift = unusedBitsPerSample - 8;
-    double factor;
-    float32x4_t factor4;
-    int32x4_t shift4;
-    int32x4_t wbps0_4;  /* Wasted Bits Per Sample */
-    int32x4_t wbps1_4;  /* Wasted Bits Per Sample */
-
-    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
-
-    factor  = 1.0f / 8388608.0f;
-    factor4 = vdupq_n_f32(factor);
-    wbps0_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
-    wbps1_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
-
-    if (shift == 0) {
-        for (i = 0; i < frameCount4; ++i) {
-            int32x4_t lefti;
-            int32x4_t righti;
-            float32x4_t leftf;
-            float32x4_t rightf;
-
-            uint32x4_t mid  = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbps0_4);
-            uint32x4_t side = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbps1_4);
-
-            mid    = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, vdupq_n_u32(1)));
-
-            lefti  = vshrq_n_s32(vreinterpretq_s32_u32(vaddq_u32(mid, side)), 1);
-            righti = vshrq_n_s32(vreinterpretq_s32_u32(vsubq_u32(mid, side)), 1);
-
-            leftf  = vmulq_f32(vcvtq_f32_s32(lefti),  factor4);
-            rightf = vmulq_f32(vcvtq_f32_s32(righti), factor4);
-
-            drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf));
-        }
-
-        for (i = (frameCount4 << 2); i < frameCount; ++i) {
-            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-            mid = (mid << 1) | (side & 0x01);
-
-            pOutputSamples[i*2+0] = ((drflac_int32)(mid + side) >> 1) * factor;
-            pOutputSamples[i*2+1] = ((drflac_int32)(mid - side) >> 1) * factor;
-        }
-    } else {
-        shift -= 1;
-        shift4 = vdupq_n_s32(shift);
-        for (i = 0; i < frameCount4; ++i) {
-            uint32x4_t mid;
-            uint32x4_t side;
-            int32x4_t lefti;
-            int32x4_t righti;
-            float32x4_t leftf;
-            float32x4_t rightf;
-
-            mid    = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbps0_4);
-            side   = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbps1_4);
-
-            mid    = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, vdupq_n_u32(1)));
-
-            lefti  = vreinterpretq_s32_u32(vshlq_u32(vaddq_u32(mid, side), shift4));
-            righti = vreinterpretq_s32_u32(vshlq_u32(vsubq_u32(mid, side), shift4));
-
-            leftf  = vmulq_f32(vcvtq_f32_s32(lefti),  factor4);
-            rightf = vmulq_f32(vcvtq_f32_s32(righti), factor4);
-
-            drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf));
-        }
-
-        for (i = (frameCount4 << 2); i < frameCount; ++i) {
-            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-            mid = (mid << 1) | (side & 0x01);
-
-            pOutputSamples[i*2+0] = (drflac_int32)((mid + side) << shift) * factor;
-            pOutputSamples[i*2+1] = (drflac_int32)((mid - side) << shift) * factor;
-        }
-    }
-}
-#endif
-
-static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, double* pOutputSamples)
-{
-#if defined(DRFLAC_SUPPORT_SSE2)
-    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
-        drflac_read_pcm_frames_f32__decode_mid_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-    } else
-#elif defined(DRFLAC_SUPPORT_NEON)
-    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
-        drflac_read_pcm_frames_f32__decode_mid_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-    } else
-#endif
-    {
-        /* Scalar fallback. */
-#if 0
-        drflac_read_pcm_frames_f32__decode_mid_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-#else
-        drflac_read_pcm_frames_f32__decode_mid_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-#endif
-    }
-}
-
-#if 0
-static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, double* pOutputSamples)
-{
-    for (drflac_uint64 i = 0; i < frameCount; ++i) {
-        pOutputSamples[i*2+0] = (double)((drflac_int32)((drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample)) / 2147483648.0);
-        pOutputSamples[i*2+1] = (double)((drflac_int32)((drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample)) / 2147483648.0);
-    }
-}
-#endif
-
-static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, double* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-    double factor = 1 / 2147483648.0;
-
-    for (i = 0; i < frameCount4; ++i) {
-        drflac_uint32 tempL0 = pInputSamples0U32[i*4+0] << shift0;
-        drflac_uint32 tempL1 = pInputSamples0U32[i*4+1] << shift0;
-        drflac_uint32 tempL2 = pInputSamples0U32[i*4+2] << shift0;
-        drflac_uint32 tempL3 = pInputSamples0U32[i*4+3] << shift0;
-
-        drflac_uint32 tempR0 = pInputSamples1U32[i*4+0] << shift1;
-        drflac_uint32 tempR1 = pInputSamples1U32[i*4+1] << shift1;
-        drflac_uint32 tempR2 = pInputSamples1U32[i*4+2] << shift1;
-        drflac_uint32 tempR3 = pInputSamples1U32[i*4+3] << shift1;
-
-        pOutputSamples[i*8+0] = (drflac_int32)tempL0 * factor;
-        pOutputSamples[i*8+1] = (drflac_int32)tempR0 * factor;
-        pOutputSamples[i*8+2] = (drflac_int32)tempL1 * factor;
-        pOutputSamples[i*8+3] = (drflac_int32)tempR1 * factor;
-        pOutputSamples[i*8+4] = (drflac_int32)tempL2 * factor;
-        pOutputSamples[i*8+5] = (drflac_int32)tempR2 * factor;
-        pOutputSamples[i*8+6] = (drflac_int32)tempL3 * factor;
-        pOutputSamples[i*8+7] = (drflac_int32)tempR3 * factor;
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        pOutputSamples[i*2+0] = (drflac_int32)(pInputSamples0U32[i] << shift0) * factor;
-        pOutputSamples[i*2+1] = (drflac_int32)(pInputSamples1U32[i] << shift1) * factor;
-    }
-}
-
-#if defined(DRFLAC_SUPPORT_SSE2)
-static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, double* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8;
-    drflac_uint32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8;
-
-    double factor = 1.0f / 8388608.0f;
-    __m128 factor128 = _mm_set1_ps(factor);
-
-    for (i = 0; i < frameCount4; ++i) {
-        __m128i lefti;
-        __m128i righti;
-        __m128 leftf;
-        __m128 rightf;
-
-        lefti  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
-        righti = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
-
-        leftf  = _mm_mul_ps(_mm_cvtepi32_ps(lefti),  factor128);
-        rightf = _mm_mul_ps(_mm_cvtepi32_ps(righti), factor128);
-
-        _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf));
-        _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf));
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        pOutputSamples[i*2+0] = (drflac_int32)(pInputSamples0U32[i] << shift0) * factor;
-        pOutputSamples[i*2+1] = (drflac_int32)(pInputSamples1U32[i] << shift1) * factor;
-    }
-}
-#endif
-
-#if defined(DRFLAC_SUPPORT_NEON)
-static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, double* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8;
-    drflac_uint32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8;
-
-    double factor = 1.0f / 8388608.0f;
-    float32x4_t factor4 = vdupq_n_f32(factor);
-    int32x4_t shift0_4  = vdupq_n_s32(shift0);
-    int32x4_t shift1_4  = vdupq_n_s32(shift1);
-
-    for (i = 0; i < frameCount4; ++i) {
-        int32x4_t lefti;
-        int32x4_t righti;
-        float32x4_t leftf;
-        float32x4_t rightf;
-
-        lefti  = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4));
-        righti = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4));
-
-        leftf  = vmulq_f32(vcvtq_f32_s32(lefti),  factor4);
-        rightf = vmulq_f32(vcvtq_f32_s32(righti), factor4);
-
-        drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf));
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        pOutputSamples[i*2+0] = (drflac_int32)(pInputSamples0U32[i] << shift0) * factor;
-        pOutputSamples[i*2+1] = (drflac_int32)(pInputSamples1U32[i] << shift1) * factor;
-    }
-}
-#endif
-
-static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, double* pOutputSamples)
-{
-#if defined(DRFLAC_SUPPORT_SSE2)
-    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
-        drflac_read_pcm_frames_f32__decode_independent_stereo__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-    } else
-#elif defined(DRFLAC_SUPPORT_NEON)
-    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
-        drflac_read_pcm_frames_f32__decode_independent_stereo__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-    } else
-#endif
-    {
-        /* Scalar fallback. */
-#if 0
-        drflac_read_pcm_frames_f32__decode_independent_stereo__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-#else
-        drflac_read_pcm_frames_f32__decode_independent_stereo__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-#endif
-    }
-}
-
-DRFLAC_API drflac_uint64 drflac_read_pcm_frames_f32(drflac* pFlac, drflac_uint64 framesToRead, double* pBufferOut)
-{
-    drflac_uint64 framesRead;
-    drflac_uint32 unusedBitsPerSample;
-
-    if (pFlac == NULL || framesToRead == 0) {
-        return 0;
-    }
-
-    if (pBufferOut == NULL) {
-        return drflac__seek_forward_by_pcm_frames(pFlac, framesToRead);
-    }
-
-    DRFLAC_ASSERT(pFlac->bitsPerSample <= 32);
-    unusedBitsPerSample = 32 - pFlac->bitsPerSample;
-
-    framesRead = 0;
-    while (framesToRead > 0) {
-        /* If we've run out of samples in this frame, go to the next. */
-        if (pFlac->currentFLACFrame.pcmFramesRemaining == 0) {
-            if (!drflac__read_and_decode_next_flac_frame(pFlac)) {
-                break;  /* Couldn't read the next frame, so just break from the loop and return. */
-            }
-        } else {
-            unsigned int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFLACFrame.header.channelAssignment);
-            drflac_uint64 iFirstPCMFrame = pFlac->currentFLACFrame.header.blockSizeInPCMFrames - pFlac->currentFLACFrame.pcmFramesRemaining;
-            drflac_uint64 frameCountThisIteration = framesToRead;
-
-            if (frameCountThisIteration > pFlac->currentFLACFrame.pcmFramesRemaining) {
-                frameCountThisIteration = pFlac->currentFLACFrame.pcmFramesRemaining;
-            }
-
-            if (channelCount == 2) {
-                const drflac_int32* pDecodedSamples0 = pFlac->currentFLACFrame.subframes[0].pSamplesS32 + iFirstPCMFrame;
-                const drflac_int32* pDecodedSamples1 = pFlac->currentFLACFrame.subframes[1].pSamplesS32 + iFirstPCMFrame;
-
-                switch (pFlac->currentFLACFrame.header.channelAssignment)
-                {
-                    case DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE:
-                    {
-                        drflac_read_pcm_frames_f32__decode_left_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
-                    } break;
-
-                    case DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE:
-                    {
-                        drflac_read_pcm_frames_f32__decode_right_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
-                    } break;
-
-                    case DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE:
-                    {
-                        drflac_read_pcm_frames_f32__decode_mid_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
-                    } break;
-
-                    case DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT:
-                    default:
-                    {
-                        drflac_read_pcm_frames_f32__decode_independent_stereo(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
-                    } break;
-                }
-            } else {
-                /* Generic interleaving. */
-                drflac_uint64 i;
-                for (i = 0; i < frameCountThisIteration; ++i) {
-                    unsigned int j;
-                    for (j = 0; j < channelCount; ++j) {
-                        drflac_int32 sampleS32 = (drflac_int32)((drflac_uint32)(pFlac->currentFLACFrame.subframes[j].pSamplesS32[iFirstPCMFrame + i]) << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[j].wastedBitsPerSample));
-                        pBufferOut[(i*channelCount)+j] = (double)(sampleS32 / 2147483648.0);
-                    }
-                }
-            }
-
-            framesRead                += frameCountThisIteration;
-            pBufferOut                += frameCountThisIteration * channelCount;
-            framesToRead              -= frameCountThisIteration;
-            pFlac->currentPCMFrame    += frameCountThisIteration;
-            pFlac->currentFLACFrame.pcmFramesRemaining -= (unsigned int)frameCountThisIteration;
-        }
-    }
-
-    return framesRead;
-}
-
-
-DRFLAC_API drflac_bool32 drflac_seek_to_pcm_frame(drflac* pFlac, drflac_uint64 pcmFrameIndex)
-{
-    if (pFlac == NULL) {
-        return DRFLAC_FALSE;
-    }
-
-    /* Don't do anything if we're already on the seek point. */
-    if (pFlac->currentPCMFrame == pcmFrameIndex) {
-        return DRFLAC_TRUE;
-    }
-
-    /*
-    If we don't know where the first frame begins then we can't seek. This will happen when the STREAMINFO block was not present
-    when the decoder was opened.
-    */
-    if (pFlac->firstFLACFramePosInBytes == 0) {
-        return DRFLAC_FALSE;
-    }
-
-    if (pcmFrameIndex == 0) {
-        pFlac->currentPCMFrame = 0;
-        return drflac__seek_to_first_frame(pFlac);
-    } else {
-        drflac_bool32 wasSuccessful = DRFLAC_FALSE;
-
-        /* Clamp the sample to the end. */
-        if (pcmFrameIndex > pFlac->totalPCMFrameCount) {
-            pcmFrameIndex = pFlac->totalPCMFrameCount;
-        }
-
-        /* If the target sample and the current sample are in the same frame we just move the position forward. */
-        if (pcmFrameIndex > pFlac->currentPCMFrame) {
-            /* Forward. */
-            drflac_uint32 offset = (drflac_uint32)(pcmFrameIndex - pFlac->currentPCMFrame);
-            if (pFlac->currentFLACFrame.pcmFramesRemaining >  offset) {
-                pFlac->currentFLACFrame.pcmFramesRemaining -= offset;
-                pFlac->currentPCMFrame = pcmFrameIndex;
-                return DRFLAC_TRUE;
-            }
-        } else {
-            /* Backward. */
-            drflac_uint32 offsetAbs = (drflac_uint32)(pFlac->currentPCMFrame - pcmFrameIndex);
-            drflac_uint32 currentFLACFramePCMFrameCount = pFlac->currentFLACFrame.header.blockSizeInPCMFrames;
-            drflac_uint32 currentFLACFramePCMFramesConsumed = currentFLACFramePCMFrameCount - pFlac->currentFLACFrame.pcmFramesRemaining;
-            if (currentFLACFramePCMFramesConsumed > offsetAbs) {
-                pFlac->currentFLACFrame.pcmFramesRemaining += offsetAbs;
-                pFlac->currentPCMFrame = pcmFrameIndex;
-                return DRFLAC_TRUE;
-            }
-        }
-
-        /*
-        Different techniques depending on encapsulation. Using the native FLAC seektable with Ogg encapsulation is a bit awkward so
-        we'll instead use Ogg's natural seeking facility.
-        */
-#ifndef DR_FLAC_NO_OGG
-        if (pFlac->container == drflac_container_ogg)
-        {
-            wasSuccessful = drflac_ogg__seek_to_pcm_frame(pFlac, pcmFrameIndex);
-        }
-        else
-#endif
-        {
-            /* First try seeking via the seek table. If this fails, fall back to a brute force seek which is much slower. */
-            if (/*!wasSuccessful && */!pFlac->_noSeekTableSeek) {
-                wasSuccessful = drflac__seek_to_pcm_frame__seek_table(pFlac, pcmFrameIndex);
-            }
-
-#if !defined(DR_FLAC_NO_CRC)
-            /* Fall back to binary search if seek table seeking fails. This requires the length of the stream to be known. */
-            if (!wasSuccessful && !pFlac->_noBinarySearchSeek && pFlac->totalPCMFrameCount > 0) {
-                wasSuccessful = drflac__seek_to_pcm_frame__binary_search(pFlac, pcmFrameIndex);
-            }
-#endif
-
-            /* Fall back to brute force if all else fails. */
-            if (!wasSuccessful && !pFlac->_noBruteForceSeek) {
-                wasSuccessful = drflac__seek_to_pcm_frame__brute_force(pFlac, pcmFrameIndex);
-            }
-        }
-
-        pFlac->currentPCMFrame = pcmFrameIndex;
-        return wasSuccessful;
-    }
-}
-
-
-
-/* High Level APIs */
-
-#if defined(SIZE_MAX)
-    #define DRFLAC_SIZE_MAX  SIZE_MAX
-#else
-    #if defined(DRFLAC_64BIT)
-        #define DRFLAC_SIZE_MAX  ((drflac_uint64)0xFFFFFFFFFFFFFFFF)
-    #else
-        #define DRFLAC_SIZE_MAX  0xFFFFFFFF
-    #endif
-#endif
-
-
-/* Using a macro as the definition of the drflac__full_decode_and_close_*() API family. Sue me. */
-#define DRFLAC_DEFINE_FULL_READ_AND_CLOSE(extension, type) \
-static type* drflac__full_read_and_close_ ## extension (drflac* pFlac, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut)\
-{                                                                                                                                                                   \
-    type* pSampleData = NULL;                                                                                                                                       \
-    drflac_uint64 totalPCMFrameCount;                                                                                                                               \
-                                                                                                                                                                    \
-    DRFLAC_ASSERT(pFlac != NULL);                                                                                                                                   \
-                                                                                                                                                                    \
-    totalPCMFrameCount = pFlac->totalPCMFrameCount;                                                                                                                 \
-                                                                                                                                                                    \
-    if (totalPCMFrameCount == 0) {                                                                                                                                  \
-        type buffer[4096];                                                                                                                                          \
-        drflac_uint64 pcmFramesRead;                                                                                                                                \
-        size_t sampleDataBufferSize = sizeof(buffer);                                                                                                               \
-                                                                                                                                                                    \
-        pSampleData = (type*)drflac__malloc_from_callbacks(sampleDataBufferSize, &pFlac->allocationCallbacks);                                                      \
-        if (pSampleData == NULL) {                                                                                                                                  \
-            goto on_error;                                                                                                                                          \
-        }                                                                                                                                                           \
-                                                                                                                                                                    \
-        while ((pcmFramesRead = (drflac_uint64)drflac_read_pcm_frames_##extension(pFlac, sizeof(buffer)/sizeof(buffer[0])/pFlac->channels, buffer)) > 0) {          \
-            if (((totalPCMFrameCount + pcmFramesRead) * pFlac->channels * sizeof(type)) > sampleDataBufferSize) {                                                   \
-                type* pNewSampleData;                                                                                                                               \
-                size_t newSampleDataBufferSize;                                                                                                                     \
-                                                                                                                                                                    \
-                newSampleDataBufferSize = sampleDataBufferSize * 2;                                                                                                 \
-                pNewSampleData = (type*)drflac__realloc_from_callbacks(pSampleData, newSampleDataBufferSize, sampleDataBufferSize, &pFlac->allocationCallbacks);    \
-                if (pNewSampleData == NULL) {                                                                                                                       \
-                    drflac__free_from_callbacks(pSampleData, &pFlac->allocationCallbacks);                                                                          \
-                    goto on_error;                                                                                                                                  \
-                }                                                                                                                                                   \
-                                                                                                                                                                    \
-                sampleDataBufferSize = newSampleDataBufferSize;                                                                                                     \
-                pSampleData = pNewSampleData;                                                                                                                       \
-            }                                                                                                                                                       \
-                                                                                                                                                                    \
-            DRFLAC_COPY_MEMORY(pSampleData + (totalPCMFrameCount*pFlac->channels), buffer, (size_t)(pcmFramesRead*pFlac->channels*sizeof(type)));                   \
-            totalPCMFrameCount += pcmFramesRead;                                                                                                                    \
-        }                                                                                                                                                           \
-                                                                                                                                                                    \
-        /* At this point everything should be decoded, but we just want to fill the unused part buffer with silence - need to                                       \
-           protect those ears from random noise! */                                                                                                                 \
-        DRFLAC_ZERO_MEMORY(pSampleData + (totalPCMFrameCount*pFlac->channels), (size_t)(sampleDataBufferSize - totalPCMFrameCount*pFlac->channels*sizeof(type)));   \
-    } else {                                                                                                                                                        \
-        drflac_uint64 dataSize = totalPCMFrameCount*pFlac->channels*sizeof(type);                                                                                   \
-        if (dataSize > DRFLAC_SIZE_MAX) {                                                                                                                           \
-            goto on_error;  /* The decoded data is too big. */                                                                                                      \
-        }                                                                                                                                                           \
-                                                                                                                                                                    \
-        pSampleData = (type*)drflac__malloc_from_callbacks((size_t)dataSize, &pFlac->allocationCallbacks);    /* <-- Safe cast as per the check above. */           \
-        if (pSampleData == NULL) {                                                                                                                                  \
-            goto on_error;                                                                                                                                          \
-        }                                                                                                                                                           \
-                                                                                                                                                                    \
-        totalPCMFrameCount = drflac_read_pcm_frames_##extension(pFlac, pFlac->totalPCMFrameCount, pSampleData);                                                     \
-    }                                                                                                                                                               \
-                                                                                                                                                                    \
-    if (sampleRateOut) *sampleRateOut = pFlac->sampleRate;                                                                                                          \
-    if (channelsOut) *channelsOut = pFlac->channels;                                                                                                                \
-    if (totalPCMFrameCountOut) *totalPCMFrameCountOut = totalPCMFrameCount;                                                                                         \
-                                                                                                                                                                    \
-    drflac_close(pFlac);                                                                                                                                            \
-    return pSampleData;                                                                                                                                             \
-                                                                                                                                                                    \
-on_error:                                                                                                                                                           \
-    drflac_close(pFlac);                                                                                                                                            \
-    return NULL;                                                                                                                                                    \
-}
-
-DRFLAC_DEFINE_FULL_READ_AND_CLOSE(s32, drflac_int32)
-DRFLAC_DEFINE_FULL_READ_AND_CLOSE(s16, drflac_int16)
-DRFLAC_DEFINE_FULL_READ_AND_CLOSE(f32, double)
-
-DRFLAC_API drflac_int32* drflac_open_and_read_pcm_frames_s32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut, const drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    drflac* pFlac;
-
-    if (channelsOut) {
-        *channelsOut = 0;
-    }
-    if (sampleRateOut) {
-        *sampleRateOut = 0;
-    }
-    if (totalPCMFrameCountOut) {
-        *totalPCMFrameCountOut = 0;
-    }
-
-    pFlac = drflac_open(onRead, onSeek, pUserData, pAllocationCallbacks);
-    if (pFlac == NULL) {
-        return NULL;
-    }
-
-    return drflac__full_read_and_close_s32(pFlac, channelsOut, sampleRateOut, totalPCMFrameCountOut);
-}
-
-DRFLAC_API drflac_int16* drflac_open_and_read_pcm_frames_s16(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut, const drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    drflac* pFlac;
-
-    if (channelsOut) {
-        *channelsOut = 0;
-    }
-    if (sampleRateOut) {
-        *sampleRateOut = 0;
-    }
-    if (totalPCMFrameCountOut) {
-        *totalPCMFrameCountOut = 0;
-    }
-
-    pFlac = drflac_open(onRead, onSeek, pUserData, pAllocationCallbacks);
-    if (pFlac == NULL) {
-        return NULL;
-    }
-
-    return drflac__full_read_and_close_s16(pFlac, channelsOut, sampleRateOut, totalPCMFrameCountOut);
-}
-
-DRFLAC_API double* drflac_open_and_read_pcm_frames_f32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut, const drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    drflac* pFlac;
-
-    if (channelsOut) {
-        *channelsOut = 0;
-    }
-    if (sampleRateOut) {
-        *sampleRateOut = 0;
-    }
-    if (totalPCMFrameCountOut) {
-        *totalPCMFrameCountOut = 0;
-    }
-
-    pFlac = drflac_open(onRead, onSeek, pUserData, pAllocationCallbacks);
-    if (pFlac == NULL) {
-        return NULL;
-    }
-
-    return drflac__full_read_and_close_f32(pFlac, channelsOut, sampleRateOut, totalPCMFrameCountOut);
-}
-
-#ifndef DR_FLAC_NO_STDIO
-DRFLAC_API drflac_int32* drflac_open_file_and_read_pcm_frames_s32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    drflac* pFlac;
-
-    if (sampleRate) {
-        *sampleRate = 0;
-    }
-    if (channels) {
-        *channels = 0;
-    }
-    if (totalPCMFrameCount) {
-        *totalPCMFrameCount = 0;
-    }
-
-    pFlac = drflac_open_file(filename, pAllocationCallbacks);
-    if (pFlac == NULL) {
-        return NULL;
-    }
-
-    return drflac__full_read_and_close_s32(pFlac, channels, sampleRate, totalPCMFrameCount);
-}
-
-DRFLAC_API drflac_int16* drflac_open_file_and_read_pcm_frames_s16(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    drflac* pFlac;
-
-    if (sampleRate) {
-        *sampleRate = 0;
-    }
-    if (channels) {
-        *channels = 0;
-    }
-    if (totalPCMFrameCount) {
-        *totalPCMFrameCount = 0;
-    }
-
-    pFlac = drflac_open_file(filename, pAllocationCallbacks);
-    if (pFlac == NULL) {
-        return NULL;
-    }
-
-    return drflac__full_read_and_close_s16(pFlac, channels, sampleRate, totalPCMFrameCount);
-}
-
-DRFLAC_API double* drflac_open_file_and_read_pcm_frames_f32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    drflac* pFlac;
-
-    if (sampleRate) {
-        *sampleRate = 0;
-    }
-    if (channels) {
-        *channels = 0;
-    }
-    if (totalPCMFrameCount) {
-        *totalPCMFrameCount = 0;
-    }
-
-    pFlac = drflac_open_file(filename, pAllocationCallbacks);
-    if (pFlac == NULL) {
-        return NULL;
-    }
-
-    return drflac__full_read_and_close_f32(pFlac, channels, sampleRate, totalPCMFrameCount);
-}
-#endif
-
-DRFLAC_API drflac_int32* drflac_open_memory_and_read_pcm_frames_s32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    drflac* pFlac;
-
-    if (sampleRate) {
-        *sampleRate = 0;
-    }
-    if (channels) {
-        *channels = 0;
-    }
-    if (totalPCMFrameCount) {
-        *totalPCMFrameCount = 0;
-    }
-
-    pFlac = drflac_open_memory(data, dataSize, pAllocationCallbacks);
-    if (pFlac == NULL) {
-        return NULL;
-    }
-
-    return drflac__full_read_and_close_s32(pFlac, channels, sampleRate, totalPCMFrameCount);
-}
-
-DRFLAC_API drflac_int16* drflac_open_memory_and_read_pcm_frames_s16(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    drflac* pFlac;
-
-    if (sampleRate) {
-        *sampleRate = 0;
-    }
-    if (channels) {
-        *channels = 0;
-    }
-    if (totalPCMFrameCount) {
-        *totalPCMFrameCount = 0;
-    }
-
-    pFlac = drflac_open_memory(data, dataSize, pAllocationCallbacks);
-    if (pFlac == NULL) {
-        return NULL;
-    }
-
-    return drflac__full_read_and_close_s16(pFlac, channels, sampleRate, totalPCMFrameCount);
-}
-
-DRFLAC_API double* drflac_open_memory_and_read_pcm_frames_f32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    drflac* pFlac;
-
-    if (sampleRate) {
-        *sampleRate = 0;
-    }
-    if (channels) {
-        *channels = 0;
-    }
-    if (totalPCMFrameCount) {
-        *totalPCMFrameCount = 0;
-    }
-
-    pFlac = drflac_open_memory(data, dataSize, pAllocationCallbacks);
-    if (pFlac == NULL) {
-        return NULL;
-    }
-
-    return drflac__full_read_and_close_f32(pFlac, channels, sampleRate, totalPCMFrameCount);
-}
-
-
-DRFLAC_API void drflac_free(void* p, const drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    if (pAllocationCallbacks != NULL) {
-        drflac__free_from_callbacks(p, pAllocationCallbacks);
-    } else {
-        drflac__free_default(p, NULL);
-    }
-}
-
-
-
-
-DRFLAC_API void drflac_init_vorbis_comment_iterator(drflac_vorbis_comment_iterator* pIter, drflac_uint32 commentCount, const void* pComments)
-{
-    if (pIter == NULL) {
-        return;
-    }
-
-    pIter->countRemaining = commentCount;
-    pIter->pRunningData   = (const char*)pComments;
-}
-
-DRFLAC_API const char* drflac_next_vorbis_comment(drflac_vorbis_comment_iterator* pIter, drflac_uint32* pCommentLengthOut)
-{
-    drflac_int32 length;
-    const char* pComment;
-
-    /* Safety. */
-    if (pCommentLengthOut) {
-        *pCommentLengthOut = 0;
-    }
-
-    if (pIter == NULL || pIter->countRemaining == 0 || pIter->pRunningData == NULL) {
-        return NULL;
-    }
-
-    length = drflac__le2host_32(*(const drflac_uint32*)pIter->pRunningData);
-    pIter->pRunningData += 4;
-
-    pComment = pIter->pRunningData;
-    pIter->pRunningData += length;
-    pIter->countRemaining -= 1;
-
-    if (pCommentLengthOut) {
-        *pCommentLengthOut = length;
-    }
-
-    return pComment;
-}
-
-
-
-
-DRFLAC_API void drflac_init_cuesheet_track_iterator(drflac_cuesheet_track_iterator* pIter, drflac_uint32 trackCount, const void* pTrackData)
-{
-    if (pIter == NULL) {
-        return;
-    }
-
-    pIter->countRemaining = trackCount;
-    pIter->pRunningData   = (const char*)pTrackData;
-}
-
-DRFLAC_API drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterator* pIter, drflac_cuesheet_track* pCuesheetTrack)
-{
-    drflac_cuesheet_track cuesheetTrack;
-    const char* pRunningData;
-    drflac_uint64 offsetHi;
-    drflac_uint64 offsetLo;
-
-    if (pIter == NULL || pIter->countRemaining == 0 || pIter->pRunningData == NULL) {
-        return DRFLAC_FALSE;
-    }
-
-    pRunningData = pIter->pRunningData;
-
-    offsetHi                   = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
-    offsetLo                   = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
-    cuesheetTrack.offset       = offsetLo | (offsetHi << 32);
-    cuesheetTrack.trackNumber  = pRunningData[0];                                         pRunningData += 1;
-    DRFLAC_COPY_MEMORY(cuesheetTrack.ISRC, pRunningData, sizeof(cuesheetTrack.ISRC));     pRunningData += 12;
-    cuesheetTrack.isAudio      = (pRunningData[0] & 0x80) != 0;
-    cuesheetTrack.preEmphasis  = (pRunningData[0] & 0x40) != 0;                           pRunningData += 14;
-    cuesheetTrack.indexCount   = pRunningData[0];                                         pRunningData += 1;
-    cuesheetTrack.pIndexPoints = (const drflac_cuesheet_track_index*)pRunningData;        pRunningData += cuesheetTrack.indexCount * sizeof(drflac_cuesheet_track_index);
-
-    pIter->pRunningData = pRunningData;
-    pIter->countRemaining -= 1;
-
-    if (pCuesheetTrack) {
-        *pCuesheetTrack = cuesheetTrack;
-    }
-
-    return DRFLAC_TRUE;
-}
-
-#if defined(__GNUC__)
-    #pragma GCC diagnostic pop
-#endif
-#endif  /* DR_FLAC_IMPLEMENTATION */
-
-
-/*
-REVISION HISTORY
-================
-v0.12.13 - 2020-05-16
-  - Add compile-time and run-time version querying.
-    - DRFLAC_VERSION_MINOR
-    - DRFLAC_VERSION_MAJOR
-    - DRFLAC_VERSION_REVISION
-    - DRFLAC_VERSION_STRING
-    - drflac_version()
-    - drflac_version_string()
-
-v0.12.12 - 2020-04-30
-  - Fix compilation errors with VC6.
-
-v0.12.11 - 2020-04-19
-  - Fix some pedantic warnings.
-  - Fix some undefined behaviour warnings.
-
-v0.12.10 - 2020-04-10
-  - Fix some bugs when trying to seek with an invalid seek table.
-
-v0.12.9 - 2020-04-05
-  - Fix warnings.
-
-v0.12.8 - 2020-04-04
-  - Add drflac_open_file_w() and drflac_open_file_with_metadata_w().
-  - Fix some static analysis warnings.
-  - Minor documentation updates.
-
-v0.12.7 - 2020-03-14
-  - Fix compilation errors with VC6.
-
-v0.12.6 - 2020-03-07
-  - Fix compilation error with Visual Studio .NET 2003.
-
-v0.12.5 - 2020-01-30
-  - Silence some static analysis warnings.
-
-v0.12.4 - 2020-01-29
-  - Silence some static analysis warnings.
-
-v0.12.3 - 2019-12-02
-  - Fix some warnings when compiling with GCC and the -Og flag.
-  - Fix a crash in out-of-memory situations.
-  - Fix potential integer overflow bug.
-  - Fix some static analysis warnings.
-  - Fix a possible crash when using custom memory allocators without a custom realloc() implementation.
-  - Fix a bug with binary search seeking where the bits per sample is not a multiple of 8.
-
-v0.12.2 - 2019-10-07
-  - Internal code clean up.
-
-v0.12.1 - 2019-09-29
-  - Fix some Clang Static Analyzer warnings.
-  - Fix an unused variable warning.
-
-v0.12.0 - 2019-09-23
-  - API CHANGE: Add support for user defined memory allocation routines. This system allows the program to specify their own memory allocation
-    routines with a user data pointer for client-specific contextual data. This adds an extra parameter to the end of the following APIs:
-    - drflac_open()
-    - drflac_open_relaxed()
-    - drflac_open_with_metadata()
-    - drflac_open_with_metadata_relaxed()
-    - drflac_open_file()
-    - drflac_open_file_with_metadata()
-    - drflac_open_memory()
-    - drflac_open_memory_with_metadata()
-    - drflac_open_and_read_pcm_frames_s32()
-    - drflac_open_and_read_pcm_frames_s16()
-    - drflac_open_and_read_pcm_frames_f32()
-    - drflac_open_file_and_read_pcm_frames_s32()
-    - drflac_open_file_and_read_pcm_frames_s16()
-    - drflac_open_file_and_read_pcm_frames_f32()
-    - drflac_open_memory_and_read_pcm_frames_s32()
-    - drflac_open_memory_and_read_pcm_frames_s16()
-    - drflac_open_memory_and_read_pcm_frames_f32()
-    Set this extra parameter to NULL to use defaults which is the same as the previous behaviour. Setting this NULL will use
-    DRFLAC_MALLOC, DRFLAC_REALLOC and DRFLAC_FREE.
-  - Remove deprecated APIs:
-    - drflac_read_s32()
-    - drflac_read_s16()
-    - drflac_read_f32()
-    - drflac_seek_to_sample()
-    - drflac_open_and_decode_s32()
-    - drflac_open_and_decode_s16()
-    - drflac_open_and_decode_f32()
-    - drflac_open_and_decode_file_s32()
-    - drflac_open_and_decode_file_s16()
-    - drflac_open_and_decode_file_f32()
-    - drflac_open_and_decode_memory_s32()
-    - drflac_open_and_decode_memory_s16()
-    - drflac_open_and_decode_memory_f32()
-  - Remove drflac.totalSampleCount which is now replaced with drflac.totalPCMFrameCount. You can emulate drflac.totalSampleCount
-    by doing pFlac->totalPCMFrameCount*pFlac->channels.
-  - Rename drflac.currentFrame to drflac.currentFLACFrame to remove ambiguity with PCM frames.
-  - Fix errors when seeking to the end of a stream.
-  - Optimizations to seeking.
-  - SSE improvements and optimizations.
-  - ARM NEON optimizations.
-  - Optimizations to drflac_read_pcm_frames_s16().
-  - Optimizations to drflac_read_pcm_frames_s32().
-
-v0.11.10 - 2019-06-26
-  - Fix a compiler error.
-
-v0.11.9 - 2019-06-16
-  - Silence some ThreadSanitizer warnings.
-
-v0.11.8 - 2019-05-21
-  - Fix warnings.
-
-v0.11.7 - 2019-05-06
-  - C89 fixes.
-
-v0.11.6 - 2019-05-05
-  - Add support for C89.
-  - Fix a compiler warning when CRC is disabled.
-  - Change license to choice of public domain or MIT-0.
-
-v0.11.5 - 2019-04-19
-  - Fix a compiler error with GCC.
-
-v0.11.4 - 2019-04-17
-  - Fix some warnings with GCC when compiling with -std=c99.
-
-v0.11.3 - 2019-04-07
-  - Silence warnings with GCC.
-
-v0.11.2 - 2019-03-10
-  - Fix a warning.
-
-v0.11.1 - 2019-02-17
-  - Fix a potential bug with seeking.
-
-v0.11.0 - 2018-12-16
-  - API CHANGE: Deprecated drflac_read_s32(), drflac_read_s16() and drflac_read_f32() and replaced them with
-    drflac_read_pcm_frames_s32(), drflac_read_pcm_frames_s16() and drflac_read_pcm_frames_f32(). The new APIs take
-    and return PCM frame counts instead of sample counts. To upgrade you will need to change the input count by
-    dividing it by the channel count, and then do the same with the return value.
-  - API_CHANGE: Deprecated drflac_seek_to_sample() and replaced with drflac_seek_to_pcm_frame(). Same rules as
-    the changes to drflac_read_*() apply.
-  - API CHANGE: Deprecated drflac_open_and_decode_*() and replaced with drflac_open_*_and_read_*(). Same rules as
-    the changes to drflac_read_*() apply.
-  - Optimizations.
-
-v0.10.0 - 2018-09-11
-  - Remove the DR_FLAC_NO_WIN32_IO option and the Win32 file IO functionality. If you need to use Win32 file IO you
-    need to do it yourself via the callback API.
-  - Fix the clang build.
-  - Fix undefined behavior.
-  - Fix errors with CUESHEET metdata blocks.
-  - Add an API for iterating over each cuesheet track in the CUESHEET metadata block. This works the same way as the
-    Vorbis comment API.
-  - Other miscellaneous bug fixes, mostly relating to invalid FLAC streams.
-  - Minor optimizations.
-
-v0.9.11 - 2018-08-29
-  - Fix a bug with sample reconstruction.
-
-v0.9.10 - 2018-08-07
-  - Improve 64-bit detection.
-
-v0.9.9 - 2018-08-05
-  - Fix C++ build on older versions of GCC.
-
-v0.9.8 - 2018-07-24
-  - Fix compilation errors.
-
-v0.9.7 - 2018-07-05
-  - Fix a warning.
-
-v0.9.6 - 2018-06-29
-  - Fix some typos.
-
-v0.9.5 - 2018-06-23
-  - Fix some warnings.
-
-v0.9.4 - 2018-06-14
-  - Optimizations to seeking.
-  - Clean up.
-
-v0.9.3 - 2018-05-22
-  - Bug fix.
-
-v0.9.2 - 2018-05-12
-  - Fix a compilation error due to a missing break statement.
-
-v0.9.1 - 2018-04-29
-  - Fix compilation error with Clang.
-
-v0.9 - 2018-04-24
-  - Fix Clang build.
-  - Start using major.minor.revision versioning.
-
-v0.8g - 2018-04-19
-  - Fix build on non-x86/x64 architectures.
-
-v0.8f - 2018-02-02
-  - Stop pretending to support changing rate/channels mid stream.
-
-v0.8e - 2018-02-01
-  - Fix a crash when the block size of a frame is larger than the maximum block size defined by the FLAC stream.
-  - Fix a crash the the Rice partition order is invalid.
-
-v0.8d - 2017-09-22
-  - Add support for decoding streams with ID3 tags. ID3 tags are just skipped.
-
-v0.8c - 2017-09-07
-  - Fix warning on non-x86/x64 architectures.
-
-v0.8b - 2017-08-19
-  - Fix build on non-x86/x64 architectures.
-
-v0.8a - 2017-08-13
-  - A small optimization for the Clang build.
-
-v0.8 - 2017-08-12
-  - API CHANGE: Rename dr_* types to drflac_*.
-  - Optimizations. This brings dr_flac back to about the same class of efficiency as the reference implementation.
-  - Add support for custom implementations of malloc(), realloc(), etc.
-  - Add CRC checking to Ogg encapsulated streams.
-  - Fix VC++ 6 build. This is only for the C++ compiler. The C compiler is not currently supported.
-  - Bug fixes.
-
-v0.7 - 2017-07-23
-  - Add support for opening a stream without a header block. To do this, use drflac_open_relaxed() / drflac_open_with_metadata_relaxed().
-
-v0.6 - 2017-07-22
-  - Add support for recovering from invalid frames. With this change, dr_flac will simply skip over invalid frames as if they
-    never existed. Frames are checked against their sync code, the CRC-8 of the frame header and the CRC-16 of the whole frame.
-
-v0.5 - 2017-07-16
-  - Fix typos.
-  - Change drflac_bool* types to unsigned.
-  - Add CRC checking. This makes dr_flac slower, but can be disabled with #define DR_FLAC_NO_CRC.
-
-v0.4f - 2017-03-10
-  - Fix a couple of bugs with the bitstreaming code.
-
-v0.4e - 2017-02-17
-  - Fix some warnings.
-
-v0.4d - 2016-12-26
-  - Add support for 32-bit floating-point PCM decoding.
-  - Use drflac_int* and drflac_uint* sized types to improve compiler support.
-  - Minor improvements to documentation.
-
-v0.4c - 2016-12-26
-  - Add support for signed 16-bit integer PCM decoding.
-
-v0.4b - 2016-10-23
-  - A minor change to drflac_bool8 and drflac_bool32 types.
-
-v0.4a - 2016-10-11
-  - Rename drBool32 to drflac_bool32 for styling consistency.
-
-v0.4 - 2016-09-29
-  - API/ABI CHANGE: Use fixed size 32-bit booleans instead of the built-in bool type.
-  - API CHANGE: Rename drflac_open_and_decode*() to drflac_open_and_decode*_s32().
-  - API CHANGE: Swap the order of "channels" and "sampleRate" parameters in drflac_open_and_decode*(). Rationale for this is to
-    keep it consistent with drflac_audio.
-
-v0.3f - 2016-09-21
-  - Fix a warning with GCC.
-
-v0.3e - 2016-09-18
-  - Fixed a bug where GCC 4.3+ was not getting properly identified.
-  - Fixed a few typos.
-  - Changed date formats to ISO 8601 (YYYY-MM-DD).
-
-v0.3d - 2016-06-11
-  - Minor clean up.
-
-v0.3c - 2016-05-28
-  - Fixed compilation error.
-
-v0.3b - 2016-05-16
-  - Fixed Linux/GCC build.
-  - Updated documentation.
-
-v0.3a - 2016-05-15
-  - Minor fixes to documentation.
-
-v0.3 - 2016-05-11
-  - Optimizations. Now at about parity with the reference implementation on 32-bit builds.
-  - Lots of clean up.
-
-v0.2b - 2016-05-10
-  - Bug fixes.
-
-v0.2a - 2016-05-10
-  - Made drflac_open_and_decode() more robust.
-  - Removed an unused debugging variable
-
-v0.2 - 2016-05-09
-  - Added support for Ogg encapsulation.
-  - API CHANGE. Have the onSeek callback take a third argument which specifies whether or not the seek
-    should be relative to the start or the current position. Also changes the seeking rules such that
-    seeking offsets will never be negative.
-  - Have drflac_open_and_decode() fail gracefully if the stream has an unknown total sample count.
-
-v0.1b - 2016-05-07
-  - Properly close the file handle in drflac_open_file() and family when the decoder fails to initialize.
-  - Removed a stale comment.
-
-v0.1a - 2016-05-05
-  - Minor formatting changes.
-  - Fixed a warning on the GCC build.
-
-v0.1 - 2016-05-03
-  - Initial versioned release.
-*/
-
-/*
-This software is available as a choice of the following licenses. Choose
-whichever you prefer.
-
-===============================================================================
-ALTERNATIVE 1 - Public Domain (www.unlicense.org)
-===============================================================================
-This is free and unencumbered software released into the public domain.
-
-Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
-software, either in source code form or as a compiled binary, for any purpose,
-commercial or non-commercial, and by any means.
-
-In jurisdictions that recognize copyright laws, the author or authors of this
-software dedicate any and all copyright interest in the software to the public
-domain. We make this dedication for the benefit of the public at large and to
-the detriment of our heirs and successors. We intend this dedication to be an
-overt act of relinquishment in perpetuity of all present and future rights to
-this software under copyright law.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-For more information, please refer to <http://unlicense.org/>
-
-===============================================================================
-ALTERNATIVE 2 - MIT No Attribution
-===============================================================================
-Copyright 2020 David Reid
-
-Permission is hereby granted, free of charge, to any person obtaining a copy of
-this software and associated documentation files (the "Software"), to deal in
-the Software without restriction, including without limitation the rights to
-use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
-of the Software, and to permit persons to whom the Software is furnished to do
-so.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-*/
+/*
+FLAC audio decoder. Choice of public domain or MIT-0. See license statements at the end of this file.
+dr_flac - v0.12.13 - 2020-05-16
+
+David Reid - mackron@gmail.com
+
+GitHub: https://github.com/mackron/dr_libs
+*/
+
+/*
+RELEASE NOTES - v0.12.0
+=======================
+Version 0.12.0 has breaking API changes including changes to the existing API and the removal of deprecated APIs.
+
+
+Improved Client-Defined Memory Allocation
+-----------------------------------------
+The main change with this release is the addition of a more flexible way of implementing custom memory allocation routines. The
+existing system of DRFLAC_MALLOC, DRFLAC_REALLOC and DRFLAC_FREE are still in place and will be used by default when no custom
+allocation callbacks are specified.
+
+To use the new system, you pass in a pointer to a drflac_allocation_callbacks object to drflac_open() and family, like this:
+
+    void* my_malloc(size_t sz, void* pUserData)
+    {
+        return malloc(sz);
+    }
+    void* my_realloc(void* p, size_t sz, void* pUserData)
+    {
+        return realloc(p, sz);
+    }
+    void my_free(void* p, void* pUserData)
+    {
+        free(p);
+    }
+
+    ...
+
+    drflac_allocation_callbacks allocationCallbacks;
+    allocationCallbacks.pUserData = &myData;
+    allocationCallbacks.onMalloc  = my_malloc;
+    allocationCallbacks.onRealloc = my_realloc;
+    allocationCallbacks.onFree    = my_free;
+    drflac* pFlac = drflac_open_file("my_file.flac", &allocationCallbacks);
+
+The advantage of this new system is that it allows you to specify user data which will be passed in to the allocation routines.
+
+Passing in null for the allocation callbacks object will cause dr_flac to use defaults which is the same as DRFLAC_MALLOC,
+DRFLAC_REALLOC and DRFLAC_FREE and the equivalent of how it worked in previous versions.
+
+Every API that opens a drflac object now takes this extra parameter. These include the following:
+
+    drflac_open()
+    drflac_open_relaxed()
+    drflac_open_with_metadata()
+    drflac_open_with_metadata_relaxed()
+    drflac_open_file()
+    drflac_open_file_with_metadata()
+    drflac_open_memory()
+    drflac_open_memory_with_metadata()
+    drflac_open_and_read_pcm_frames_s32()
+    drflac_open_and_read_pcm_frames_s16()
+    drflac_open_and_read_pcm_frames_f32()
+    drflac_open_file_and_read_pcm_frames_s32()
+    drflac_open_file_and_read_pcm_frames_s16()
+    drflac_open_file_and_read_pcm_frames_f32()
+    drflac_open_memory_and_read_pcm_frames_s32()
+    drflac_open_memory_and_read_pcm_frames_s16()
+    drflac_open_memory_and_read_pcm_frames_f32()
+
+
+
+Optimizations
+-------------
+Seeking performance has been greatly improved. A new binary search based seeking algorithm has been introduced which significantly
+improves performance over the brute force method which was used when no seek table was present. Seek table based seeking also takes
+advantage of the new binary search seeking system to further improve performance there as well. Note that this depends on CRC which
+means it will be disabled when DR_FLAC_NO_CRC is used.
+
+The SSE4.1 pipeline has been cleaned up and optimized. You should see some improvements with decoding speed of 24-bit files in
+particular. 16-bit streams should also see some improvement.
+
+drflac_read_pcm_frames_s16() has been optimized. Previously this sat on top of drflac_read_pcm_frames_s32() and performed it's s32
+to s16 conversion in a second pass. This is now all done in a single pass. This includes SSE2 and ARM NEON optimized paths.
+
+A minor optimization has been implemented for drflac_read_pcm_frames_s32(). This will now use an SSE2 optimized pipeline for stereo
+channel reconstruction which is the last part of the decoding process.
+
+The ARM build has seen a few improvements. The CLZ (count leading zeroes) and REV (byte swap) instructions are now used when
+compiling with GCC and Clang which is achieved using inline assembly. The CLZ instruction requires ARM architecture version 5 at
+compile time and the REV instruction requires ARM architecture version 6.
+
+An ARM NEON optimized pipeline has been implemented. To enable this you'll need to add -mfpu=neon to the command line when compiling.
+
+
+Removed APIs
+------------
+The following APIs were deprecated in version 0.11.0 and have been completely removed in version 0.12.0:
+
+    drflac_read_s32()                   -> drflac_read_pcm_frames_s32()
+    drflac_read_s16()                   -> drflac_read_pcm_frames_s16()
+    drflac_read_f32()                   -> drflac_read_pcm_frames_f32()
+    drflac_seek_to_sample()             -> drflac_seek_to_pcm_frame()
+    drflac_open_and_decode_s32()        -> drflac_open_and_read_pcm_frames_s32()
+    drflac_open_and_decode_s16()        -> drflac_open_and_read_pcm_frames_s16()
+    drflac_open_and_decode_f32()        -> drflac_open_and_read_pcm_frames_f32()
+    drflac_open_and_decode_file_s32()   -> drflac_open_file_and_read_pcm_frames_s32()
+    drflac_open_and_decode_file_s16()   -> drflac_open_file_and_read_pcm_frames_s16()
+    drflac_open_and_decode_file_f32()   -> drflac_open_file_and_read_pcm_frames_f32()
+    drflac_open_and_decode_memory_s32() -> drflac_open_memory_and_read_pcm_frames_s32()
+    drflac_open_and_decode_memory_s16() -> drflac_open_memory_and_read_pcm_frames_s16()
+    drflac_open_and_decode_memory_f32() -> drflac_open_memroy_and_read_pcm_frames_f32()
+
+Prior versions of dr_flac operated on a per-sample basis whereas now it operates on PCM frames. The removed APIs all relate
+to the old per-sample APIs. You now need to use the "pcm_frame" versions.
+*/
+
+
+/*
+Introduction
+============
+dr_flac is a single file library. To use it, do something like the following in one .c file.
+
+    ```c
+    #define DR_FLAC_IMPLEMENTATION
+    #include "dr_flac.h"
+    ```
+
+You can then #include this file in other parts of the program as you would with any other header file. To decode audio data, do something like the following:
+
+    ```c
+    drflac* pFlac = drflac_open_file("MySong.flac", NULL);
+    if (pFlac == NULL) {
+        // Failed to open FLAC file
+    }
+
+    drflac_int32* pSamples = malloc(pFlac->totalPCMFrameCount * pFlac->channels * sizeof(drflac_int32));
+    drflac_uint64 numberOfInterleavedSamplesActuallyRead = drflac_read_pcm_frames_s32(pFlac, pFlac->totalPCMFrameCount, pSamples);
+    ```
+
+The drflac object represents the decoder. It is a transparent type so all the information you need, such as the number of channels and the bits per sample,
+should be directly accessible - just make sure you don't change their values. Samples are always output as interleaved signed 32-bit PCM. In the example above
+a native FLAC stream was opened, however dr_flac has seamless support for Ogg encapsulated FLAC streams as well.
+
+You do not need to decode the entire stream in one go - you just specify how many samples you'd like at any given time and the decoder will give you as many
+samples as it can, up to the amount requested. Later on when you need the next batch of samples, just call it again. Example:
+
+    ```c
+    while (drflac_read_pcm_frames_s32(pFlac, chunkSizeInPCMFrames, pChunkSamples) > 0) {
+        do_something();
+    }
+    ```
+
+You can seek to a specific PCM frame with `drflac_seek_to_pcm_frame()`.
+
+If you just want to quickly decode an entire FLAC file in one go you can do something like this:
+
+    ```c
+    unsigned int channels;
+    unsigned int sampleRate;
+    drflac_uint64 totalPCMFrameCount;
+    drflac_int32* pSampleData = drflac_open_file_and_read_pcm_frames_s32("MySong.flac", &channels, &sampleRate, &totalPCMFrameCount, NULL);
+    if (pSampleData == NULL) {
+        // Failed to open and decode FLAC file.
+    }
+
+    ...
+
+    drflac_free(pSampleData);
+    ```
+
+You can read samples as signed 16-bit integer and 32-bit floating-point PCM with the *_s16() and *_f32() family of APIs respectively, but note that these
+should be considered lossy.
+
+
+If you need access to metadata (album art, etc.), use `drflac_open_with_metadata()`, `drflac_open_file_with_metdata()` or `drflac_open_memory_with_metadata()`.
+The rationale for keeping these APIs separate is that they're slightly slower than the normal versions and also just a little bit harder to use. dr_flac
+reports metadata to the application through the use of a callback, and every metadata block is reported before `drflac_open_with_metdata()` returns.
+
+The main opening APIs (`drflac_open()`, etc.) will fail if the header is not present. The presents a problem in certain scenarios such as broadcast style
+streams or internet radio where the header may not be present because the user has started playback mid-stream. To handle this, use the relaxed APIs:
+    
+    `drflac_open_relaxed()`
+    `drflac_open_with_metadata_relaxed()`
+
+It is not recommended to use these APIs for file based streams because a missing header would usually indicate a corrupt or perverse file. In addition, these
+APIs can take a long time to initialize because they may need to spend a lot of time finding the first frame.
+
+
+
+Build Options
+=============
+#define these options before including this file.
+
+#define DR_FLAC_NO_STDIO
+  Disable `drflac_open_file()` and family.
+
+#define DR_FLAC_NO_OGG
+  Disables support for Ogg/FLAC streams.
+
+#define DR_FLAC_BUFFER_SIZE <number>
+  Defines the size of the internal buffer to store data from onRead(). This buffer is used to reduce the number of calls back to the client for more data.
+  Larger values means more memory, but better performance. My tests show diminishing returns after about 4KB (which is the default). Consider reducing this if
+  you have a very efficient implementation of onRead(), or increase it if it's very inefficient. Must be a multiple of 8.
+
+#define DR_FLAC_NO_CRC
+  Disables CRC checks. This will offer a performance boost when CRC is unnecessary. This will disable binary search seeking. When seeking, the seek table will
+  be used if available. Otherwise the seek will be performed using brute force.
+
+#define DR_FLAC_NO_SIMD
+  Disables SIMD optimizations (SSE on x86/x64 architectures, NEON on ARM architectures). Use this if you are having compatibility issues with your compiler.
+
+
+
+Notes
+=====
+- dr_flac does not support changing the sample rate nor channel count mid stream.
+- dr_flac is not thread-safe, but its APIs can be called from any thread so long as you do your own synchronization.
+- When using Ogg encapsulation, a corrupted metadata block will result in `drflac_open_with_metadata()` and `drflac_open()` returning inconsistent samples due
+  to differences in corrupted stream recorvery logic between the two APIs.
+*/
+
+#ifndef dr_flac_h
+#define dr_flac_h
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define DRFLAC_STRINGIFY(x)      #x
+#define DRFLAC_XSTRINGIFY(x)     DRFLAC_STRINGIFY(x)
+
+#define DRFLAC_VERSION_MAJOR     0
+#define DRFLAC_VERSION_MINOR     12
+#define DRFLAC_VERSION_REVISION  13
+#define DRFLAC_VERSION_STRING    DRFLAC_XSTRINGIFY(DRFLAC_VERSION_MAJOR) "." DRFLAC_XSTRINGIFY(DRFLAC_VERSION_MINOR) "." DRFLAC_XSTRINGIFY(DRFLAC_VERSION_REVISION)
+
+#include <stddef.h> /* For size_t. */
+
+/* Sized types. Prefer built-in types. Fall back to stdint. */
+#ifdef _MSC_VER
+    #if defined(__clang__)
+        #pragma GCC diagnostic push
+        #pragma GCC diagnostic ignored "-Wlanguage-extension-token"
+        #pragma GCC diagnostic ignored "-Wlong-long"        
+        #pragma GCC diagnostic ignored "-Wc++11-long-long"
+    #endif
+    typedef   signed __int8  drflac_int8;
+    typedef unsigned __int8  drflac_uint8;
+    typedef   signed __int16 drflac_int16;
+    typedef unsigned __int16 drflac_uint16;
+    typedef   signed __int32 drflac_int32;
+    typedef unsigned __int32 drflac_uint32;
+    typedef   signed __int64 drflac_int64;
+    typedef unsigned __int64 drflac_uint64;
+    #if defined(__clang__)
+        #pragma GCC diagnostic pop
+    #endif
+#else
+    #include <stdint.h>
+    typedef int8_t           drflac_int8;
+    typedef uint8_t          drflac_uint8;
+    typedef int16_t          drflac_int16;
+    typedef uint16_t         drflac_uint16;
+    typedef int32_t          drflac_int32;
+    typedef uint32_t         drflac_uint32;
+    typedef int64_t          drflac_int64;
+    typedef uint64_t         drflac_uint64;
+#endif
+typedef drflac_uint8         drflac_bool8;
+typedef drflac_uint32        drflac_bool32;
+#define DRFLAC_TRUE          1
+#define DRFLAC_FALSE         0
+
+#if !defined(DRFLAC_API)
+    #if defined(DRFLAC_DLL)
+        #if defined(_WIN32)
+            #define DRFLAC_DLL_IMPORT  __declspec(dllimport)
+            #define DRFLAC_DLL_EXPORT  __declspec(dllexport)
+            #define DRFLAC_DLL_PRIVATE static
+        #else
+            #if defined(__GNUC__) && __GNUC__ >= 4
+                #define DRFLAC_DLL_IMPORT  __attribute__((visibility("default")))
+                #define DRFLAC_DLL_EXPORT  __attribute__((visibility("default")))
+                #define DRFLAC_DLL_PRIVATE __attribute__((visibility("hidden")))
+            #else
+                #define DRFLAC_DLL_IMPORT
+                #define DRFLAC_DLL_EXPORT
+                #define DRFLAC_DLL_PRIVATE static
+            #endif
+        #endif
+
+        #if defined(DR_FLAC_IMPLEMENTATION) || defined(DRFLAC_IMPLEMENTATION)
+            #define DRFLAC_API  DRFLAC_DLL_EXPORT
+        #else
+            #define DRFLAC_API  DRFLAC_DLL_IMPORT
+        #endif
+        #define DRFLAC_PRIVATE DRFLAC_DLL_PRIVATE
+    #else
+        #define DRFLAC_API extern
+        #define DRFLAC_PRIVATE static
+    #endif
+#endif
+
+#if defined(_MSC_VER) && _MSC_VER >= 1700   /* Visual Studio 2012 */
+    #define DRFLAC_DEPRECATED       __declspec(deprecated)
+#elif (defined(__GNUC__) && __GNUC__ >= 4)  /* GCC 4 */
+    #define DRFLAC_DEPRECATED       __attribute__((deprecated))
+#elif defined(__has_feature)                /* Clang */
+    #if __has_feature(attribute_deprecated)
+        #define DRFLAC_DEPRECATED   __attribute__((deprecated))
+    #else
+        #define DRFLAC_DEPRECATED
+    #endif
+#else
+    #define DRFLAC_DEPRECATED
+#endif
+
+DRFLAC_API void drflac_version(drflac_uint32* pMajor, drflac_uint32* pMinor, drflac_uint32* pRevision);
+DRFLAC_API const char* drflac_version_string();
+
+/*
+As data is read from the client it is placed into an internal buffer for fast access. This controls the size of that buffer. Larger values means more speed,
+but also more memory. In my testing there is diminishing returns after about 4KB, but you can fiddle with this to suit your own needs. Must be a multiple of 8.
+*/
+#ifndef DR_FLAC_BUFFER_SIZE
+#define DR_FLAC_BUFFER_SIZE   4096
+#endif
+
+/* Check if we can enable 64-bit optimizations. */
+#if defined(_WIN64) || defined(_LP64) || defined(__LP64__)
+#define DRFLAC_64BIT
+#endif
+
+#ifdef DRFLAC_64BIT
+typedef drflac_uint64 drflac_cache_t;
+#else
+typedef drflac_uint32 drflac_cache_t;
+#endif
+
+/* The various metadata block types. */
+#define DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO       0
+#define DRFLAC_METADATA_BLOCK_TYPE_PADDING          1
+#define DRFLAC_METADATA_BLOCK_TYPE_APPLICATION      2
+#define DRFLAC_METADATA_BLOCK_TYPE_SEEKTABLE        3
+#define DRFLAC_METADATA_BLOCK_TYPE_VORBIS_COMMENT   4
+#define DRFLAC_METADATA_BLOCK_TYPE_CUESHEET         5
+#define DRFLAC_METADATA_BLOCK_TYPE_PICTURE          6
+#define DRFLAC_METADATA_BLOCK_TYPE_INVALID          127
+
+/* The various picture types specified in the PICTURE block. */
+#define DRFLAC_PICTURE_TYPE_OTHER                   0
+#define DRFLAC_PICTURE_TYPE_FILE_ICON               1
+#define DRFLAC_PICTURE_TYPE_OTHER_FILE_ICON         2
+#define DRFLAC_PICTURE_TYPE_COVER_FRONT             3
+#define DRFLAC_PICTURE_TYPE_COVER_BACK              4
+#define DRFLAC_PICTURE_TYPE_LEAFLET_PAGE            5
+#define DRFLAC_PICTURE_TYPE_MEDIA                   6
+#define DRFLAC_PICTURE_TYPE_LEAD_ARTIST             7
+#define DRFLAC_PICTURE_TYPE_ARTIST                  8
+#define DRFLAC_PICTURE_TYPE_CONDUCTOR               9
+#define DRFLAC_PICTURE_TYPE_BAND                    10
+#define DRFLAC_PICTURE_TYPE_COMPOSER                11
+#define DRFLAC_PICTURE_TYPE_LYRICIST                12
+#define DRFLAC_PICTURE_TYPE_RECORDING_LOCATION      13
+#define DRFLAC_PICTURE_TYPE_DURING_RECORDING        14
+#define DRFLAC_PICTURE_TYPE_DURING_PERFORMANCE      15
+#define DRFLAC_PICTURE_TYPE_SCREEN_CAPTURE          16
+#define DRFLAC_PICTURE_TYPE_BRIGHT_COLORED_FISH     17
+#define DRFLAC_PICTURE_TYPE_ILLUSTRATION            18
+#define DRFLAC_PICTURE_TYPE_BAND_LOGOTYPE           19
+#define DRFLAC_PICTURE_TYPE_PUBLISHER_LOGOTYPE      20
+
+typedef enum
+{
+    drflac_container_native,
+    drflac_container_ogg,
+    drflac_container_unknown
+} drflac_container;
+
+typedef enum
+{
+    drflac_seek_origin_start,
+    drflac_seek_origin_current
+} drflac_seek_origin;
+
+/* Packing is important on this structure because we map this directly to the raw data within the SEEKTABLE metadata block. */
+#pragma pack(2)
+typedef struct
+{
+    drflac_uint64 firstPCMFrame;
+    drflac_uint64 flacFrameOffset;   /* The offset from the first byte of the header of the first frame. */
+    drflac_uint16 pcmFrameCount;
+} drflac_seekpoint;
+#pragma pack()
+
+typedef struct
+{
+    drflac_uint16 minBlockSizeInPCMFrames;
+    drflac_uint16 maxBlockSizeInPCMFrames;
+    drflac_uint32 minFrameSizeInPCMFrames;
+    drflac_uint32 maxFrameSizeInPCMFrames;
+    drflac_uint32 sampleRate;
+    drflac_uint8  channels;
+    drflac_uint8  bitsPerSample;
+    drflac_uint64 totalPCMFrameCount;
+    drflac_uint8  md5[16];
+} drflac_streaminfo;
+
+typedef struct
+{
+    /* The metadata type. Use this to know how to interpret the data below. */
+    drflac_uint32 type;
+
+    /*
+    A pointer to the raw data. This points to a temporary buffer so don't hold on to it. It's best to
+    not modify the contents of this buffer. Use the structures below for more meaningful and structured
+    information about the metadata. It's possible for this to be null.
+    */
+    const void* pRawData;
+
+    /* The size in bytes of the block and the buffer pointed to by pRawData if it's non-NULL. */
+    drflac_uint32 rawDataSize;
+
+    union
+    {
+        drflac_streaminfo streaminfo;
+
+        struct
+        {
+            int unused;
+        } padding;
+
+        struct
+        {
+            drflac_uint32 id;
+            const void* pData;
+            drflac_uint32 dataSize;
+        } application;
+
+        struct
+        {
+            drflac_uint32 seekpointCount;
+            const drflac_seekpoint* pSeekpoints;
+        } seektable;
+
+        struct
+        {
+            drflac_uint32 vendorLength;
+            const char* vendor;
+            drflac_uint32 commentCount;
+            const void* pComments;
+        } vorbis_comment;
+
+        struct
+        {
+            char catalog[128];
+            drflac_uint64 leadInSampleCount;
+            drflac_bool32 isCD;
+            drflac_uint8 trackCount;
+            const void* pTrackData;
+        } cuesheet;
+
+        struct
+        {
+            drflac_uint32 type;
+            drflac_uint32 mimeLength;
+            const char* mime;
+            drflac_uint32 descriptionLength;
+            const char* description;
+            drflac_uint32 width;
+            drflac_uint32 height;
+            drflac_uint32 colorDepth;
+            drflac_uint32 indexColorCount;
+            drflac_uint32 pictureDataSize;
+            const drflac_uint8* pPictureData;
+        } picture;
+    } data;
+} drflac_metadata;
+
+
+/*
+Callback for when data needs to be read from the client.
+
+
+Parameters
+----------
+pUserData (in)
+    The user data that was passed to drflac_open() and family.
+
+pBufferOut (out)
+    The output buffer.
+
+bytesToRead (in)
+    The number of bytes to read.
+
+
+Return Value
+------------
+The number of bytes actually read.
+
+
+Remarks
+-------
+A return value of less than bytesToRead indicates the end of the stream. Do _not_ return from this callback until either the entire bytesToRead is filled or
+you have reached the end of the stream.
+*/
+typedef size_t (* drflac_read_proc)(void* pUserData, void* pBufferOut, size_t bytesToRead);
+
+/*
+Callback for when data needs to be seeked.
+
+
+Parameters
+----------
+pUserData (in)
+    The user data that was passed to drflac_open() and family.
+
+offset (in)
+    The number of bytes to move, relative to the origin. Will never be negative.
+
+origin (in)
+    The origin of the seek - the current position or the start of the stream.
+
+
+Return Value
+------------
+Whether or not the seek was successful.
+
+
+Remarks
+-------
+The offset will never be negative. Whether or not it is relative to the beginning or current position is determined by the "origin" parameter which will be
+either drflac_seek_origin_start or drflac_seek_origin_current.
+
+When seeking to a PCM frame using drflac_seek_to_pcm_frame(), dr_flac may call this with an offset beyond the end of the FLAC stream. This needs to be detected
+and handled by returning DRFLAC_FALSE.
+*/
+typedef drflac_bool32 (* drflac_seek_proc)(void* pUserData, int offset, drflac_seek_origin origin);
+
+/*
+Callback for when a metadata block is read.
+
+
+Parameters
+----------
+pUserData (in)
+    The user data that was passed to drflac_open() and family.
+
+pMetadata (in)
+    A pointer to a structure containing the data of the metadata block.
+
+
+Remarks
+-------
+Use pMetadata->type to determine which metadata block is being handled and how to read the data.
+*/
+typedef void (* drflac_meta_proc)(void* pUserData, drflac_metadata* pMetadata);
+
+
+typedef struct
+{
+    void* pUserData;
+    void* (* onMalloc)(size_t sz, void* pUserData);
+    void* (* onRealloc)(void* p, size_t sz, void* pUserData);
+    void  (* onFree)(void* p, void* pUserData);
+} drflac_allocation_callbacks;
+
+/* Structure for internal use. Only used for decoders opened with drflac_open_memory. */
+typedef struct
+{
+    const drflac_uint8* data;
+    size_t dataSize;
+    size_t currentReadPos;
+} drflac__memory_stream;
+
+/* Structure for internal use. Used for bit streaming. */
+typedef struct
+{
+    /* The function to call when more data needs to be read. */
+    drflac_read_proc onRead;
+
+    /* The function to call when the current read position needs to be moved. */
+    drflac_seek_proc onSeek;
+
+    /* The user data to pass around to onRead and onSeek. */
+    void* pUserData;
+
+
+    /*
+    The number of unaligned bytes in the L2 cache. This will always be 0 until the end of the stream is hit. At the end of the
+    stream there will be a number of bytes that don't cleanly fit in an L1 cache line, so we use this variable to know whether
+    or not the bistreamer needs to run on a slower path to read those last bytes. This will never be more than sizeof(drflac_cache_t).
+    */
+    size_t unalignedByteCount;
+
+    /* The content of the unaligned bytes. */
+    drflac_cache_t unalignedCache;
+
+    /* The index of the next valid cache line in the "L2" cache. */
+    drflac_uint32 nextL2Line;
+
+    /* The number of bits that have been consumed by the cache. This is used to determine how many valid bits are remaining. */
+    drflac_uint32 consumedBits;
+
+    /*
+    The cached data which was most recently read from the client. There are two levels of cache. Data flows as such:
+    Client -> L2 -> L1. The L2 -> L1 movement is aligned and runs on a fast path in just a few instructions.
+    */
+    drflac_cache_t cacheL2[DR_FLAC_BUFFER_SIZE/sizeof(drflac_cache_t)];
+    drflac_cache_t cache;
+
+    /*
+    CRC-16. This is updated whenever bits are read from the bit stream. Manually set this to 0 to reset the CRC. For FLAC, this
+    is reset to 0 at the beginning of each frame.
+    */
+    drflac_uint16 crc16;
+    drflac_cache_t crc16Cache;              /* A cache for optimizing CRC calculations. This is filled when when the L1 cache is reloaded. */
+    drflac_uint32 crc16CacheIgnoredBytes;   /* The number of bytes to ignore when updating the CRC-16 from the CRC-16 cache. */
+} drflac_bs;
+
+typedef struct
+{
+    /* The type of the subframe: SUBFRAME_CONSTANT, SUBFRAME_VERBATIM, SUBFRAME_FIXED or SUBFRAME_LPC. */
+    drflac_uint8 subframeType;
+
+    /* The number of wasted bits per sample as specified by the sub-frame header. */
+    drflac_uint8 wastedBitsPerSample;
+
+    /* The order to use for the prediction stage for SUBFRAME_FIXED and SUBFRAME_LPC. */
+    drflac_uint8 lpcOrder;
+
+    /* A pointer to the buffer containing the decoded samples in the subframe. This pointer is an offset from drflac::pExtraData. */
+    drflac_int32* pSamplesS32;
+} drflac_subframe;
+
+typedef struct
+{
+    /*
+    If the stream uses variable block sizes, this will be set to the index of the first PCM frame. If fixed block sizes are used, this will
+    always be set to 0. This is 64-bit because the decoded PCM frame number will be 36 bits.
+    */
+    drflac_uint64 pcmFrameNumber;
+
+    /*
+    If the stream uses fixed block sizes, this will be set to the frame number. If variable block sizes are used, this will always be 0. This
+    is 32-bit because in fixed block sizes, the maximum frame number will be 31 bits.
+    */
+    drflac_uint32 flacFrameNumber;
+
+    /* The sample rate of this frame. */
+    drflac_uint32 sampleRate;
+
+    /* The number of PCM frames in each sub-frame within this frame. */
+    drflac_uint16 blockSizeInPCMFrames;
+
+    /*
+    The channel assignment of this frame. This is not always set to the channel count. If interchannel decorrelation is being used this
+    will be set to DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE, DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE or DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE.
+    */
+    drflac_uint8 channelAssignment;
+
+    /* The number of bits per sample within this frame. */
+    drflac_uint8 bitsPerSample;
+
+    /* The frame's CRC. */
+    drflac_uint8 crc8;
+} drflac_frame_header;
+
+typedef struct
+{
+    /* The header. */
+    drflac_frame_header header;
+
+    /*
+    The number of PCM frames left to be read in this FLAC frame. This is initially set to the block size. As PCM frames are read,
+    this will be decremented. When it reaches 0, the decoder will see this frame as fully consumed and load the next frame.
+    */
+    drflac_uint32 pcmFramesRemaining;
+
+    /* The list of sub-frames within the frame. There is one sub-frame for each channel, and there's a maximum of 8 channels. */
+    drflac_subframe subframes[8];
+} drflac_frame;
+
+typedef struct
+{
+    /* The function to call when a metadata block is read. */
+    drflac_meta_proc onMeta;
+
+    /* The user data posted to the metadata callback function. */
+    void* pUserDataMD;
+
+    /* Memory allocation callbacks. */
+    drflac_allocation_callbacks allocationCallbacks;
+
+
+    /* The sample rate. Will be set to something like 44100. */
+    drflac_uint32 sampleRate;
+
+    /*
+    The number of channels. This will be set to 1 for monaural streams, 2 for stereo, etc. Maximum 8. This is set based on the
+    value specified in the STREAMINFO block.
+    */
+    drflac_uint8 channels;
+
+    /* The bits per sample. Will be set to something like 16, 24, etc. */
+    drflac_uint8 bitsPerSample;
+
+    /* The maximum block size, in samples. This number represents the number of samples in each channel (not combined). */
+    drflac_uint16 maxBlockSizeInPCMFrames;
+
+    /*
+    The total number of PCM Frames making up the stream. Can be 0 in which case it's still a valid stream, but just means
+    the total PCM frame count is unknown. Likely the case with streams like internet radio.
+    */
+    drflac_uint64 totalPCMFrameCount;
+
+
+    /* The container type. This is set based on whether or not the decoder was opened from a native or Ogg stream. */
+    drflac_container container;
+
+    /* The number of seekpoints in the seektable. */
+    drflac_uint32 seekpointCount;
+
+
+    /* Information about the frame the decoder is currently sitting on. */
+    drflac_frame currentFLACFrame;
+
+
+    /* The index of the PCM frame the decoder is currently sitting on. This is only used for seeking. */
+    drflac_uint64 currentPCMFrame;
+
+    /* The position of the first FLAC frame in the stream. This is only ever used for seeking. */
+    drflac_uint64 firstFLACFramePosInBytes;
+
+
+    /* A hack to avoid a malloc() when opening a decoder with drflac_open_memory(). */
+    drflac__memory_stream memoryStream;
+
+
+    /* A pointer to the decoded sample data. This is an offset of pExtraData. */
+    drflac_int32* pDecodedSamples;
+
+    /* A pointer to the seek table. This is an offset of pExtraData, or NULL if there is no seek table. */
+    drflac_seekpoint* pSeekpoints;
+
+    /* Internal use only. Only used with Ogg containers. Points to a drflac_oggbs object. This is an offset of pExtraData. */
+    void* _oggbs;
+
+    /* Internal use only. Used for profiling and testing different seeking modes. */
+    drflac_bool32 _noSeekTableSeek    : 1;
+    drflac_bool32 _noBinarySearchSeek : 1;
+    drflac_bool32 _noBruteForceSeek   : 1;
+
+    /* The bit streamer. The raw FLAC data is fed through this object. */
+    drflac_bs bs;
+
+    /* Variable length extra data. We attach this to the end of the object so we can avoid unnecessary mallocs. */
+    drflac_uint8 pExtraData[1];
+} drflac;
+
+
+/*
+Opens a FLAC decoder.
+
+
+Parameters
+----------
+onRead (in)
+    The function to call when data needs to be read from the client.
+
+onSeek (in)
+    The function to call when the read position of the client data needs to move.
+
+pUserData (in, optional)
+    A pointer to application defined data that will be passed to onRead and onSeek.
+
+pAllocationCallbacks (in, optional)
+    A pointer to application defined callbacks for managing memory allocations.
+
+
+Return Value
+------------
+Returns a pointer to an object representing the decoder.
+
+
+Remarks
+-------
+Close the decoder with `drflac_close()`.
+
+`pAllocationCallbacks` can be NULL in which case it will use `DRFLAC_MALLOC`, `DRFLAC_REALLOC` and `DRFLAC_FREE`.
+
+This function will automatically detect whether or not you are attempting to open a native or Ogg encapsulated FLAC, both of which should work seamlessly
+without any manual intervention. Ogg encapsulation also works with multiplexed streams which basically means it can play FLAC encoded audio tracks in videos.
+
+This is the lowest level function for opening a FLAC stream. You can also use `drflac_open_file()` and `drflac_open_memory()` to open the stream from a file or
+from a block of memory respectively.
+
+The STREAMINFO block must be present for this to succeed. Use `drflac_open_relaxed()` to open a FLAC stream where the header may not be present.
+
+
+Seek Also
+---------
+drflac_open_file()
+drflac_open_memory()
+drflac_open_with_metadata()
+drflac_close()
+*/
+DRFLAC_API drflac* drflac_open(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/*
+Opens a FLAC stream with relaxed validation of the header block.
+
+
+Parameters
+----------
+onRead (in)
+    The function to call when data needs to be read from the client.
+
+onSeek (in)
+    The function to call when the read position of the client data needs to move.
+
+container (in)
+    Whether or not the FLAC stream is encapsulated using standard FLAC encapsulation or Ogg encapsulation.
+
+pUserData (in, optional)
+    A pointer to application defined data that will be passed to onRead and onSeek.
+
+pAllocationCallbacks (in, optional)
+    A pointer to application defined callbacks for managing memory allocations.
+
+
+Return Value
+------------
+A pointer to an object representing the decoder.
+
+
+Remarks
+-------
+The same as drflac_open(), except attempts to open the stream even when a header block is not present.
+
+Because the header is not necessarily available, the caller must explicitly define the container (Native or Ogg). Do not set this to `drflac_container_unknown`
+as that is for internal use only.
+
+Opening in relaxed mode will continue reading data from onRead until it finds a valid frame. If a frame is never found it will continue forever. To abort,
+force your `onRead` callback to return 0, which dr_flac will use as an indicator that the end of the stream was found.
+*/
+DRFLAC_API drflac* drflac_open_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_container container, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/*
+Opens a FLAC decoder and notifies the caller of the metadata chunks (album art, etc.).
+
+
+Parameters
+----------
+onRead (in)
+    The function to call when data needs to be read from the client.
+
+onSeek (in)
+    The function to call when the read position of the client data needs to move.
+
+onMeta (in)
+    The function to call for every metadata block.
+
+pUserData (in, optional)
+    A pointer to application defined data that will be passed to onRead, onSeek and onMeta.
+
+pAllocationCallbacks (in, optional)
+    A pointer to application defined callbacks for managing memory allocations.
+
+
+Return Value
+------------
+A pointer to an object representing the decoder.
+
+
+Remarks
+-------
+Close the decoder with `drflac_close()`.
+
+`pAllocationCallbacks` can be NULL in which case it will use `DRFLAC_MALLOC`, `DRFLAC_REALLOC` and `DRFLAC_FREE`.
+
+This is slower than `drflac_open()`, so avoid this one if you don't need metadata. Internally, this will allocate and free memory on the heap for every
+metadata block except for STREAMINFO and PADDING blocks.
+
+The caller is notified of the metadata via the `onMeta` callback. All metadata blocks will be handled before the function returns.
+
+The STREAMINFO block must be present for this to succeed. Use `drflac_open_with_metadata_relaxed()` to open a FLAC stream where the header may not be present.
+
+Note that this will behave inconsistently with `drflac_open()` if the stream is an Ogg encapsulated stream and a metadata block is corrupted. This is due to
+the way the Ogg stream recovers from corrupted pages. When `drflac_open_with_metadata()` is being used, the open routine will try to read the contents of the
+metadata block, whereas `drflac_open()` will simply seek past it (for the sake of efficiency). This inconsistency can result in different samples being
+returned depending on whether or not the stream is being opened with metadata.
+
+
+Seek Also
+---------
+drflac_open_file_with_metadata()
+drflac_open_memory_with_metadata()
+drflac_open()
+drflac_close()
+*/
+DRFLAC_API drflac* drflac_open_with_metadata(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/*
+The same as drflac_open_with_metadata(), except attempts to open the stream even when a header block is not present.
+
+See Also
+--------
+drflac_open_with_metadata()
+drflac_open_relaxed()
+*/
+DRFLAC_API drflac* drflac_open_with_metadata_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/*
+Closes the given FLAC decoder.
+
+
+Parameters
+----------
+pFlac (in)
+    The decoder to close.
+
+
+Remarks
+-------
+This will destroy the decoder object.
+
+
+See Also
+--------
+drflac_open()
+drflac_open_with_metadata()
+drflac_open_file()
+drflac_open_file_w()
+drflac_open_file_with_metadata()
+drflac_open_file_with_metadata_w()
+drflac_open_memory()
+drflac_open_memory_with_metadata()
+*/
+DRFLAC_API void drflac_close(drflac* pFlac);
+
+
+/*
+Reads sample data from the given FLAC decoder, output as interleaved signed 32-bit PCM.
+
+
+Parameters
+----------
+pFlac (in)
+    The decoder.
+
+framesToRead (in)
+    The number of PCM frames to read.
+
+pBufferOut (out, optional)
+    A pointer to the buffer that will receive the decoded samples.
+
+
+Return Value
+------------
+Returns the number of PCM frames actually read. If the return value is less than `framesToRead` it has reached the end.
+
+
+Remarks
+-------
+pBufferOut can be null, in which case the call will act as a seek, and the return value will be the number of frames seeked.
+*/
+DRFLAC_API drflac_uint64 drflac_read_pcm_frames_s32(drflac* pFlac, drflac_uint64 framesToRead, drflac_int32* pBufferOut);
+
+
+/*
+Reads sample data from the given FLAC decoder, output as interleaved signed 16-bit PCM.
+
+
+Parameters
+----------
+pFlac (in)
+    The decoder.
+
+framesToRead (in)
+    The number of PCM frames to read.
+
+pBufferOut (out, optional)
+    A pointer to the buffer that will receive the decoded samples.
+
+
+Return Value
+------------
+Returns the number of PCM frames actually read. If the return value is less than `framesToRead` it has reached the end.
+
+
+Remarks
+-------
+pBufferOut can be null, in which case the call will act as a seek, and the return value will be the number of frames seeked.
+
+Note that this is lossy for streams where the bits per sample is larger than 16.
+*/
+DRFLAC_API drflac_uint64 drflac_read_pcm_frames_s16(drflac* pFlac, drflac_uint64 framesToRead, drflac_int16* pBufferOut);
+
+/*
+Reads sample data from the given FLAC decoder, output as interleaved 32-bit floating point PCM.
+
+
+Parameters
+----------
+pFlac (in)
+    The decoder.
+
+framesToRead (in)
+    The number of PCM frames to read.
+
+pBufferOut (out, optional)
+    A pointer to the buffer that will receive the decoded samples.
+
+
+Return Value
+------------
+Returns the number of PCM frames actually read. If the return value is less than `framesToRead` it has reached the end.
+
+
+Remarks
+-------
+pBufferOut can be null, in which case the call will act as a seek, and the return value will be the number of frames seeked.
+
+Note that this should be considered lossy due to the nature of floating point numbers not being able to exactly represent every possible number.
+*/
+DRFLAC_API drflac_uint64 drflac_read_pcm_frames_f32(drflac* pFlac, drflac_uint64 framesToRead, double* pBufferOut);
+
+/*
+Seeks to the PCM frame at the given index.
+
+
+Parameters
+----------
+pFlac (in)
+    The decoder.
+
+pcmFrameIndex (in)
+    The index of the PCM frame to seek to. See notes below.
+
+
+Return Value
+-------------
+`DRFLAC_TRUE` if successful; `DRFLAC_FALSE` otherwise.
+*/
+DRFLAC_API drflac_bool32 drflac_seek_to_pcm_frame(drflac* pFlac, drflac_uint64 pcmFrameIndex);
+
+
+
+#ifndef DR_FLAC_NO_STDIO
+/*
+Opens a FLAC decoder from the file at the given path.
+
+
+Parameters
+----------
+pFileName (in)
+    The path of the file to open, either absolute or relative to the current directory.
+
+pAllocationCallbacks (in, optional)
+    A pointer to application defined callbacks for managing memory allocations.
+
+
+Return Value
+------------
+A pointer to an object representing the decoder.
+
+
+Remarks
+-------
+Close the decoder with drflac_close().
+
+
+Remarks
+-------
+This will hold a handle to the file until the decoder is closed with drflac_close(). Some platforms will restrict the number of files a process can have open
+at any given time, so keep this mind if you have many decoders open at the same time.
+
+
+See Also
+--------
+drflac_open_file_with_metadata()
+drflac_open()
+drflac_close()
+*/
+DRFLAC_API drflac* drflac_open_file(const char* pFileName, const drflac_allocation_callbacks* pAllocationCallbacks);
+DRFLAC_API drflac* drflac_open_file_w(const wchar_t* pFileName, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/*
+Opens a FLAC decoder from the file at the given path and notifies the caller of the metadata chunks (album art, etc.)
+
+
+Parameters
+----------
+pFileName (in)
+    The path of the file to open, either absolute or relative to the current directory.
+
+pAllocationCallbacks (in, optional)
+    A pointer to application defined callbacks for managing memory allocations.
+
+onMeta (in)
+    The callback to fire for each metadata block.
+
+pUserData (in)
+    A pointer to the user data to pass to the metadata callback.
+
+pAllocationCallbacks (in)
+    A pointer to application defined callbacks for managing memory allocations.
+
+
+Remarks
+-------
+Look at the documentation for drflac_open_with_metadata() for more information on how metadata is handled.
+
+
+See Also
+--------
+drflac_open_with_metadata()
+drflac_open()
+drflac_close()
+*/
+DRFLAC_API drflac* drflac_open_file_with_metadata(const char* pFileName, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks);
+DRFLAC_API drflac* drflac_open_file_with_metadata_w(const wchar_t* pFileName, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks);
+#endif
+
+/*
+Opens a FLAC decoder from a pre-allocated block of memory
+
+
+Parameters
+----------
+pData (in)
+    A pointer to the raw encoded FLAC data.
+
+dataSize (in)
+    The size in bytes of `data`.
+
+pAllocationCallbacks (in)
+    A pointer to application defined callbacks for managing memory allocations.
+
+
+Return Value
+------------
+A pointer to an object representing the decoder.
+
+
+Remarks
+-------
+This does not create a copy of the data. It is up to the application to ensure the buffer remains valid for the lifetime of the decoder.
+
+
+See Also
+--------
+drflac_open()
+drflac_close()
+*/
+DRFLAC_API drflac* drflac_open_memory(const void* pData, size_t dataSize, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/*
+Opens a FLAC decoder from a pre-allocated block of memory and notifies the caller of the metadata chunks (album art, etc.)
+
+
+Parameters
+----------
+pData (in)
+    A pointer to the raw encoded FLAC data.
+
+dataSize (in)
+    The size in bytes of `data`.
+
+onMeta (in)
+    The callback to fire for each metadata block.
+
+pUserData (in)
+    A pointer to the user data to pass to the metadata callback.
+
+pAllocationCallbacks (in)
+    A pointer to application defined callbacks for managing memory allocations.
+
+
+Remarks
+-------
+Look at the documentation for drflac_open_with_metadata() for more information on how metadata is handled.
+
+
+See Also
+-------
+drflac_open_with_metadata()
+drflac_open()
+drflac_close()
+*/
+DRFLAC_API drflac* drflac_open_memory_with_metadata(const void* pData, size_t dataSize, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+
+
+/* High Level APIs */
+
+/*
+Opens a FLAC stream from the given callbacks and fully decodes it in a single operation. The return value is a
+pointer to the sample data as interleaved signed 32-bit PCM. The returned data must be freed with drflac_free().
+
+You can pass in custom memory allocation callbacks via the pAllocationCallbacks parameter. This can be NULL in which
+case it will use DRFLAC_MALLOC, DRFLAC_REALLOC and DRFLAC_FREE.
+
+Sometimes a FLAC file won't keep track of the total sample count. In this situation the function will continuously
+read samples into a dynamically sized buffer on the heap until no samples are left.
+
+Do not call this function on a broadcast type of stream (like internet radio streams and whatnot).
+*/
+DRFLAC_API drflac_int32* drflac_open_and_read_pcm_frames_s32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/* Same as drflac_open_and_read_pcm_frames_s32(), except returns signed 16-bit integer samples. */
+DRFLAC_API drflac_int16* drflac_open_and_read_pcm_frames_s16(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/* Same as drflac_open_and_read_pcm_frames_s32(), except returns 32-bit floating-point samples. */
+DRFLAC_API double* drflac_open_and_read_pcm_frames_f32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+#ifndef DR_FLAC_NO_STDIO
+/* Same as drflac_open_and_read_pcm_frames_s32() except opens the decoder from a file. */
+DRFLAC_API drflac_int32* drflac_open_file_and_read_pcm_frames_s32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/* Same as drflac_open_file_and_read_pcm_frames_s32(), except returns signed 16-bit integer samples. */
+DRFLAC_API drflac_int16* drflac_open_file_and_read_pcm_frames_s16(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/* Same as drflac_open_file_and_read_pcm_frames_s32(), except returns 32-bit floating-point samples. */
+DRFLAC_API double* drflac_open_file_and_read_pcm_frames_f32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
+#endif
+
+/* Same as drflac_open_and_read_pcm_frames_s32() except opens the decoder from a block of memory. */
+DRFLAC_API drflac_int32* drflac_open_memory_and_read_pcm_frames_s32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/* Same as drflac_open_memory_and_read_pcm_frames_s32(), except returns signed 16-bit integer samples. */
+DRFLAC_API drflac_int16* drflac_open_memory_and_read_pcm_frames_s16(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/* Same as drflac_open_memory_and_read_pcm_frames_s32(), except returns 32-bit floating-point samples. */
+DRFLAC_API double* drflac_open_memory_and_read_pcm_frames_f32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/*
+Frees memory that was allocated internally by dr_flac.
+
+Set pAllocationCallbacks to the same object that was passed to drflac_open_*_and_read_pcm_frames_*(). If you originally passed in NULL, pass in NULL for this.
+*/
+DRFLAC_API void drflac_free(void* p, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+
+/* Structure representing an iterator for vorbis comments in a VORBIS_COMMENT metadata block. */
+typedef struct
+{
+    drflac_uint32 countRemaining;
+    const char* pRunningData;
+} drflac_vorbis_comment_iterator;
+
+/*
+Initializes a vorbis comment iterator. This can be used for iterating over the vorbis comments in a VORBIS_COMMENT
+metadata block.
+*/
+DRFLAC_API void drflac_init_vorbis_comment_iterator(drflac_vorbis_comment_iterator* pIter, drflac_uint32 commentCount, const void* pComments);
+
+/*
+Goes to the next vorbis comment in the given iterator. If null is returned it means there are no more comments. The
+returned string is NOT null terminated.
+*/
+DRFLAC_API const char* drflac_next_vorbis_comment(drflac_vorbis_comment_iterator* pIter, drflac_uint32* pCommentLengthOut);
+
+
+/* Structure representing an iterator for cuesheet tracks in a CUESHEET metadata block. */
+typedef struct
+{
+    drflac_uint32 countRemaining;
+    const char* pRunningData;
+} drflac_cuesheet_track_iterator;
+
+/* Packing is important on this structure because we map this directly to the raw data within the CUESHEET metadata block. */
+#pragma pack(4)
+typedef struct
+{
+    drflac_uint64 offset;
+    drflac_uint8 index;
+    drflac_uint8 reserved[3];
+} drflac_cuesheet_track_index;
+#pragma pack()
+
+typedef struct
+{
+    drflac_uint64 offset;
+    drflac_uint8 trackNumber;
+    char ISRC[12];
+    drflac_bool8 isAudio;
+    drflac_bool8 preEmphasis;
+    drflac_uint8 indexCount;
+    const drflac_cuesheet_track_index* pIndexPoints;
+} drflac_cuesheet_track;
+
+/*
+Initializes a cuesheet track iterator. This can be used for iterating over the cuesheet tracks in a CUESHEET metadata
+block.
+*/
+DRFLAC_API void drflac_init_cuesheet_track_iterator(drflac_cuesheet_track_iterator* pIter, drflac_uint32 trackCount, const void* pTrackData);
+
+/* Goes to the next cuesheet track in the given iterator. If DRFLAC_FALSE is returned it means there are no more comments. */
+DRFLAC_API drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterator* pIter, drflac_cuesheet_track* pCuesheetTrack);
+
+
+#ifdef __cplusplus
+}
+#endif
+#endif  /* dr_flac_h */
+
+
+/************************************************************************************************************************************************************
+ ************************************************************************************************************************************************************
+
+ IMPLEMENTATION
+
+ ************************************************************************************************************************************************************
+ ************************************************************************************************************************************************************/
+#if defined(DR_FLAC_IMPLEMENTATION) || defined(DRFLAC_IMPLEMENTATION)
+
+/* Disable some annoying warnings. */
+#if defined(__GNUC__)
+    #pragma GCC diagnostic push
+    #if __GNUC__ >= 7
+    #pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
+    #endif
+#endif
+
+#ifdef __linux__
+    #ifndef _BSD_SOURCE
+        #define _BSD_SOURCE
+    #endif
+    #ifndef __USE_BSD
+        #define __USE_BSD
+    #endif
+    #include <endian.h>
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef _MSC_VER
+    #define DRFLAC_INLINE __forceinline
+#elif defined(__GNUC__)
+    /*
+    I've had a bug report where GCC is emitting warnings about functions possibly not being inlineable. This warning happens when
+    the __attribute__((always_inline)) attribute is defined without an "inline" statement. I think therefore there must be some
+    case where "__inline__" is not always defined, thus the compiler emitting these warnings. When using -std=c89 or -ansi on the
+    command line, we cannot use the "inline" keyword and instead need to use "__inline__". In an attempt to work around this issue
+    I am using "__inline__" only when we're compiling in strict ANSI mode.
+    */
+    #if defined(__STRICT_ANSI__)
+        #define DRFLAC_INLINE __inline__ __attribute__((always_inline))
+    #else
+        #define DRFLAC_INLINE inline __attribute__((always_inline))
+    #endif
+#else
+    #define DRFLAC_INLINE
+#endif
+
+/* CPU architecture. */
+#if defined(__x86_64__) || defined(_M_X64)
+    #define DRFLAC_X64
+#elif defined(__i386) || defined(_M_IX86)
+    #define DRFLAC_X86
+#elif defined(__arm__) || defined(_M_ARM)
+    #define DRFLAC_ARM
+#endif
+
+/* Intrinsics Support */
+#if !defined(DR_FLAC_NO_SIMD)
+    #if defined(DRFLAC_X64) || defined(DRFLAC_X86)
+        #if defined(_MSC_VER) && !defined(__clang__)
+            /* MSVC. */
+            #if _MSC_VER >= 1400 && !defined(DRFLAC_NO_SSE2)    /* 2005 */
+                #define DRFLAC_SUPPORT_SSE2
+            #endif
+            #if _MSC_VER >= 1600 && !defined(DRFLAC_NO_SSE41)   /* 2010 */
+                #define DRFLAC_SUPPORT_SSE41
+            #endif
+        #else
+            /* Assume GNUC-style. */
+            #if defined(__SSE2__) && !defined(DRFLAC_NO_SSE2)
+                #define DRFLAC_SUPPORT_SSE2
+            #endif
+            #if defined(__SSE4_1__) && !defined(DRFLAC_NO_SSE41)
+                #define DRFLAC_SUPPORT_SSE41
+            #endif
+        #endif
+
+        /* If at this point we still haven't determined compiler support for the intrinsics just fall back to __has_include. */
+        #if !defined(__GNUC__) && !defined(__clang__) && defined(__has_include)
+            #if !defined(DRFLAC_SUPPORT_SSE2) && !defined(DRFLAC_NO_SSE2) && __has_include(<emmintrin.h>)
+                #define DRFLAC_SUPPORT_SSE2
+            #endif
+            #if !defined(DRFLAC_SUPPORT_SSE41) && !defined(DRFLAC_NO_SSE41) && __has_include(<smmintrin.h>)
+                #define DRFLAC_SUPPORT_SSE41
+            #endif
+        #endif
+
+        #if defined(DRFLAC_SUPPORT_SSE41)
+            #include <smmintrin.h>
+        #elif defined(DRFLAC_SUPPORT_SSE2)
+            #include <emmintrin.h>
+        #endif
+    #endif
+
+    #if defined(DRFLAC_ARM)
+        #if !defined(DRFLAC_NO_NEON) && (defined(__ARM_NEON) || defined(__aarch64__) || defined(_M_ARM64))
+            #define DRFLAC_SUPPORT_NEON
+        #endif
+
+        /* Fall back to looking for the #include file. */
+        #if !defined(__GNUC__) && !defined(__clang__) && defined(__has_include)
+            #if !defined(DRFLAC_SUPPORT_NEON) && !defined(DRFLAC_NO_NEON) && __has_include(<arm_neon.h>)
+                #define DRFLAC_SUPPORT_NEON
+            #endif
+        #endif
+
+        #if defined(DRFLAC_SUPPORT_NEON)
+            #include <arm_neon.h>
+        #endif
+    #endif
+#endif
+
+/* Compile-time CPU feature support. */
+#if !defined(DR_FLAC_NO_SIMD) && (defined(DRFLAC_X86) || defined(DRFLAC_X64))
+    #if defined(_MSC_VER) && !defined(__clang__)
+        #if _MSC_VER >= 1400
+            #include <intrin.h>
+            static void drflac__cpuid(int info[4], int fid)
+            {
+                __cpuid(info, fid);
+            }
+        #else
+            #define DRFLAC_NO_CPUID
+        #endif
+    #else
+        #if defined(__GNUC__) || defined(__clang__)
+            static void drflac__cpuid(int info[4], int fid)
+            {
+                /*
+                It looks like the -fPIC option uses the ebx register which GCC complains about. We can work around this by just using a different register, the
+                specific register of which I'm letting the compiler decide on. The "k" prefix is used to specify a 32-bit register. The {...} syntax is for
+                supporting different assembly dialects.
+
+                What's basically happening is that we're saving and restoring the ebx register manually.
+                */
+                #if defined(DRFLAC_X86) && defined(__PIC__)
+                    __asm__ __volatile__ (
+                        "xchg{l} {%%}ebx, %k1;"
+                        "cpuid;"
+                        "xchg{l} {%%}ebx, %k1;"
+                        : "=a"(info[0]), "=&r"(info[1]), "=c"(info[2]), "=d"(info[3]) : "a"(fid), "c"(0)
+                    );
+                #else
+                    __asm__ __volatile__ (
+                        "cpuid" : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3]) : "a"(fid), "c"(0)
+                    );
+                #endif
+            }
+        #else
+            #define DRFLAC_NO_CPUID
+        #endif
+    #endif
+#else
+    #define DRFLAC_NO_CPUID
+#endif
+
+static DRFLAC_INLINE drflac_bool32 drflac_has_sse2(void)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    #if (defined(DRFLAC_X64) || defined(DRFLAC_X86)) && !defined(DRFLAC_NO_SSE2)
+        #if defined(DRFLAC_X64)
+            return DRFLAC_TRUE;    /* 64-bit targets always support SSE2. */
+        #elif (defined(_M_IX86_FP) && _M_IX86_FP == 2) || defined(__SSE2__)
+            return DRFLAC_TRUE;    /* If the compiler is allowed to freely generate SSE2 code we can assume support. */
+        #else
+            #if defined(DRFLAC_NO_CPUID)
+                return DRFLAC_FALSE;
+            #else
+                int info[4];
+                drflac__cpuid(info, 1);
+                return (info[3] & (1 << 26)) != 0;
+            #endif
+        #endif
+    #else
+        return DRFLAC_FALSE;       /* SSE2 is only supported on x86 and x64 architectures. */
+    #endif
+#else
+    return DRFLAC_FALSE;           /* No compiler support. */
+#endif
+}
+
+static DRFLAC_INLINE drflac_bool32 drflac_has_sse41(void)
+{
+#if defined(DRFLAC_SUPPORT_SSE41)
+    #if (defined(DRFLAC_X64) || defined(DRFLAC_X86)) && !defined(DRFLAC_NO_SSE41)
+        #if defined(DRFLAC_X64)
+            return DRFLAC_TRUE;    /* 64-bit targets always support SSE4.1. */
+        #elif (defined(_M_IX86_FP) && _M_IX86_FP == 2) || defined(__SSE4_1__)
+            return DRFLAC_TRUE;    /* If the compiler is allowed to freely generate SSE41 code we can assume support. */
+        #else
+            #if defined(DRFLAC_NO_CPUID)
+                return DRFLAC_FALSE;
+            #else
+                int info[4];
+                drflac__cpuid(info, 1);
+                return (info[2] & (1 << 19)) != 0;
+            #endif
+        #endif
+    #else
+        return DRFLAC_FALSE;       /* SSE41 is only supported on x86 and x64 architectures. */
+    #endif
+#else
+    return DRFLAC_FALSE;           /* No compiler support. */
+#endif
+}
+
+
+#if defined(_MSC_VER) && _MSC_VER >= 1500 && (defined(DRFLAC_X86) || defined(DRFLAC_X64))
+    #define DRFLAC_HAS_LZCNT_INTRINSIC
+#elif (defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)))
+    #define DRFLAC_HAS_LZCNT_INTRINSIC
+#elif defined(__clang__)
+    #if defined(__has_builtin)
+        #if __has_builtin(__builtin_clzll) || __has_builtin(__builtin_clzl)
+            #define DRFLAC_HAS_LZCNT_INTRINSIC
+        #endif
+    #endif
+#endif
+
+#if defined(_MSC_VER) && _MSC_VER >= 1400
+    #define DRFLAC_HAS_BYTESWAP16_INTRINSIC
+    #define DRFLAC_HAS_BYTESWAP32_INTRINSIC
+    #define DRFLAC_HAS_BYTESWAP64_INTRINSIC
+#elif defined(__clang__)
+    #if defined(__has_builtin)
+        #if __has_builtin(__builtin_bswap16)
+            #define DRFLAC_HAS_BYTESWAP16_INTRINSIC
+        #endif
+        #if __has_builtin(__builtin_bswap32)
+            #define DRFLAC_HAS_BYTESWAP32_INTRINSIC
+        #endif
+        #if __has_builtin(__builtin_bswap64)
+            #define DRFLAC_HAS_BYTESWAP64_INTRINSIC
+        #endif
+    #endif
+#elif defined(__GNUC__)
+    #if ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
+        #define DRFLAC_HAS_BYTESWAP32_INTRINSIC
+        #define DRFLAC_HAS_BYTESWAP64_INTRINSIC
+    #endif
+    #if ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))
+        #define DRFLAC_HAS_BYTESWAP16_INTRINSIC
+    #endif
+#endif
+
+
+/* Standard library stuff. */
+#ifndef DRFLAC_ASSERT
+#include <assert.h>
+#define DRFLAC_ASSERT(expression)           assert(expression)
+#endif
+#ifndef DRFLAC_MALLOC
+#define DRFLAC_MALLOC(sz)                   malloc((sz))
+#endif
+#ifndef DRFLAC_REALLOC
+#define DRFLAC_REALLOC(p, sz)               realloc((p), (sz))
+#endif
+#ifndef DRFLAC_FREE
+#define DRFLAC_FREE(p)                      free((p))
+#endif
+#ifndef DRFLAC_COPY_MEMORY
+#define DRFLAC_COPY_MEMORY(dst, src, sz)    memcpy((dst), (src), (sz))
+#endif
+#ifndef DRFLAC_ZERO_MEMORY
+#define DRFLAC_ZERO_MEMORY(p, sz)           memset((p), 0, (sz))
+#endif
+#ifndef DRFLAC_ZERO_OBJECT
+#define DRFLAC_ZERO_OBJECT(p)               DRFLAC_ZERO_MEMORY((p), sizeof(*(p)))
+#endif
+
+#define DRFLAC_MAX_SIMD_VECTOR_SIZE                     64  /* 64 for AVX-512 in the future. */
+
+typedef drflac_int32 drflac_result;
+#define DRFLAC_SUCCESS                                   0
+#define DRFLAC_ERROR                                    -1   /* A generic error. */
+#define DRFLAC_INVALID_ARGS                             -2
+#define DRFLAC_INVALID_OPERATION                        -3
+#define DRFLAC_OUT_OF_MEMORY                            -4
+#define DRFLAC_OUT_OF_RANGE                             -5
+#define DRFLAC_ACCESS_DENIED                            -6
+#define DRFLAC_DOES_NOT_EXIST                           -7
+#define DRFLAC_ALREADY_EXISTS                           -8
+#define DRFLAC_TOO_MANY_OPEN_FILES                      -9
+#define DRFLAC_INVALID_FILE                             -10
+#define DRFLAC_TOO_BIG                                  -11
+#define DRFLAC_PATH_TOO_LONG                            -12
+#define DRFLAC_NAME_TOO_LONG                            -13
+#define DRFLAC_NOT_DIRECTORY                            -14
+#define DRFLAC_IS_DIRECTORY                             -15
+#define DRFLAC_DIRECTORY_NOT_EMPTY                      -16
+#define DRFLAC_END_OF_FILE                              -17
+#define DRFLAC_NO_SPACE                                 -18
+#define DRFLAC_BUSY                                     -19
+#define DRFLAC_IO_ERROR                                 -20
+#define DRFLAC_INTERRUPT                                -21
+#define DRFLAC_UNAVAILABLE                              -22
+#define DRFLAC_ALREADY_IN_USE                           -23
+#define DRFLAC_BAD_ADDRESS                              -24
+#define DRFLAC_BAD_SEEK                                 -25
+#define DRFLAC_BAD_PIPE                                 -26
+#define DRFLAC_DEADLOCK                                 -27
+#define DRFLAC_TOO_MANY_LINKS                           -28
+#define DRFLAC_NOT_IMPLEMENTED                          -29
+#define DRFLAC_NO_MESSAGE                               -30
+#define DRFLAC_BAD_MESSAGE                              -31
+#define DRFLAC_NO_DATA_AVAILABLE                        -32
+#define DRFLAC_INVALID_DATA                             -33
+#define DRFLAC_TIMEOUT                                  -34
+#define DRFLAC_NO_NETWORK                               -35
+#define DRFLAC_NOT_UNIQUE                               -36
+#define DRFLAC_NOT_SOCKET                               -37
+#define DRFLAC_NO_ADDRESS                               -38
+#define DRFLAC_BAD_PROTOCOL                             -39
+#define DRFLAC_PROTOCOL_UNAVAILABLE                     -40
+#define DRFLAC_PROTOCOL_NOT_SUPPORTED                   -41
+#define DRFLAC_PROTOCOL_FAMILY_NOT_SUPPORTED            -42
+#define DRFLAC_ADDRESS_FAMILY_NOT_SUPPORTED             -43
+#define DRFLAC_SOCKET_NOT_SUPPORTED                     -44
+#define DRFLAC_CONNECTION_RESET                         -45
+#define DRFLAC_ALREADY_CONNECTED                        -46
+#define DRFLAC_NOT_CONNECTED                            -47
+#define DRFLAC_CONNECTION_REFUSED                       -48
+#define DRFLAC_NO_HOST                                  -49
+#define DRFLAC_IN_PROGRESS                              -50
+#define DRFLAC_CANCELLED                                -51
+#define DRFLAC_MEMORY_ALREADY_MAPPED                    -52
+#define DRFLAC_AT_END                                   -53
+#define DRFLAC_CRC_MISMATCH                             -128
+
+#define DRFLAC_SUBFRAME_CONSTANT                        0
+#define DRFLAC_SUBFRAME_VERBATIM                        1
+#define DRFLAC_SUBFRAME_FIXED                           8
+#define DRFLAC_SUBFRAME_LPC                             32
+#define DRFLAC_SUBFRAME_RESERVED                        255
+
+#define DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE  0
+#define DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2 1
+
+#define DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT           0
+#define DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE             8
+#define DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE            9
+#define DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE              10
+
+#define drflac_align(x, a)                              ((((x) + (a) - 1) / (a)) * (a))
+
+
+DRFLAC_API void drflac_version(drflac_uint32* pMajor, drflac_uint32* pMinor, drflac_uint32* pRevision)
+{
+    if (pMajor) {
+        *pMajor = DRFLAC_VERSION_MAJOR;
+    }
+
+    if (pMinor) {
+        *pMinor = DRFLAC_VERSION_MINOR;
+    }
+
+    if (pRevision) {
+        *pRevision = DRFLAC_VERSION_REVISION;
+    }
+}
+
+DRFLAC_API const char* drflac_version_string()
+{
+    return DRFLAC_VERSION_STRING;
+}
+
+
+/* CPU caps. */
+#if defined(__has_feature)
+    #if __has_feature(thread_sanitizer)
+        #define DRFLAC_NO_THREAD_SANITIZE __attribute__((no_sanitize("thread")))
+    #else
+        #define DRFLAC_NO_THREAD_SANITIZE
+    #endif
+#else
+    #define DRFLAC_NO_THREAD_SANITIZE
+#endif
+
+#if defined(DRFLAC_HAS_LZCNT_INTRINSIC)
+static drflac_bool32 drflac__gIsLZCNTSupported = DRFLAC_FALSE;
+#endif
+
+#ifndef DRFLAC_NO_CPUID
+static drflac_bool32 drflac__gIsSSE2Supported  = DRFLAC_FALSE;
+static drflac_bool32 drflac__gIsSSE41Supported = DRFLAC_FALSE;
+
+/*
+I've had a bug report that Clang's ThreadSanitizer presents a warning in this function. Having reviewed this, this does
+actually make sense. However, since CPU caps should never differ for a running process, I don't think the trade off of
+complicating internal API's by passing around CPU caps versus just disabling the warnings is worthwhile. I'm therefore
+just going to disable these warnings. This is disabled via the DRFLAC_NO_THREAD_SANITIZE attribute.
+*/
+DRFLAC_NO_THREAD_SANITIZE static void drflac__init_cpu_caps(void)
+{
+    static drflac_bool32 isCPUCapsInitialized = DRFLAC_FALSE;
+
+    if (!isCPUCapsInitialized) {
+        /* LZCNT */
+#if defined(DRFLAC_HAS_LZCNT_INTRINSIC)
+        int info[4] = {0};
+        drflac__cpuid(info, 0x80000001);
+        drflac__gIsLZCNTSupported = (info[2] & (1 << 5)) != 0;
+#endif
+
+        /* SSE2 */
+        drflac__gIsSSE2Supported = drflac_has_sse2();
+
+        /* SSE4.1 */
+        drflac__gIsSSE41Supported = drflac_has_sse41();
+
+        /* Initialized. */
+        isCPUCapsInitialized = DRFLAC_TRUE;
+    }
+}
+#else
+static drflac_bool32 drflac__gIsNEONSupported  = DRFLAC_FALSE;
+
+static DRFLAC_INLINE drflac_bool32 drflac__has_neon(void)
+{
+#if defined(DRFLAC_SUPPORT_NEON)
+    #if defined(DRFLAC_ARM) && !defined(DRFLAC_NO_NEON)
+        #if (defined(__ARM_NEON) || defined(__aarch64__) || defined(_M_ARM64))
+            return DRFLAC_TRUE;    /* If the compiler is allowed to freely generate NEON code we can assume support. */
+        #else
+            /* TODO: Runtime check. */
+            return DRFLAC_FALSE;
+        #endif
+    #else
+        return DRFLAC_FALSE;       /* NEON is only supported on ARM architectures. */
+    #endif
+#else
+    return DRFLAC_FALSE;           /* No compiler support. */
+#endif
+}
+
+DRFLAC_NO_THREAD_SANITIZE static void drflac__init_cpu_caps(void)
+{
+    drflac__gIsNEONSupported = drflac__has_neon();
+
+#if defined(DRFLAC_HAS_LZCNT_INTRINSIC) && defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 5)
+    drflac__gIsLZCNTSupported = DRFLAC_TRUE;
+#endif
+}
+#endif
+
+
+/* Endian Management */
+static DRFLAC_INLINE drflac_bool32 drflac__is_little_endian(void)
+{
+#if defined(DRFLAC_X86) || defined(DRFLAC_X64)
+    return DRFLAC_TRUE;
+#elif defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && __BYTE_ORDER == __LITTLE_ENDIAN
+    return DRFLAC_TRUE;
+#else
+    int n = 1;
+    return (*(char*)&n) == 1;
+#endif
+}
+
+static DRFLAC_INLINE drflac_uint16 drflac__swap_endian_uint16(drflac_uint16 n)
+{
+#ifdef DRFLAC_HAS_BYTESWAP16_INTRINSIC
+    #if defined(_MSC_VER)
+        return _byteswap_ushort(n);
+    #elif defined(__GNUC__) || defined(__clang__)
+        return __builtin_bswap16(n);
+    #else
+        #error "This compiler does not support the byte swap intrinsic."
+    #endif
+#else
+    return ((n & 0xFF00) >> 8) |
+           ((n & 0x00FF) << 8);
+#endif
+}
+
+static DRFLAC_INLINE drflac_uint32 drflac__swap_endian_uint32(drflac_uint32 n)
+{
+#ifdef DRFLAC_HAS_BYTESWAP32_INTRINSIC
+    #if defined(_MSC_VER)
+        return _byteswap_ulong(n);
+    #elif defined(__GNUC__) || defined(__clang__)
+        #if defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 6) && !defined(DRFLAC_64BIT)   /* <-- 64-bit inline assembly has not been tested, so disabling for now. */
+            /* Inline assembly optimized implementation for ARM. In my testing, GCC does not generate optimized code with __builtin_bswap32(). */
+            drflac_uint32 r;
+            __asm__ __volatile__ (
+            #if defined(DRFLAC_64BIT)
+                "rev %w[out], %w[in]" : [out]"=r"(r) : [in]"r"(n)   /* <-- This is untested. If someone in the community could test this, that would be appreciated! */
+            #else
+                "rev %[out], %[in]" : [out]"=r"(r) : [in]"r"(n)
+            #endif
+            );
+            return r;
+        #else
+            return __builtin_bswap32(n);
+        #endif
+    #else
+        #error "This compiler does not support the byte swap intrinsic."
+    #endif
+#else
+    return ((n & 0xFF000000) >> 24) |
+           ((n & 0x00FF0000) >>  8) |
+           ((n & 0x0000FF00) <<  8) |
+           ((n & 0x000000FF) << 24);
+#endif
+}
+
+static DRFLAC_INLINE drflac_uint64 drflac__swap_endian_uint64(drflac_uint64 n)
+{
+#ifdef DRFLAC_HAS_BYTESWAP64_INTRINSIC
+    #if defined(_MSC_VER)
+        return _byteswap_uint64(n);
+    #elif defined(__GNUC__) || defined(__clang__)
+        return __builtin_bswap64(n);
+    #else
+        #error "This compiler does not support the byte swap intrinsic."
+    #endif
+#else
+    return ((n & (drflac_uint64)0xFF00000000000000) >> 56) |
+           ((n & (drflac_uint64)0x00FF000000000000) >> 40) |
+           ((n & (drflac_uint64)0x0000FF0000000000) >> 24) |
+           ((n & (drflac_uint64)0x000000FF00000000) >>  8) |
+           ((n & (drflac_uint64)0x00000000FF000000) <<  8) |
+           ((n & (drflac_uint64)0x0000000000FF0000) << 24) |
+           ((n & (drflac_uint64)0x000000000000FF00) << 40) |
+           ((n & (drflac_uint64)0x00000000000000FF) << 56);
+#endif
+}
+
+
+static DRFLAC_INLINE drflac_uint16 drflac__be2host_16(drflac_uint16 n)
+{
+    if (drflac__is_little_endian()) {
+        return drflac__swap_endian_uint16(n);
+    }
+
+    return n;
+}
+
+static DRFLAC_INLINE drflac_uint32 drflac__be2host_32(drflac_uint32 n)
+{
+    if (drflac__is_little_endian()) {
+        return drflac__swap_endian_uint32(n);
+    }
+
+    return n;
+}
+
+static DRFLAC_INLINE drflac_uint64 drflac__be2host_64(drflac_uint64 n)
+{
+    if (drflac__is_little_endian()) {
+        return drflac__swap_endian_uint64(n);
+    }
+
+    return n;
+}
+
+
+static DRFLAC_INLINE drflac_uint32 drflac__le2host_32(drflac_uint32 n)
+{
+    if (!drflac__is_little_endian()) {
+        return drflac__swap_endian_uint32(n);
+    }
+
+    return n;
+}
+
+
+static DRFLAC_INLINE drflac_uint32 drflac__unsynchsafe_32(drflac_uint32 n)
+{
+    drflac_uint32 result = 0;
+    result |= (n & 0x7F000000) >> 3;
+    result |= (n & 0x007F0000) >> 2;
+    result |= (n & 0x00007F00) >> 1;
+    result |= (n & 0x0000007F) >> 0;
+
+    return result;
+}
+
+
+
+/* The CRC code below is based on this document: http://zlib.net/crc_v3.txt */
+static drflac_uint8 drflac__crc8_table[] = {
+    0x00, 0x07, 0x0E, 0x09, 0x1C, 0x1B, 0x12, 0x15, 0x38, 0x3F, 0x36, 0x31, 0x24, 0x23, 0x2A, 0x2D,
+    0x70, 0x77, 0x7E, 0x79, 0x6C, 0x6B, 0x62, 0x65, 0x48, 0x4F, 0x46, 0x41, 0x54, 0x53, 0x5A, 0x5D,
+    0xE0, 0xE7, 0xEE, 0xE9, 0xFC, 0xFB, 0xF2, 0xF5, 0xD8, 0xDF, 0xD6, 0xD1, 0xC4, 0xC3, 0xCA, 0xCD,
+    0x90, 0x97, 0x9E, 0x99, 0x8C, 0x8B, 0x82, 0x85, 0xA8, 0xAF, 0xA6, 0xA1, 0xB4, 0xB3, 0xBA, 0xBD,
+    0xC7, 0xC0, 0xC9, 0xCE, 0xDB, 0xDC, 0xD5, 0xD2, 0xFF, 0xF8, 0xF1, 0xF6, 0xE3, 0xE4, 0xED, 0xEA,
+    0xB7, 0xB0, 0xB9, 0xBE, 0xAB, 0xAC, 0xA5, 0xA2, 0x8F, 0x88, 0x81, 0x86, 0x93, 0x94, 0x9D, 0x9A,
+    0x27, 0x20, 0x29, 0x2E, 0x3B, 0x3C, 0x35, 0x32, 0x1F, 0x18, 0x11, 0x16, 0x03, 0x04, 0x0D, 0x0A,
+    0x57, 0x50, 0x59, 0x5E, 0x4B, 0x4C, 0x45, 0x42, 0x6F, 0x68, 0x61, 0x66, 0x73, 0x74, 0x7D, 0x7A,
+    0x89, 0x8E, 0x87, 0x80, 0x95, 0x92, 0x9B, 0x9C, 0xB1, 0xB6, 0xBF, 0xB8, 0xAD, 0xAA, 0xA3, 0xA4,
+    0xF9, 0xFE, 0xF7, 0xF0, 0xE5, 0xE2, 0xEB, 0xEC, 0xC1, 0xC6, 0xCF, 0xC8, 0xDD, 0xDA, 0xD3, 0xD4,
+    0x69, 0x6E, 0x67, 0x60, 0x75, 0x72, 0x7B, 0x7C, 0x51, 0x56, 0x5F, 0x58, 0x4D, 0x4A, 0x43, 0x44,
+    0x19, 0x1E, 0x17, 0x10, 0x05, 0x02, 0x0B, 0x0C, 0x21, 0x26, 0x2F, 0x28, 0x3D, 0x3A, 0x33, 0x34,
+    0x4E, 0x49, 0x40, 0x47, 0x52, 0x55, 0x5C, 0x5B, 0x76, 0x71, 0x78, 0x7F, 0x6A, 0x6D, 0x64, 0x63,
+    0x3E, 0x39, 0x30, 0x37, 0x22, 0x25, 0x2C, 0x2B, 0x06, 0x01, 0x08, 0x0F, 0x1A, 0x1D, 0x14, 0x13,
+    0xAE, 0xA9, 0xA0, 0xA7, 0xB2, 0xB5, 0xBC, 0xBB, 0x96, 0x91, 0x98, 0x9F, 0x8A, 0x8D, 0x84, 0x83,
+    0xDE, 0xD9, 0xD0, 0xD7, 0xC2, 0xC5, 0xCC, 0xCB, 0xE6, 0xE1, 0xE8, 0xEF, 0xFA, 0xFD, 0xF4, 0xF3
+};
+
+static drflac_uint16 drflac__crc16_table[] = {
+    0x0000, 0x8005, 0x800F, 0x000A, 0x801B, 0x001E, 0x0014, 0x8011,
+    0x8033, 0x0036, 0x003C, 0x8039, 0x0028, 0x802D, 0x8027, 0x0022,
+    0x8063, 0x0066, 0x006C, 0x8069, 0x0078, 0x807D, 0x8077, 0x0072,
+    0x0050, 0x8055, 0x805F, 0x005A, 0x804B, 0x004E, 0x0044, 0x8041,
+    0x80C3, 0x00C6, 0x00CC, 0x80C9, 0x00D8, 0x80DD, 0x80D7, 0x00D2,
+    0x00F0, 0x80F5, 0x80FF, 0x00FA, 0x80EB, 0x00EE, 0x00E4, 0x80E1,
+    0x00A0, 0x80A5, 0x80AF, 0x00AA, 0x80BB, 0x00BE, 0x00B4, 0x80B1,
+    0x8093, 0x0096, 0x009C, 0x8099, 0x0088, 0x808D, 0x8087, 0x0082,
+    0x8183, 0x0186, 0x018C, 0x8189, 0x0198, 0x819D, 0x8197, 0x0192,
+    0x01B0, 0x81B5, 0x81BF, 0x01BA, 0x81AB, 0x01AE, 0x01A4, 0x81A1,
+    0x01E0, 0x81E5, 0x81EF, 0x01EA, 0x81FB, 0x01FE, 0x01F4, 0x81F1,
+    0x81D3, 0x01D6, 0x01DC, 0x81D9, 0x01C8, 0x81CD, 0x81C7, 0x01C2,
+    0x0140, 0x8145, 0x814F, 0x014A, 0x815B, 0x015E, 0x0154, 0x8151,
+    0x8173, 0x0176, 0x017C, 0x8179, 0x0168, 0x816D, 0x8167, 0x0162,
+    0x8123, 0x0126, 0x012C, 0x8129, 0x0138, 0x813D, 0x8137, 0x0132,
+    0x0110, 0x8115, 0x811F, 0x011A, 0x810B, 0x010E, 0x0104, 0x8101,
+    0x8303, 0x0306, 0x030C, 0x8309, 0x0318, 0x831D, 0x8317, 0x0312,
+    0x0330, 0x8335, 0x833F, 0x033A, 0x832B, 0x032E, 0x0324, 0x8321,
+    0x0360, 0x8365, 0x836F, 0x036A, 0x837B, 0x037E, 0x0374, 0x8371,
+    0x8353, 0x0356, 0x035C, 0x8359, 0x0348, 0x834D, 0x8347, 0x0342,
+    0x03C0, 0x83C5, 0x83CF, 0x03CA, 0x83DB, 0x03DE, 0x03D4, 0x83D1,
+    0x83F3, 0x03F6, 0x03FC, 0x83F9, 0x03E8, 0x83ED, 0x83E7, 0x03E2,
+    0x83A3, 0x03A6, 0x03AC, 0x83A9, 0x03B8, 0x83BD, 0x83B7, 0x03B2,
+    0x0390, 0x8395, 0x839F, 0x039A, 0x838B, 0x038E, 0x0384, 0x8381,
+    0x0280, 0x8285, 0x828F, 0x028A, 0x829B, 0x029E, 0x0294, 0x8291,
+    0x82B3, 0x02B6, 0x02BC, 0x82B9, 0x02A8, 0x82AD, 0x82A7, 0x02A2,
+    0x82E3, 0x02E6, 0x02EC, 0x82E9, 0x02F8, 0x82FD, 0x82F7, 0x02F2,
+    0x02D0, 0x82D5, 0x82DF, 0x02DA, 0x82CB, 0x02CE, 0x02C4, 0x82C1,
+    0x8243, 0x0246, 0x024C, 0x8249, 0x0258, 0x825D, 0x8257, 0x0252,
+    0x0270, 0x8275, 0x827F, 0x027A, 0x826B, 0x026E, 0x0264, 0x8261,
+    0x0220, 0x8225, 0x822F, 0x022A, 0x823B, 0x023E, 0x0234, 0x8231,
+    0x8213, 0x0216, 0x021C, 0x8219, 0x0208, 0x820D, 0x8207, 0x0202
+};
+
+static DRFLAC_INLINE drflac_uint8 drflac_crc8_byte(drflac_uint8 crc, drflac_uint8 data)
+{
+    return drflac__crc8_table[crc ^ data];
+}
+
+static DRFLAC_INLINE drflac_uint8 drflac_crc8(drflac_uint8 crc, drflac_uint32 data, drflac_uint32 count)
+{
+#ifdef DR_FLAC_NO_CRC
+    (void)crc;
+    (void)data;
+    (void)count;
+    return 0;
+#else
+#if 0
+    /* REFERENCE (use of this implementation requires an explicit flush by doing "drflac_crc8(crc, 0, 8);") */
+    drflac_uint8 p = 0x07;
+    for (int i = count-1; i >= 0; --i) {
+        drflac_uint8 bit = (data & (1 << i)) >> i;
+        if (crc & 0x80) {
+            crc = ((crc << 1) | bit) ^ p;
+        } else {
+            crc = ((crc << 1) | bit);
+        }
+    }
+    return crc;
+#else
+    drflac_uint32 wholeBytes;
+    drflac_uint32 leftoverBits;
+    drflac_uint64 leftoverDataMask;
+
+    static drflac_uint64 leftoverDataMaskTable[8] = {
+        0x00, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F
+    };
+
+    DRFLAC_ASSERT(count <= 32);
+
+    wholeBytes = count >> 3;
+    leftoverBits = count - (wholeBytes*8);
+    leftoverDataMask = leftoverDataMaskTable[leftoverBits];
+
+    switch (wholeBytes) {
+        case 4: crc = drflac_crc8_byte(crc, (drflac_uint8)((data & (0xFF000000UL << leftoverBits)) >> (24 + leftoverBits)));
+        case 3: crc = drflac_crc8_byte(crc, (drflac_uint8)((data & (0x00FF0000UL << leftoverBits)) >> (16 + leftoverBits)));
+        case 2: crc = drflac_crc8_byte(crc, (drflac_uint8)((data & (0x0000FF00UL << leftoverBits)) >> ( 8 + leftoverBits)));
+        case 1: crc = drflac_crc8_byte(crc, (drflac_uint8)((data & (0x000000FFUL << leftoverBits)) >> ( 0 + leftoverBits)));
+        case 0: if (leftoverBits > 0) crc = (drflac_uint8)((crc << leftoverBits) ^ drflac__crc8_table[(crc >> (8 - leftoverBits)) ^ (data & leftoverDataMask)]);
+    }
+    return crc;
+#endif
+#endif
+}
+
+static DRFLAC_INLINE drflac_uint16 drflac_crc16_byte(drflac_uint16 crc, drflac_uint8 data)
+{
+    return (crc << 8) ^ drflac__crc16_table[(drflac_uint8)(crc >> 8) ^ data];
+}
+
+static DRFLAC_INLINE drflac_uint16 drflac_crc16_cache(drflac_uint16 crc, drflac_cache_t data)
+{
+#ifdef DRFLAC_64BIT
+    crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 56) & 0xFF));
+    crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 48) & 0xFF));
+    crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 40) & 0xFF));
+    crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 32) & 0xFF));
+#endif
+    crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 24) & 0xFF));
+    crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 16) & 0xFF));
+    crc = drflac_crc16_byte(crc, (drflac_uint8)((data >>  8) & 0xFF));
+    crc = drflac_crc16_byte(crc, (drflac_uint8)((data >>  0) & 0xFF));
+
+    return crc;
+}
+
+static DRFLAC_INLINE drflac_uint16 drflac_crc16_bytes(drflac_uint16 crc, drflac_cache_t data, drflac_uint32 byteCount)
+{
+    switch (byteCount)
+    {
+#ifdef DRFLAC_64BIT
+    case 8: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 56) & 0xFF));
+    case 7: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 48) & 0xFF));
+    case 6: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 40) & 0xFF));
+    case 5: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 32) & 0xFF));
+#endif
+    case 4: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 24) & 0xFF));
+    case 3: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 16) & 0xFF));
+    case 2: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >>  8) & 0xFF));
+    case 1: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >>  0) & 0xFF));
+    }
+
+    return crc;
+}
+
+#if 0
+static DRFLAC_INLINE drflac_uint16 drflac_crc16__32bit(drflac_uint16 crc, drflac_uint32 data, drflac_uint32 count)
+{
+#ifdef DR_FLAC_NO_CRC
+    (void)crc;
+    (void)data;
+    (void)count;
+    return 0;
+#else
+#if 0
+    /* REFERENCE (use of this implementation requires an explicit flush by doing "drflac_crc16(crc, 0, 16);") */
+    drflac_uint16 p = 0x8005;
+    for (int i = count-1; i >= 0; --i) {
+        drflac_uint16 bit = (data & (1ULL << i)) >> i;
+        if (r & 0x8000) {
+            r = ((r << 1) | bit) ^ p;
+        } else {
+            r = ((r << 1) | bit);
+        }
+    }
+
+    return crc;
+#else
+    drflac_uint32 wholeBytes;
+    drflac_uint32 leftoverBits;
+    drflac_uint64 leftoverDataMask;
+
+    static drflac_uint64 leftoverDataMaskTable[8] = {
+        0x00, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F
+    };
+
+    DRFLAC_ASSERT(count <= 64);
+
+    wholeBytes = count >> 3;
+    leftoverBits = count & 7;
+    leftoverDataMask = leftoverDataMaskTable[leftoverBits];
+
+    switch (wholeBytes) {
+        default:
+        case 4: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (0xFF000000UL << leftoverBits)) >> (24 + leftoverBits)));
+        case 3: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (0x00FF0000UL << leftoverBits)) >> (16 + leftoverBits)));
+        case 2: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (0x0000FF00UL << leftoverBits)) >> ( 8 + leftoverBits)));
+        case 1: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (0x000000FFUL << leftoverBits)) >> ( 0 + leftoverBits)));
+        case 0: if (leftoverBits > 0) crc = (crc << leftoverBits) ^ drflac__crc16_table[(crc >> (16 - leftoverBits)) ^ (data & leftoverDataMask)];
+    }
+    return crc;
+#endif
+#endif
+}
+
+static DRFLAC_INLINE drflac_uint16 drflac_crc16__64bit(drflac_uint16 crc, drflac_uint64 data, drflac_uint32 count)
+{
+#ifdef DR_FLAC_NO_CRC
+    (void)crc;
+    (void)data;
+    (void)count;
+    return 0;
+#else
+    drflac_uint32 wholeBytes;
+    drflac_uint32 leftoverBits;
+    drflac_uint64 leftoverDataMask;
+
+    static drflac_uint64 leftoverDataMaskTable[8] = {
+        0x00, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F
+    };
+
+    DRFLAC_ASSERT(count <= 64);
+
+    wholeBytes = count >> 3;
+    leftoverBits = count & 7;
+    leftoverDataMask = leftoverDataMaskTable[leftoverBits];
+
+    switch (wholeBytes) {
+        default:
+        case 8: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0xFF000000 << 32) << leftoverBits)) >> (56 + leftoverBits)));    /* Weird "<< 32" bitshift is required for C89 because it doesn't support 64-bit constants. Should be optimized out by a good compiler. */
+        case 7: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x00FF0000 << 32) << leftoverBits)) >> (48 + leftoverBits)));
+        case 6: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x0000FF00 << 32) << leftoverBits)) >> (40 + leftoverBits)));
+        case 5: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x000000FF << 32) << leftoverBits)) >> (32 + leftoverBits)));
+        case 4: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0xFF000000      ) << leftoverBits)) >> (24 + leftoverBits)));
+        case 3: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x00FF0000      ) << leftoverBits)) >> (16 + leftoverBits)));
+        case 2: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x0000FF00      ) << leftoverBits)) >> ( 8 + leftoverBits)));
+        case 1: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x000000FF      ) << leftoverBits)) >> ( 0 + leftoverBits)));
+        case 0: if (leftoverBits > 0) crc = (crc << leftoverBits) ^ drflac__crc16_table[(crc >> (16 - leftoverBits)) ^ (data & leftoverDataMask)];
+    }
+    return crc;
+#endif
+}
+
+
+static DRFLAC_INLINE drflac_uint16 drflac_crc16(drflac_uint16 crc, drflac_cache_t data, drflac_uint32 count)
+{
+#ifdef DRFLAC_64BIT
+    return drflac_crc16__64bit(crc, data, count);
+#else
+    return drflac_crc16__32bit(crc, data, count);
+#endif
+}
+#endif
+
+
+#ifdef DRFLAC_64BIT
+#define drflac__be2host__cache_line drflac__be2host_64
+#else
+#define drflac__be2host__cache_line drflac__be2host_32
+#endif
+
+/*
+BIT READING ATTEMPT #2
+
+This uses a 32- or 64-bit bit-shifted cache - as bits are read, the cache is shifted such that the first valid bit is sitting
+on the most significant bit. It uses the notion of an L1 and L2 cache (borrowed from CPU architecture), where the L1 cache
+is a 32- or 64-bit unsigned integer (depending on whether or not a 32- or 64-bit build is being compiled) and the L2 is an
+array of "cache lines", with each cache line being the same size as the L1. The L2 is a buffer of about 4KB and is where data
+from onRead() is read into.
+*/
+#define DRFLAC_CACHE_L1_SIZE_BYTES(bs)                      (sizeof((bs)->cache))
+#define DRFLAC_CACHE_L1_SIZE_BITS(bs)                       (sizeof((bs)->cache)*8)
+#define DRFLAC_CACHE_L1_BITS_REMAINING(bs)                  (DRFLAC_CACHE_L1_SIZE_BITS(bs) - (bs)->consumedBits)
+#define DRFLAC_CACHE_L1_SELECTION_MASK(_bitCount)           (~((~(drflac_cache_t)0) >> (_bitCount)))
+#define DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, _bitCount)      (DRFLAC_CACHE_L1_SIZE_BITS(bs) - (_bitCount))
+#define DRFLAC_CACHE_L1_SELECT(bs, _bitCount)               (((bs)->cache) & DRFLAC_CACHE_L1_SELECTION_MASK(_bitCount))
+#define DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, _bitCount)     (DRFLAC_CACHE_L1_SELECT((bs), (_bitCount)) >>  DRFLAC_CACHE_L1_SELECTION_SHIFT((bs), (_bitCount)))
+#define DRFLAC_CACHE_L1_SELECT_AND_SHIFT_SAFE(bs, _bitCount)(DRFLAC_CACHE_L1_SELECT((bs), (_bitCount)) >> (DRFLAC_CACHE_L1_SELECTION_SHIFT((bs), (_bitCount)) & (DRFLAC_CACHE_L1_SIZE_BITS(bs)-1)))
+#define DRFLAC_CACHE_L2_SIZE_BYTES(bs)                      (sizeof((bs)->cacheL2))
+#define DRFLAC_CACHE_L2_LINE_COUNT(bs)                      (DRFLAC_CACHE_L2_SIZE_BYTES(bs) / sizeof((bs)->cacheL2[0]))
+#define DRFLAC_CACHE_L2_LINES_REMAINING(bs)                 (DRFLAC_CACHE_L2_LINE_COUNT(bs) - (bs)->nextL2Line)
+
+
+#ifndef DR_FLAC_NO_CRC
+static DRFLAC_INLINE void drflac__reset_crc16(drflac_bs* bs)
+{
+    bs->crc16 = 0;
+    bs->crc16CacheIgnoredBytes = bs->consumedBits >> 3;
+}
+
+static DRFLAC_INLINE void drflac__update_crc16(drflac_bs* bs)
+{
+    if (bs->crc16CacheIgnoredBytes == 0) {
+        bs->crc16 = drflac_crc16_cache(bs->crc16, bs->crc16Cache);
+    } else {
+        bs->crc16 = drflac_crc16_bytes(bs->crc16, bs->crc16Cache, DRFLAC_CACHE_L1_SIZE_BYTES(bs) - bs->crc16CacheIgnoredBytes);
+        bs->crc16CacheIgnoredBytes = 0;
+    }
+}
+
+static DRFLAC_INLINE drflac_uint16 drflac__flush_crc16(drflac_bs* bs)
+{
+    /* We should never be flushing in a situation where we are not aligned on a byte boundary. */
+    DRFLAC_ASSERT((DRFLAC_CACHE_L1_BITS_REMAINING(bs) & 7) == 0);
+
+    /*
+    The bits that were read from the L1 cache need to be accumulated. The number of bytes needing to be accumulated is determined
+    by the number of bits that have been consumed.
+    */
+    if (DRFLAC_CACHE_L1_BITS_REMAINING(bs) == 0) {
+        drflac__update_crc16(bs);
+    } else {
+        /* We only accumulate the consumed bits. */
+        bs->crc16 = drflac_crc16_bytes(bs->crc16, bs->crc16Cache >> DRFLAC_CACHE_L1_BITS_REMAINING(bs), (bs->consumedBits >> 3) - bs->crc16CacheIgnoredBytes);
+
+        /*
+        The bits that we just accumulated should never be accumulated again. We need to keep track of how many bytes were accumulated
+        so we can handle that later.
+        */
+        bs->crc16CacheIgnoredBytes = bs->consumedBits >> 3;
+    }
+
+    return bs->crc16;
+}
+#endif
+
+static DRFLAC_INLINE drflac_bool32 drflac__reload_l1_cache_from_l2(drflac_bs* bs)
+{
+    size_t bytesRead;
+    size_t alignedL1LineCount;
+
+    /* Fast path. Try loading straight from L2. */
+    if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {
+        bs->cache = bs->cacheL2[bs->nextL2Line++];
+        return DRFLAC_TRUE;
+    }
+
+    /*
+    If we get here it means we've run out of data in the L2 cache. We'll need to fetch more from the client, if there's
+    any left.
+    */
+    if (bs->unalignedByteCount > 0) {
+        return DRFLAC_FALSE;   /* If we have any unaligned bytes it means there's no more aligned bytes left in the client. */
+    }
+
+    bytesRead = bs->onRead(bs->pUserData, bs->cacheL2, DRFLAC_CACHE_L2_SIZE_BYTES(bs));
+
+    bs->nextL2Line = 0;
+    if (bytesRead == DRFLAC_CACHE_L2_SIZE_BYTES(bs)) {
+        bs->cache = bs->cacheL2[bs->nextL2Line++];
+        return DRFLAC_TRUE;
+    }
+
+
+    /*
+    If we get here it means we were unable to retrieve enough data to fill the entire L2 cache. It probably
+    means we've just reached the end of the file. We need to move the valid data down to the end of the buffer
+    and adjust the index of the next line accordingly. Also keep in mind that the L2 cache must be aligned to
+    the size of the L1 so we'll need to seek backwards by any misaligned bytes.
+    */
+    alignedL1LineCount = bytesRead / DRFLAC_CACHE_L1_SIZE_BYTES(bs);
+
+    /* We need to keep track of any unaligned bytes for later use. */
+    bs->unalignedByteCount = bytesRead - (alignedL1LineCount * DRFLAC_CACHE_L1_SIZE_BYTES(bs));
+    if (bs->unalignedByteCount > 0) {
+        bs->unalignedCache = bs->cacheL2[alignedL1LineCount];
+    }
+
+    if (alignedL1LineCount > 0) {
+        size_t offset = DRFLAC_CACHE_L2_LINE_COUNT(bs) - alignedL1LineCount;
+        size_t i;
+        for (i = alignedL1LineCount; i > 0; --i) {
+            bs->cacheL2[i-1 + offset] = bs->cacheL2[i-1];
+        }
+
+        bs->nextL2Line = (drflac_uint32)offset;
+        bs->cache = bs->cacheL2[bs->nextL2Line++];
+        return DRFLAC_TRUE;
+    } else {
+        /* If we get into this branch it means we weren't able to load any L1-aligned data. */
+        bs->nextL2Line = DRFLAC_CACHE_L2_LINE_COUNT(bs);
+        return DRFLAC_FALSE;
+    }
+}
+
+static drflac_bool32 drflac__reload_cache(drflac_bs* bs)
+{
+    size_t bytesRead;
+
+#ifndef DR_FLAC_NO_CRC
+    drflac__update_crc16(bs);
+#endif
+
+    /* Fast path. Try just moving the next value in the L2 cache to the L1 cache. */
+    if (drflac__reload_l1_cache_from_l2(bs)) {
+        bs->cache = drflac__be2host__cache_line(bs->cache);
+        bs->consumedBits = 0;
+#ifndef DR_FLAC_NO_CRC
+        bs->crc16Cache = bs->cache;
+#endif
+        return DRFLAC_TRUE;
+    }
+
+    /* Slow path. */
+
+    /*
+    If we get here it means we have failed to load the L1 cache from the L2. Likely we've just reached the end of the stream and the last
+    few bytes did not meet the alignment requirements for the L2 cache. In this case we need to fall back to a slower path and read the
+    data from the unaligned cache.
+    */
+    bytesRead = bs->unalignedByteCount;
+    if (bytesRead == 0) {
+        bs->consumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs);   /* <-- The stream has been exhausted, so marked the bits as consumed. */
+        return DRFLAC_FALSE;
+    }
+
+    DRFLAC_ASSERT(bytesRead < DRFLAC_CACHE_L1_SIZE_BYTES(bs));
+    bs->consumedBits = (drflac_uint32)(DRFLAC_CACHE_L1_SIZE_BYTES(bs) - bytesRead) * 8;
+
+    bs->cache = drflac__be2host__cache_line(bs->unalignedCache);
+    bs->cache &= DRFLAC_CACHE_L1_SELECTION_MASK(DRFLAC_CACHE_L1_BITS_REMAINING(bs));    /* <-- Make sure the consumed bits are always set to zero. Other parts of the library depend on this property. */
+    bs->unalignedByteCount = 0;     /* <-- At this point the unaligned bytes have been moved into the cache and we thus have no more unaligned bytes. */
+
+#ifndef DR_FLAC_NO_CRC
+    bs->crc16Cache = bs->cache >> bs->consumedBits;
+    bs->crc16CacheIgnoredBytes = bs->consumedBits >> 3;
+#endif
+    return DRFLAC_TRUE;
+}
+
+static void drflac__reset_cache(drflac_bs* bs)
+{
+    bs->nextL2Line   = DRFLAC_CACHE_L2_LINE_COUNT(bs);  /* <-- This clears the L2 cache. */
+    bs->consumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs);   /* <-- This clears the L1 cache. */
+    bs->cache = 0;
+    bs->unalignedByteCount = 0;                         /* <-- This clears the trailing unaligned bytes. */
+    bs->unalignedCache = 0;
+
+#ifndef DR_FLAC_NO_CRC
+    bs->crc16Cache = 0;
+    bs->crc16CacheIgnoredBytes = 0;
+#endif
+}
+
+
+static DRFLAC_INLINE drflac_bool32 drflac__read_uint32(drflac_bs* bs, unsigned int bitCount, drflac_uint32* pResultOut)
+{
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(pResultOut != NULL);
+    DRFLAC_ASSERT(bitCount > 0);
+    DRFLAC_ASSERT(bitCount <= 32);
+
+    if (bs->consumedBits == DRFLAC_CACHE_L1_SIZE_BITS(bs)) {
+        if (!drflac__reload_cache(bs)) {
+            return DRFLAC_FALSE;
+        }
+    }
+
+    if (bitCount <= DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
+        /*
+        If we want to load all 32-bits from a 32-bit cache we need to do it slightly differently because we can't do
+        a 32-bit shift on a 32-bit integer. This will never be the case on 64-bit caches, so we can have a slightly
+        more optimal solution for this.
+        */
+#ifdef DRFLAC_64BIT
+        *pResultOut = (drflac_uint32)DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCount);
+        bs->consumedBits += bitCount;
+        bs->cache <<= bitCount;
+#else
+        if (bitCount < DRFLAC_CACHE_L1_SIZE_BITS(bs)) {
+            *pResultOut = (drflac_uint32)DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCount);
+            bs->consumedBits += bitCount;
+            bs->cache <<= bitCount;
+        } else {
+            /* Cannot shift by 32-bits, so need to do it differently. */
+            *pResultOut = (drflac_uint32)bs->cache;
+            bs->consumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs);
+            bs->cache = 0;
+        }
+#endif
+
+        return DRFLAC_TRUE;
+    } else {
+        /* It straddles the cached data. It will never cover more than the next chunk. We just read the number in two parts and combine them. */
+        drflac_uint32 bitCountHi = DRFLAC_CACHE_L1_BITS_REMAINING(bs);
+        drflac_uint32 bitCountLo = bitCount - bitCountHi;
+        drflac_uint32 resultHi;
+
+        DRFLAC_ASSERT(bitCountHi > 0);
+        DRFLAC_ASSERT(bitCountHi < 32);
+        resultHi = (drflac_uint32)DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCountHi);
+
+        if (!drflac__reload_cache(bs)) {
+            return DRFLAC_FALSE;
+        }
+
+        *pResultOut = (resultHi << bitCountLo) | (drflac_uint32)DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCountLo);
+        bs->consumedBits += bitCountLo;
+        bs->cache <<= bitCountLo;
+        return DRFLAC_TRUE;
+    }
+}
+
+static drflac_bool32 drflac__read_int32(drflac_bs* bs, unsigned int bitCount, drflac_int32* pResult)
+{
+    drflac_uint32 result;
+    drflac_uint32 signbit;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(pResult != NULL);
+    DRFLAC_ASSERT(bitCount > 0);
+    DRFLAC_ASSERT(bitCount <= 32);
+
+    if (!drflac__read_uint32(bs, bitCount, &result)) {
+        return DRFLAC_FALSE;
+    }
+
+    signbit = ((result >> (bitCount-1)) & 0x01);
+    result |= (~signbit + 1) << bitCount;
+
+    *pResult = (drflac_int32)result;
+    return DRFLAC_TRUE;
+}
+
+#ifdef DRFLAC_64BIT
+static drflac_bool32 drflac__read_uint64(drflac_bs* bs, unsigned int bitCount, drflac_uint64* pResultOut)
+{
+    drflac_uint32 resultHi;
+    drflac_uint32 resultLo;
+
+    DRFLAC_ASSERT(bitCount <= 64);
+    DRFLAC_ASSERT(bitCount >  32);
+
+    if (!drflac__read_uint32(bs, bitCount - 32, &resultHi)) {
+        return DRFLAC_FALSE;
+    }
+
+    if (!drflac__read_uint32(bs, 32, &resultLo)) {
+        return DRFLAC_FALSE;
+    }
+
+    *pResultOut = (((drflac_uint64)resultHi) << 32) | ((drflac_uint64)resultLo);
+    return DRFLAC_TRUE;
+}
+#endif
+
+/* Function below is unused, but leaving it here in case I need to quickly add it again. */
+#if 0
+static drflac_bool32 drflac__read_int64(drflac_bs* bs, unsigned int bitCount, drflac_int64* pResultOut)
+{
+    drflac_uint64 result;
+    drflac_uint64 signbit;
+
+    DRFLAC_ASSERT(bitCount <= 64);
+
+    if (!drflac__read_uint64(bs, bitCount, &result)) {
+        return DRFLAC_FALSE;
+    }
+
+    signbit = ((result >> (bitCount-1)) & 0x01);
+    result |= (~signbit + 1) << bitCount;
+
+    *pResultOut = (drflac_int64)result;
+    return DRFLAC_TRUE;
+}
+#endif
+
+static drflac_bool32 drflac__read_uint16(drflac_bs* bs, unsigned int bitCount, drflac_uint16* pResult)
+{
+    drflac_uint32 result;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(pResult != NULL);
+    DRFLAC_ASSERT(bitCount > 0);
+    DRFLAC_ASSERT(bitCount <= 16);
+
+    if (!drflac__read_uint32(bs, bitCount, &result)) {
+        return DRFLAC_FALSE;
+    }
+
+    *pResult = (drflac_uint16)result;
+    return DRFLAC_TRUE;
+}
+
+#if 0
+static drflac_bool32 drflac__read_int16(drflac_bs* bs, unsigned int bitCount, drflac_int16* pResult)
+{
+    drflac_int32 result;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(pResult != NULL);
+    DRFLAC_ASSERT(bitCount > 0);
+    DRFLAC_ASSERT(bitCount <= 16);
+
+    if (!drflac__read_int32(bs, bitCount, &result)) {
+        return DRFLAC_FALSE;
+    }
+
+    *pResult = (drflac_int16)result;
+    return DRFLAC_TRUE;
+}
+#endif
+
+static drflac_bool32 drflac__read_uint8(drflac_bs* bs, unsigned int bitCount, drflac_uint8* pResult)
+{
+    drflac_uint32 result;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(pResult != NULL);
+    DRFLAC_ASSERT(bitCount > 0);
+    DRFLAC_ASSERT(bitCount <= 8);
+
+    if (!drflac__read_uint32(bs, bitCount, &result)) {
+        return DRFLAC_FALSE;
+    }
+
+    *pResult = (drflac_uint8)result;
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__read_int8(drflac_bs* bs, unsigned int bitCount, drflac_int8* pResult)
+{
+    drflac_int32 result;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(pResult != NULL);
+    DRFLAC_ASSERT(bitCount > 0);
+    DRFLAC_ASSERT(bitCount <= 8);
+
+    if (!drflac__read_int32(bs, bitCount, &result)) {
+        return DRFLAC_FALSE;
+    }
+
+    *pResult = (drflac_int8)result;
+    return DRFLAC_TRUE;
+}
+
+
+static drflac_bool32 drflac__seek_bits(drflac_bs* bs, size_t bitsToSeek)
+{
+    if (bitsToSeek <= DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
+        bs->consumedBits += (drflac_uint32)bitsToSeek;
+        bs->cache <<= bitsToSeek;
+        return DRFLAC_TRUE;
+    } else {
+        /* It straddles the cached data. This function isn't called too frequently so I'm favouring simplicity here. */
+        bitsToSeek       -= DRFLAC_CACHE_L1_BITS_REMAINING(bs);
+        bs->consumedBits += DRFLAC_CACHE_L1_BITS_REMAINING(bs);
+        bs->cache         = 0;
+
+        /* Simple case. Seek in groups of the same number as bits that fit within a cache line. */
+#ifdef DRFLAC_64BIT
+        while (bitsToSeek >= DRFLAC_CACHE_L1_SIZE_BITS(bs)) {
+            drflac_uint64 bin;
+            if (!drflac__read_uint64(bs, DRFLAC_CACHE_L1_SIZE_BITS(bs), &bin)) {
+                return DRFLAC_FALSE;
+            }
+            bitsToSeek -= DRFLAC_CACHE_L1_SIZE_BITS(bs);
+        }
+#else
+        while (bitsToSeek >= DRFLAC_CACHE_L1_SIZE_BITS(bs)) {
+            drflac_uint32 bin;
+            if (!drflac__read_uint32(bs, DRFLAC_CACHE_L1_SIZE_BITS(bs), &bin)) {
+                return DRFLAC_FALSE;
+            }
+            bitsToSeek -= DRFLAC_CACHE_L1_SIZE_BITS(bs);
+        }
+#endif
+
+        /* Whole leftover bytes. */
+        while (bitsToSeek >= 8) {
+            drflac_uint8 bin;
+            if (!drflac__read_uint8(bs, 8, &bin)) {
+                return DRFLAC_FALSE;
+            }
+            bitsToSeek -= 8;
+        }
+
+        /* Leftover bits. */
+        if (bitsToSeek > 0) {
+            drflac_uint8 bin;
+            if (!drflac__read_uint8(bs, (drflac_uint32)bitsToSeek, &bin)) {
+                return DRFLAC_FALSE;
+            }
+            bitsToSeek = 0; /* <-- Necessary for the assert below. */
+        }
+
+        DRFLAC_ASSERT(bitsToSeek == 0);
+        return DRFLAC_TRUE;
+    }
+}
+
+
+/* This function moves the bit streamer to the first bit after the sync code (bit 15 of the of the frame header). It will also update the CRC-16. */
+static drflac_bool32 drflac__find_and_seek_to_next_sync_code(drflac_bs* bs)
+{
+    DRFLAC_ASSERT(bs != NULL);
+
+    /*
+    The sync code is always aligned to 8 bits. This is convenient for us because it means we can do byte-aligned movements. The first
+    thing to do is align to the next byte.
+    */
+    if (!drflac__seek_bits(bs, DRFLAC_CACHE_L1_BITS_REMAINING(bs) & 7)) {
+        return DRFLAC_FALSE;
+    }
+
+    for (;;) {
+        drflac_uint8 hi;
+
+#ifndef DR_FLAC_NO_CRC
+        drflac__reset_crc16(bs);
+#endif
+
+        if (!drflac__read_uint8(bs, 8, &hi)) {
+            return DRFLAC_FALSE;
+        }
+
+        if (hi == 0xFF) {
+            drflac_uint8 lo;
+            if (!drflac__read_uint8(bs, 6, &lo)) {
+                return DRFLAC_FALSE;
+            }
+
+            if (lo == 0x3E) {
+                return DRFLAC_TRUE;
+            } else {
+                if (!drflac__seek_bits(bs, DRFLAC_CACHE_L1_BITS_REMAINING(bs) & 7)) {
+                    return DRFLAC_FALSE;
+                }
+            }
+        }
+    }
+
+    /* Should never get here. */
+    /*return DRFLAC_FALSE;*/
+}
+
+
+#if defined(DRFLAC_HAS_LZCNT_INTRINSIC)
+#define DRFLAC_IMPLEMENT_CLZ_LZCNT
+#endif
+#if  defined(_MSC_VER) && _MSC_VER >= 1400 && (defined(DRFLAC_X64) || defined(DRFLAC_X86))
+#define DRFLAC_IMPLEMENT_CLZ_MSVC
+#endif
+
+static DRFLAC_INLINE drflac_uint32 drflac__clz_software(drflac_cache_t x)
+{
+    drflac_uint32 n;
+    static drflac_uint32 clz_table_4[] = {
+        0,
+        4,
+        3, 3,
+        2, 2, 2, 2,
+        1, 1, 1, 1, 1, 1, 1, 1
+    };
+
+    if (x == 0) {
+        return sizeof(x)*8;
+    }
+
+    n = clz_table_4[x >> (sizeof(x)*8 - 4)];
+    if (n == 0) {
+#ifdef DRFLAC_64BIT
+        if ((x & ((drflac_uint64)0xFFFFFFFF << 32)) == 0) { n  = 32; x <<= 32; }
+        if ((x & ((drflac_uint64)0xFFFF0000 << 32)) == 0) { n += 16; x <<= 16; }
+        if ((x & ((drflac_uint64)0xFF000000 << 32)) == 0) { n += 8;  x <<= 8;  }
+        if ((x & ((drflac_uint64)0xF0000000 << 32)) == 0) { n += 4;  x <<= 4;  }
+#else
+        if ((x & 0xFFFF0000) == 0) { n  = 16; x <<= 16; }
+        if ((x & 0xFF000000) == 0) { n += 8;  x <<= 8;  }
+        if ((x & 0xF0000000) == 0) { n += 4;  x <<= 4;  }
+#endif
+        n += clz_table_4[x >> (sizeof(x)*8 - 4)];
+    }
+
+    return n - 1;
+}
+
+#ifdef DRFLAC_IMPLEMENT_CLZ_LZCNT
+static DRFLAC_INLINE drflac_bool32 drflac__is_lzcnt_supported(void)
+{
+    /* Fast compile time check for ARM. */
+#if defined(DRFLAC_HAS_LZCNT_INTRINSIC) && defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 5)
+    return DRFLAC_TRUE;
+#else
+    /* If the compiler itself does not support the intrinsic then we'll need to return false. */
+    #ifdef DRFLAC_HAS_LZCNT_INTRINSIC
+        return drflac__gIsLZCNTSupported;
+    #else
+        return DRFLAC_FALSE;
+    #endif
+#endif
+}
+
+static DRFLAC_INLINE drflac_uint32 drflac__clz_lzcnt(drflac_cache_t x)
+{
+#if defined(_MSC_VER) && !defined(__clang__)
+    #ifdef DRFLAC_64BIT
+        return (drflac_uint32)__lzcnt64(x);
+    #else
+        return (drflac_uint32)__lzcnt(x);
+    #endif
+#else
+    #if defined(__GNUC__) || defined(__clang__)
+        #if defined(DRFLAC_X64)
+            {
+                drflac_uint64 r;
+                __asm__ __volatile__ (
+                    "lzcnt{ %1, %0| %0, %1}" : "=r"(r) : "r"(x)
+                );
+
+                return (drflac_uint32)r;
+            }
+        #elif defined(DRFLAC_X86)
+            {
+                drflac_uint32 r;
+                __asm__ __volatile__ (
+                    "lzcnt{l %1, %0| %0, %1}" : "=r"(r) : "r"(x)
+                );
+
+                return r;
+            }
+        #elif defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 5) && !defined(DRFLAC_64BIT)   /* <-- I haven't tested 64-bit inline assembly, so only enabling this for the 32-bit build for now. */
+            {
+                unsigned int r;
+                __asm__ __volatile__ (
+                #if defined(DRFLAC_64BIT)
+                    "clz %w[out], %w[in]" : [out]"=r"(r) : [in]"r"(x)   /* <-- This is untested. If someone in the community could test this, that would be appreciated! */
+                #else
+                    "clz %[out], %[in]" : [out]"=r"(r) : [in]"r"(x)
+                #endif
+                );
+
+                return r;
+            }
+        #else
+            if (x == 0) {
+                return sizeof(x)*8;
+            }
+            #ifdef DRFLAC_64BIT
+                return (drflac_uint32)__builtin_clzll((drflac_uint64)x);
+            #else
+                return (drflac_uint32)__builtin_clzl((drflac_uint32)x);
+            #endif
+        #endif
+    #else
+        /* Unsupported compiler. */
+        #error "This compiler does not support the lzcnt intrinsic."
+    #endif
+#endif
+}
+#endif
+
+#ifdef DRFLAC_IMPLEMENT_CLZ_MSVC
+#include <intrin.h> /* For BitScanReverse(). */
+
+static DRFLAC_INLINE drflac_uint32 drflac__clz_msvc(drflac_cache_t x)
+{
+    drflac_uint32 n;
+
+    if (x == 0) {
+        return sizeof(x)*8;
+    }
+
+#ifdef DRFLAC_64BIT
+    _BitScanReverse64((unsigned long*)&n, x);
+#else
+    _BitScanReverse((unsigned long*)&n, x);
+#endif
+    return sizeof(x)*8 - n - 1;
+}
+#endif
+
+static DRFLAC_INLINE drflac_uint32 drflac__clz(drflac_cache_t x)
+{
+#ifdef DRFLAC_IMPLEMENT_CLZ_LZCNT
+    if (drflac__is_lzcnt_supported()) {
+        return drflac__clz_lzcnt(x);
+    } else
+#endif
+    {
+#ifdef DRFLAC_IMPLEMENT_CLZ_MSVC
+        return drflac__clz_msvc(x);
+#else
+        return drflac__clz_software(x);
+#endif
+    }
+}
+
+
+static DRFLAC_INLINE drflac_bool32 drflac__seek_past_next_set_bit(drflac_bs* bs, unsigned int* pOffsetOut)
+{
+    drflac_uint32 zeroCounter = 0;
+    drflac_uint32 setBitOffsetPlus1;
+
+    while (bs->cache == 0) {
+        zeroCounter += (drflac_uint32)DRFLAC_CACHE_L1_BITS_REMAINING(bs);
+        if (!drflac__reload_cache(bs)) {
+            return DRFLAC_FALSE;
+        }
+    }
+
+    setBitOffsetPlus1 = drflac__clz(bs->cache);
+    setBitOffsetPlus1 += 1;
+
+    bs->consumedBits += setBitOffsetPlus1;
+    bs->cache <<= setBitOffsetPlus1;
+
+    *pOffsetOut = zeroCounter + setBitOffsetPlus1 - 1;
+    return DRFLAC_TRUE;
+}
+
+
+
+static drflac_bool32 drflac__seek_to_byte(drflac_bs* bs, drflac_uint64 offsetFromStart)
+{
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(offsetFromStart > 0);
+
+    /*
+    Seeking from the start is not quite as trivial as it sounds because the onSeek callback takes a signed 32-bit integer (which
+    is intentional because it simplifies the implementation of the onSeek callbacks), however offsetFromStart is unsigned 64-bit.
+    To resolve we just need to do an initial seek from the start, and then a series of offset seeks to make up the remainder.
+    */
+    if (offsetFromStart > 0x7FFFFFFF) {
+        drflac_uint64 bytesRemaining = offsetFromStart;
+        if (!bs->onSeek(bs->pUserData, 0x7FFFFFFF, drflac_seek_origin_start)) {
+            return DRFLAC_FALSE;
+        }
+        bytesRemaining -= 0x7FFFFFFF;
+
+        while (bytesRemaining > 0x7FFFFFFF) {
+            if (!bs->onSeek(bs->pUserData, 0x7FFFFFFF, drflac_seek_origin_current)) {
+                return DRFLAC_FALSE;
+            }
+            bytesRemaining -= 0x7FFFFFFF;
+        }
+
+        if (bytesRemaining > 0) {
+            if (!bs->onSeek(bs->pUserData, (int)bytesRemaining, drflac_seek_origin_current)) {
+                return DRFLAC_FALSE;
+            }
+        }
+    } else {
+        if (!bs->onSeek(bs->pUserData, (int)offsetFromStart, drflac_seek_origin_start)) {
+            return DRFLAC_FALSE;
+        }
+    }
+
+    /* The cache should be reset to force a reload of fresh data from the client. */
+    drflac__reset_cache(bs);
+    return DRFLAC_TRUE;
+}
+
+
+static drflac_result drflac__read_utf8_coded_number(drflac_bs* bs, drflac_uint64* pNumberOut, drflac_uint8* pCRCOut)
+{
+    drflac_uint8 crc;
+    drflac_uint64 result;
+    drflac_uint8 utf8[7] = {0};
+    int byteCount;
+    int i;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(pNumberOut != NULL);
+    DRFLAC_ASSERT(pCRCOut != NULL);
+
+    crc = *pCRCOut;
+
+    if (!drflac__read_uint8(bs, 8, utf8)) {
+        *pNumberOut = 0;
+        return DRFLAC_AT_END;
+    }
+    crc = drflac_crc8(crc, utf8[0], 8);
+
+    if ((utf8[0] & 0x80) == 0) {
+        *pNumberOut = utf8[0];
+        *pCRCOut = crc;
+        return DRFLAC_SUCCESS;
+    }
+
+    /*byteCount = 1;*/
+    if ((utf8[0] & 0xE0) == 0xC0) {
+        byteCount = 2;
+    } else if ((utf8[0] & 0xF0) == 0xE0) {
+        byteCount = 3;
+    } else if ((utf8[0] & 0xF8) == 0xF0) {
+        byteCount = 4;
+    } else if ((utf8[0] & 0xFC) == 0xF8) {
+        byteCount = 5;
+    } else if ((utf8[0] & 0xFE) == 0xFC) {
+        byteCount = 6;
+    } else if ((utf8[0] & 0xFF) == 0xFE) {
+        byteCount = 7;
+    } else {
+        *pNumberOut = 0;
+        return DRFLAC_CRC_MISMATCH;     /* Bad UTF-8 encoding. */
+    }
+
+    /* Read extra bytes. */
+    DRFLAC_ASSERT(byteCount > 1);
+
+    result = (drflac_uint64)(utf8[0] & (0xFF >> (byteCount + 1)));
+    for (i = 1; i < byteCount; ++i) {
+        if (!drflac__read_uint8(bs, 8, utf8 + i)) {
+            *pNumberOut = 0;
+            return DRFLAC_AT_END;
+        }
+        crc = drflac_crc8(crc, utf8[i], 8);
+
+        result = (result << 6) | (utf8[i] & 0x3F);
+    }
+
+    *pNumberOut = result;
+    *pCRCOut = crc;
+    return DRFLAC_SUCCESS;
+}
+
+
+
+/*
+The next two functions are responsible for calculating the prediction.
+
+When the bits per sample is >16 we need to use 64-bit integer arithmetic because otherwise we'll run out of precision. It's
+safe to assume this will be slower on 32-bit platforms so we use a more optimal solution when the bits per sample is <=16.
+*/
+static DRFLAC_INLINE drflac_int32 drflac__calculate_prediction_32(drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pDecodedSamples)
+{
+    drflac_int32 prediction = 0;
+
+    DRFLAC_ASSERT(order <= 32);
+
+    /* 32-bit version. */
+
+    /* VC++ optimizes this to a single jmp. I've not yet verified this for other compilers. */
+    switch (order)
+    {
+    case 32: prediction += coefficients[31] * pDecodedSamples[-32];
+    case 31: prediction += coefficients[30] * pDecodedSamples[-31];
+    case 30: prediction += coefficients[29] * pDecodedSamples[-30];
+    case 29: prediction += coefficients[28] * pDecodedSamples[-29];
+    case 28: prediction += coefficients[27] * pDecodedSamples[-28];
+    case 27: prediction += coefficients[26] * pDecodedSamples[-27];
+    case 26: prediction += coefficients[25] * pDecodedSamples[-26];
+    case 25: prediction += coefficients[24] * pDecodedSamples[-25];
+    case 24: prediction += coefficients[23] * pDecodedSamples[-24];
+    case 23: prediction += coefficients[22] * pDecodedSamples[-23];
+    case 22: prediction += coefficients[21] * pDecodedSamples[-22];
+    case 21: prediction += coefficients[20] * pDecodedSamples[-21];
+    case 20: prediction += coefficients[19] * pDecodedSamples[-20];
+    case 19: prediction += coefficients[18] * pDecodedSamples[-19];
+    case 18: prediction += coefficients[17] * pDecodedSamples[-18];
+    case 17: prediction += coefficients[16] * pDecodedSamples[-17];
+    case 16: prediction += coefficients[15] * pDecodedSamples[-16];
+    case 15: prediction += coefficients[14] * pDecodedSamples[-15];
+    case 14: prediction += coefficients[13] * pDecodedSamples[-14];
+    case 13: prediction += coefficients[12] * pDecodedSamples[-13];
+    case 12: prediction += coefficients[11] * pDecodedSamples[-12];
+    case 11: prediction += coefficients[10] * pDecodedSamples[-11];
+    case 10: prediction += coefficients[ 9] * pDecodedSamples[-10];
+    case  9: prediction += coefficients[ 8] * pDecodedSamples[- 9];
+    case  8: prediction += coefficients[ 7] * pDecodedSamples[- 8];
+    case  7: prediction += coefficients[ 6] * pDecodedSamples[- 7];
+    case  6: prediction += coefficients[ 5] * pDecodedSamples[- 6];
+    case  5: prediction += coefficients[ 4] * pDecodedSamples[- 5];
+    case  4: prediction += coefficients[ 3] * pDecodedSamples[- 4];
+    case  3: prediction += coefficients[ 2] * pDecodedSamples[- 3];
+    case  2: prediction += coefficients[ 1] * pDecodedSamples[- 2];
+    case  1: prediction += coefficients[ 0] * pDecodedSamples[- 1];
+    }
+
+    return (drflac_int32)(prediction >> shift);
+}
+
+static DRFLAC_INLINE drflac_int32 drflac__calculate_prediction_64(drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pDecodedSamples)
+{
+    drflac_int64 prediction;
+
+    DRFLAC_ASSERT(order <= 32);
+
+    /* 64-bit version. */
+
+    /* This method is faster on the 32-bit build when compiling with VC++. See note below. */
+#ifndef DRFLAC_64BIT
+    if (order == 8)
+    {
+        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
+        prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3];
+        prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4];
+        prediction += coefficients[4] * (drflac_int64)pDecodedSamples[-5];
+        prediction += coefficients[5] * (drflac_int64)pDecodedSamples[-6];
+        prediction += coefficients[6] * (drflac_int64)pDecodedSamples[-7];
+        prediction += coefficients[7] * (drflac_int64)pDecodedSamples[-8];
+    }
+    else if (order == 7)
+    {
+        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
+        prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3];
+        prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4];
+        prediction += coefficients[4] * (drflac_int64)pDecodedSamples[-5];
+        prediction += coefficients[5] * (drflac_int64)pDecodedSamples[-6];
+        prediction += coefficients[6] * (drflac_int64)pDecodedSamples[-7];
+    }
+    else if (order == 3)
+    {
+        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
+        prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3];
+    }
+    else if (order == 6)
+    {
+        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
+        prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3];
+        prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4];
+        prediction += coefficients[4] * (drflac_int64)pDecodedSamples[-5];
+        prediction += coefficients[5] * (drflac_int64)pDecodedSamples[-6];
+    }
+    else if (order == 5)
+    {
+        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
+        prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3];
+        prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4];
+        prediction += coefficients[4] * (drflac_int64)pDecodedSamples[-5];
+    }
+    else if (order == 4)
+    {
+        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
+        prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3];
+        prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4];
+    }
+    else if (order == 12)
+    {
+        prediction  = coefficients[0]  * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1]  * (drflac_int64)pDecodedSamples[-2];
+        prediction += coefficients[2]  * (drflac_int64)pDecodedSamples[-3];
+        prediction += coefficients[3]  * (drflac_int64)pDecodedSamples[-4];
+        prediction += coefficients[4]  * (drflac_int64)pDecodedSamples[-5];
+        prediction += coefficients[5]  * (drflac_int64)pDecodedSamples[-6];
+        prediction += coefficients[6]  * (drflac_int64)pDecodedSamples[-7];
+        prediction += coefficients[7]  * (drflac_int64)pDecodedSamples[-8];
+        prediction += coefficients[8]  * (drflac_int64)pDecodedSamples[-9];
+        prediction += coefficients[9]  * (drflac_int64)pDecodedSamples[-10];
+        prediction += coefficients[10] * (drflac_int64)pDecodedSamples[-11];
+        prediction += coefficients[11] * (drflac_int64)pDecodedSamples[-12];
+    }
+    else if (order == 2)
+    {
+        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
+    }
+    else if (order == 1)
+    {
+        prediction = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
+    }
+    else if (order == 10)
+    {
+        prediction  = coefficients[0]  * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1]  * (drflac_int64)pDecodedSamples[-2];
+        prediction += coefficients[2]  * (drflac_int64)pDecodedSamples[-3];
+        prediction += coefficients[3]  * (drflac_int64)pDecodedSamples[-4];
+        prediction += coefficients[4]  * (drflac_int64)pDecodedSamples[-5];
+        prediction += coefficients[5]  * (drflac_int64)pDecodedSamples[-6];
+        prediction += coefficients[6]  * (drflac_int64)pDecodedSamples[-7];
+        prediction += coefficients[7]  * (drflac_int64)pDecodedSamples[-8];
+        prediction += coefficients[8]  * (drflac_int64)pDecodedSamples[-9];
+        prediction += coefficients[9]  * (drflac_int64)pDecodedSamples[-10];
+    }
+    else if (order == 9)
+    {
+        prediction  = coefficients[0]  * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1]  * (drflac_int64)pDecodedSamples[-2];
+        prediction += coefficients[2]  * (drflac_int64)pDecodedSamples[-3];
+        prediction += coefficients[3]  * (drflac_int64)pDecodedSamples[-4];
+        prediction += coefficients[4]  * (drflac_int64)pDecodedSamples[-5];
+        prediction += coefficients[5]  * (drflac_int64)pDecodedSamples[-6];
+        prediction += coefficients[6]  * (drflac_int64)pDecodedSamples[-7];
+        prediction += coefficients[7]  * (drflac_int64)pDecodedSamples[-8];
+        prediction += coefficients[8]  * (drflac_int64)pDecodedSamples[-9];
+    }
+    else if (order == 11)
+    {
+        prediction  = coefficients[0]  * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1]  * (drflac_int64)pDecodedSamples[-2];
+        prediction += coefficients[2]  * (drflac_int64)pDecodedSamples[-3];
+        prediction += coefficients[3]  * (drflac_int64)pDecodedSamples[-4];
+        prediction += coefficients[4]  * (drflac_int64)pDecodedSamples[-5];
+        prediction += coefficients[5]  * (drflac_int64)pDecodedSamples[-6];
+        prediction += coefficients[6]  * (drflac_int64)pDecodedSamples[-7];
+        prediction += coefficients[7]  * (drflac_int64)pDecodedSamples[-8];
+        prediction += coefficients[8]  * (drflac_int64)pDecodedSamples[-9];
+        prediction += coefficients[9]  * (drflac_int64)pDecodedSamples[-10];
+        prediction += coefficients[10] * (drflac_int64)pDecodedSamples[-11];
+    }
+    else
+    {
+        int j;
+
+        prediction = 0;
+        for (j = 0; j < (int)order; ++j) {
+            prediction += coefficients[j] * (drflac_int64)pDecodedSamples[-j-1];
+        }
+    }
+#endif
+
+    /*
+    VC++ optimizes this to a single jmp instruction, but only the 64-bit build. The 32-bit build generates less efficient code for some
+    reason. The ugly version above is faster so we'll just switch between the two depending on the target platform.
+    */
+#ifdef DRFLAC_64BIT
+    prediction = 0;
+    switch (order)
+    {
+    case 32: prediction += coefficients[31] * (drflac_int64)pDecodedSamples[-32];
+    case 31: prediction += coefficients[30] * (drflac_int64)pDecodedSamples[-31];
+    case 30: prediction += coefficients[29] * (drflac_int64)pDecodedSamples[-30];
+    case 29: prediction += coefficients[28] * (drflac_int64)pDecodedSamples[-29];
+    case 28: prediction += coefficients[27] * (drflac_int64)pDecodedSamples[-28];
+    case 27: prediction += coefficients[26] * (drflac_int64)pDecodedSamples[-27];
+    case 26: prediction += coefficients[25] * (drflac_int64)pDecodedSamples[-26];
+    case 25: prediction += coefficients[24] * (drflac_int64)pDecodedSamples[-25];
+    case 24: prediction += coefficients[23] * (drflac_int64)pDecodedSamples[-24];
+    case 23: prediction += coefficients[22] * (drflac_int64)pDecodedSamples[-23];
+    case 22: prediction += coefficients[21] * (drflac_int64)pDecodedSamples[-22];
+    case 21: prediction += coefficients[20] * (drflac_int64)pDecodedSamples[-21];
+    case 20: prediction += coefficients[19] * (drflac_int64)pDecodedSamples[-20];
+    case 19: prediction += coefficients[18] * (drflac_int64)pDecodedSamples[-19];
+    case 18: prediction += coefficients[17] * (drflac_int64)pDecodedSamples[-18];
+    case 17: prediction += coefficients[16] * (drflac_int64)pDecodedSamples[-17];
+    case 16: prediction += coefficients[15] * (drflac_int64)pDecodedSamples[-16];
+    case 15: prediction += coefficients[14] * (drflac_int64)pDecodedSamples[-15];
+    case 14: prediction += coefficients[13] * (drflac_int64)pDecodedSamples[-14];
+    case 13: prediction += coefficients[12] * (drflac_int64)pDecodedSamples[-13];
+    case 12: prediction += coefficients[11] * (drflac_int64)pDecodedSamples[-12];
+    case 11: prediction += coefficients[10] * (drflac_int64)pDecodedSamples[-11];
+    case 10: prediction += coefficients[ 9] * (drflac_int64)pDecodedSamples[-10];
+    case  9: prediction += coefficients[ 8] * (drflac_int64)pDecodedSamples[- 9];
+    case  8: prediction += coefficients[ 7] * (drflac_int64)pDecodedSamples[- 8];
+    case  7: prediction += coefficients[ 6] * (drflac_int64)pDecodedSamples[- 7];
+    case  6: prediction += coefficients[ 5] * (drflac_int64)pDecodedSamples[- 6];
+    case  5: prediction += coefficients[ 4] * (drflac_int64)pDecodedSamples[- 5];
+    case  4: prediction += coefficients[ 3] * (drflac_int64)pDecodedSamples[- 4];
+    case  3: prediction += coefficients[ 2] * (drflac_int64)pDecodedSamples[- 3];
+    case  2: prediction += coefficients[ 1] * (drflac_int64)pDecodedSamples[- 2];
+    case  1: prediction += coefficients[ 0] * (drflac_int64)pDecodedSamples[- 1];
+    }
+#endif
+
+    return (drflac_int32)(prediction >> shift);
+}
+
+
+#if 0
+/*
+Reference implementation for reading and decoding samples with residual. This is intentionally left unoptimized for the
+sake of readability and should only be used as a reference.
+*/
+static drflac_bool32 drflac__decode_samples_with_residual__rice__reference(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+{
+    drflac_uint32 i;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(count > 0);
+    DRFLAC_ASSERT(pSamplesOut != NULL);
+
+    for (i = 0; i < count; ++i) {
+        drflac_uint32 zeroCounter = 0;
+        for (;;) {
+            drflac_uint8 bit;
+            if (!drflac__read_uint8(bs, 1, &bit)) {
+                return DRFLAC_FALSE;
+            }
+
+            if (bit == 0) {
+                zeroCounter += 1;
+            } else {
+                break;
+            }
+        }
+
+        drflac_uint32 decodedRice;
+        if (riceParam > 0) {
+            if (!drflac__read_uint32(bs, riceParam, &decodedRice)) {
+                return DRFLAC_FALSE;
+            }
+        } else {
+            decodedRice = 0;
+        }
+
+        decodedRice |= (zeroCounter << riceParam);
+        if ((decodedRice & 0x01)) {
+            decodedRice = ~(decodedRice >> 1);
+        } else {
+            decodedRice =  (decodedRice >> 1);
+        }
+
+
+        if (bitsPerSample+shift >= 32) {
+            pSamplesOut[i] = decodedRice + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + i);
+        } else {
+            pSamplesOut[i] = decodedRice + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + i);
+        }
+    }
+
+    return DRFLAC_TRUE;
+}
+#endif
+
+#if 0
+static drflac_bool32 drflac__read_rice_parts__reference(drflac_bs* bs, drflac_uint8 riceParam, drflac_uint32* pZeroCounterOut, drflac_uint32* pRiceParamPartOut)
+{
+    drflac_uint32 zeroCounter = 0;
+    drflac_uint32 decodedRice;
+
+    for (;;) {
+        drflac_uint8 bit;
+        if (!drflac__read_uint8(bs, 1, &bit)) {
+            return DRFLAC_FALSE;
+        }
+
+        if (bit == 0) {
+            zeroCounter += 1;
+        } else {
+            break;
+        }
+    }
+
+    if (riceParam > 0) {
+        if (!drflac__read_uint32(bs, riceParam, &decodedRice)) {
+            return DRFLAC_FALSE;
+        }
+    } else {
+        decodedRice = 0;
+    }
+
+    *pZeroCounterOut = zeroCounter;
+    *pRiceParamPartOut = decodedRice;
+    return DRFLAC_TRUE;
+}
+#endif
+
+#if 0
+static DRFLAC_INLINE drflac_bool32 drflac__read_rice_parts(drflac_bs* bs, drflac_uint8 riceParam, drflac_uint32* pZeroCounterOut, drflac_uint32* pRiceParamPartOut)
+{
+    drflac_cache_t riceParamMask;
+    drflac_uint32 zeroCounter;
+    drflac_uint32 setBitOffsetPlus1;
+    drflac_uint32 riceParamPart;
+    drflac_uint32 riceLength;
+
+    DRFLAC_ASSERT(riceParam > 0);   /* <-- riceParam should never be 0. drflac__read_rice_parts__param_equals_zero() should be used instead for this case. */
+
+    riceParamMask = DRFLAC_CACHE_L1_SELECTION_MASK(riceParam);
+
+    zeroCounter = 0;
+    while (bs->cache == 0) {
+        zeroCounter += (drflac_uint32)DRFLAC_CACHE_L1_BITS_REMAINING(bs);
+        if (!drflac__reload_cache(bs)) {
+            return DRFLAC_FALSE;
+        }
+    }
+
+    setBitOffsetPlus1 = drflac__clz(bs->cache);
+    zeroCounter += setBitOffsetPlus1;
+    setBitOffsetPlus1 += 1;
+
+    riceLength = setBitOffsetPlus1 + riceParam;
+    if (riceLength < DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
+        riceParamPart = (drflac_uint32)((bs->cache & (riceParamMask >> setBitOffsetPlus1)) >> DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, riceLength));
+
+        bs->consumedBits += riceLength;
+        bs->cache <<= riceLength;
+    } else {
+        drflac_uint32 bitCountLo;
+        drflac_cache_t resultHi;
+
+        bs->consumedBits += riceLength;
+        bs->cache <<= setBitOffsetPlus1 & (DRFLAC_CACHE_L1_SIZE_BITS(bs)-1);    /* <-- Equivalent to "if (setBitOffsetPlus1 < DRFLAC_CACHE_L1_SIZE_BITS(bs)) { bs->cache <<= setBitOffsetPlus1; }" */
+
+        /* It straddles the cached data. It will never cover more than the next chunk. We just read the number in two parts and combine them. */
+        bitCountLo = bs->consumedBits - DRFLAC_CACHE_L1_SIZE_BITS(bs);
+        resultHi = DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, riceParam);  /* <-- Use DRFLAC_CACHE_L1_SELECT_AND_SHIFT_SAFE() if ever this function allows riceParam=0. */
+
+        if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {
+#ifndef DR_FLAC_NO_CRC
+            drflac__update_crc16(bs);
+#endif
+            bs->cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]);
+            bs->consumedBits = 0;
+#ifndef DR_FLAC_NO_CRC
+            bs->crc16Cache = bs->cache;
+#endif
+        } else {
+            /* Slow path. We need to fetch more data from the client. */
+            if (!drflac__reload_cache(bs)) {
+                return DRFLAC_FALSE;
+            }
+        }
+
+        riceParamPart = (drflac_uint32)(resultHi | DRFLAC_CACHE_L1_SELECT_AND_SHIFT_SAFE(bs, bitCountLo));
+
+        bs->consumedBits += bitCountLo;
+        bs->cache <<= bitCountLo;
+    }
+
+    pZeroCounterOut[0] = zeroCounter;
+    pRiceParamPartOut[0] = riceParamPart;
+
+    return DRFLAC_TRUE;
+}
+#endif
+
+static DRFLAC_INLINE drflac_bool32 drflac__read_rice_parts_x1(drflac_bs* bs, drflac_uint8 riceParam, drflac_uint32* pZeroCounterOut, drflac_uint32* pRiceParamPartOut)
+{
+    drflac_uint32  riceParamPlus1 = riceParam + 1;
+    /*drflac_cache_t riceParamPlus1Mask  = DRFLAC_CACHE_L1_SELECTION_MASK(riceParamPlus1);*/
+    drflac_uint32  riceParamPlus1Shift = DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, riceParamPlus1);
+    drflac_uint32  riceParamPlus1MaxConsumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs) - riceParamPlus1;
+
+    /*
+    The idea here is to use local variables for the cache in an attempt to encourage the compiler to store them in registers. I have
+    no idea how this will work in practice...
+    */
+    drflac_cache_t bs_cache = bs->cache;
+    drflac_uint32  bs_consumedBits = bs->consumedBits;
+
+    /* The first thing to do is find the first unset bit. Most likely a bit will be set in the current cache line. */
+    drflac_uint32  lzcount = drflac__clz(bs_cache);
+    if (lzcount < sizeof(bs_cache)*8) {
+        pZeroCounterOut[0] = lzcount;
+
+        /*
+        It is most likely that the riceParam part (which comes after the zero counter) is also on this cache line. When extracting
+        this, we include the set bit from the unary coded part because it simplifies cache management. This bit will be handled
+        outside of this function at a higher level.
+        */
+    extract_rice_param_part:
+        bs_cache       <<= lzcount;
+        bs_consumedBits += lzcount;
+
+        if (bs_consumedBits <= riceParamPlus1MaxConsumedBits) {
+            /* Getting here means the rice parameter part is wholly contained within the current cache line. */
+            pRiceParamPartOut[0] = (drflac_uint32)(bs_cache >> riceParamPlus1Shift);
+            bs_cache       <<= riceParamPlus1;
+            bs_consumedBits += riceParamPlus1;
+        } else {
+            drflac_uint32 riceParamPartHi;
+            drflac_uint32 riceParamPartLo;
+            drflac_uint32 riceParamPartLoBitCount;
+
+            /*
+            Getting here means the rice parameter part straddles the cache line. We need to read from the tail of the current cache
+            line, reload the cache, and then combine it with the head of the next cache line.
+            */
+
+            /* Grab the high part of the rice parameter part. */
+            riceParamPartHi = (drflac_uint32)(bs_cache >> riceParamPlus1Shift);
+
+            /* Before reloading the cache we need to grab the size in bits of the low part. */
+            riceParamPartLoBitCount = bs_consumedBits - riceParamPlus1MaxConsumedBits;
+            DRFLAC_ASSERT(riceParamPartLoBitCount > 0 && riceParamPartLoBitCount < 32);
+
+            /* Now reload the cache. */
+            if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {
+            #ifndef DR_FLAC_NO_CRC
+                drflac__update_crc16(bs);
+            #endif
+                bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]);
+                bs_consumedBits = riceParamPartLoBitCount;
+            #ifndef DR_FLAC_NO_CRC
+                bs->crc16Cache = bs_cache;
+            #endif
+            } else {
+                /* Slow path. We need to fetch more data from the client. */
+                if (!drflac__reload_cache(bs)) {
+                    return DRFLAC_FALSE;
+                }
+
+                bs_cache = bs->cache;
+                bs_consumedBits = bs->consumedBits + riceParamPartLoBitCount;
+            }
+
+            /* We should now have enough information to construct the rice parameter part. */
+            riceParamPartLo = (drflac_uint32)(bs_cache >> (DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, riceParamPartLoBitCount)));
+            pRiceParamPartOut[0] = riceParamPartHi | riceParamPartLo;
+
+            bs_cache <<= riceParamPartLoBitCount;
+        }
+    } else {
+        /*
+        Getting here means there are no bits set on the cache line. This is a less optimal case because we just wasted a call
+        to drflac__clz() and we need to reload the cache.
+        */
+        drflac_uint32 zeroCounter = (drflac_uint32)(DRFLAC_CACHE_L1_SIZE_BITS(bs) - bs_consumedBits);
+        for (;;) {
+            if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {
+            #ifndef DR_FLAC_NO_CRC
+                drflac__update_crc16(bs);
+            #endif
+                bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]);
+                bs_consumedBits = 0;
+            #ifndef DR_FLAC_NO_CRC
+                bs->crc16Cache = bs_cache;
+            #endif
+            } else {
+                /* Slow path. We need to fetch more data from the client. */
+                if (!drflac__reload_cache(bs)) {
+                    return DRFLAC_FALSE;
+                }
+
+                bs_cache = bs->cache;
+                bs_consumedBits = bs->consumedBits;
+            }
+
+            lzcount = drflac__clz(bs_cache);
+            zeroCounter += lzcount;
+
+            if (lzcount < sizeof(bs_cache)*8) {
+                break;
+            }
+        }
+
+        pZeroCounterOut[0] = zeroCounter;
+        goto extract_rice_param_part;
+    }
+
+    /* Make sure the cache is restored at the end of it all. */
+    bs->cache = bs_cache;
+    bs->consumedBits = bs_consumedBits;
+
+    return DRFLAC_TRUE;
+}
+
+static DRFLAC_INLINE drflac_bool32 drflac__seek_rice_parts(drflac_bs* bs, drflac_uint8 riceParam)
+{
+    drflac_uint32  riceParamPlus1 = riceParam + 1;
+    drflac_uint32  riceParamPlus1MaxConsumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs) - riceParamPlus1;
+
+    /*
+    The idea here is to use local variables for the cache in an attempt to encourage the compiler to store them in registers. I have
+    no idea how this will work in practice...
+    */
+    drflac_cache_t bs_cache = bs->cache;
+    drflac_uint32  bs_consumedBits = bs->consumedBits;
+
+    /* The first thing to do is find the first unset bit. Most likely a bit will be set in the current cache line. */
+    drflac_uint32  lzcount = drflac__clz(bs_cache);
+    if (lzcount < sizeof(bs_cache)*8) {
+        /*
+        It is most likely that the riceParam part (which comes after the zero counter) is also on this cache line. When extracting
+        this, we include the set bit from the unary coded part because it simplifies cache management. This bit will be handled
+        outside of this function at a higher level.
+        */
+    extract_rice_param_part:
+        bs_cache       <<= lzcount;
+        bs_consumedBits += lzcount;
+
+        if (bs_consumedBits <= riceParamPlus1MaxConsumedBits) {
+            /* Getting here means the rice parameter part is wholly contained within the current cache line. */
+            bs_cache       <<= riceParamPlus1;
+            bs_consumedBits += riceParamPlus1;
+        } else {
+            /*
+            Getting here means the rice parameter part straddles the cache line. We need to read from the tail of the current cache
+            line, reload the cache, and then combine it with the head of the next cache line.
+            */
+
+            /* Before reloading the cache we need to grab the size in bits of the low part. */
+            drflac_uint32 riceParamPartLoBitCount = bs_consumedBits - riceParamPlus1MaxConsumedBits;
+            DRFLAC_ASSERT(riceParamPartLoBitCount > 0 && riceParamPartLoBitCount < 32);
+
+            /* Now reload the cache. */
+            if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {
+            #ifndef DR_FLAC_NO_CRC
+                drflac__update_crc16(bs);
+            #endif
+                bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]);
+                bs_consumedBits = riceParamPartLoBitCount;
+            #ifndef DR_FLAC_NO_CRC
+                bs->crc16Cache = bs_cache;
+            #endif
+            } else {
+                /* Slow path. We need to fetch more data from the client. */
+                if (!drflac__reload_cache(bs)) {
+                    return DRFLAC_FALSE;
+                }
+
+                bs_cache = bs->cache;
+                bs_consumedBits = bs->consumedBits + riceParamPartLoBitCount;
+            }
+
+            bs_cache <<= riceParamPartLoBitCount;
+        }
+    } else {
+        /*
+        Getting here means there are no bits set on the cache line. This is a less optimal case because we just wasted a call
+        to drflac__clz() and we need to reload the cache.
+        */
+        for (;;) {
+            if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {
+            #ifndef DR_FLAC_NO_CRC
+                drflac__update_crc16(bs);
+            #endif
+                bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]);
+                bs_consumedBits = 0;
+            #ifndef DR_FLAC_NO_CRC
+                bs->crc16Cache = bs_cache;
+            #endif
+            } else {
+                /* Slow path. We need to fetch more data from the client. */
+                if (!drflac__reload_cache(bs)) {
+                    return DRFLAC_FALSE;
+                }
+
+                bs_cache = bs->cache;
+                bs_consumedBits = bs->consumedBits;
+            }
+
+            lzcount = drflac__clz(bs_cache);
+            if (lzcount < sizeof(bs_cache)*8) {
+                break;
+            }
+        }
+
+        goto extract_rice_param_part;
+    }
+
+    /* Make sure the cache is restored at the end of it all. */
+    bs->cache = bs_cache;
+    bs->consumedBits = bs_consumedBits;
+
+    return DRFLAC_TRUE;
+}
+
+
+static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar_zeroorder(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+{
+    drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF};
+    drflac_uint32 zeroCountPart0;
+    drflac_uint32 riceParamPart0;
+    drflac_uint32 riceParamMask;
+    drflac_uint32 i;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(count > 0);
+    DRFLAC_ASSERT(pSamplesOut != NULL);
+
+    (void)bitsPerSample;
+    (void)order;
+    (void)shift;
+    (void)coefficients;
+
+    riceParamMask  = (drflac_uint32)~((~0UL) << riceParam);
+
+    i = 0;
+    while (i < count) {
+        /* Rice extraction. */
+        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart0, &riceParamPart0)) {
+            return DRFLAC_FALSE;
+        }
+
+        /* Rice reconstruction. */
+        riceParamPart0 &= riceParamMask;
+        riceParamPart0 |= (zeroCountPart0 << riceParam);
+        riceParamPart0  = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01];
+
+        pSamplesOut[i] = riceParamPart0;
+
+        i += 1;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+{
+    drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF};
+    drflac_uint32 zeroCountPart0 = 0;
+    drflac_uint32 zeroCountPart1 = 0;
+    drflac_uint32 zeroCountPart2 = 0;
+    drflac_uint32 zeroCountPart3 = 0;
+    drflac_uint32 riceParamPart0 = 0;
+    drflac_uint32 riceParamPart1 = 0;
+    drflac_uint32 riceParamPart2 = 0;
+    drflac_uint32 riceParamPart3 = 0;
+    drflac_uint32 riceParamMask;
+    const drflac_int32* pSamplesOutEnd;
+    drflac_uint32 i;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(count > 0);
+    DRFLAC_ASSERT(pSamplesOut != NULL);
+
+    if (order == 0) {
+        return drflac__decode_samples_with_residual__rice__scalar_zeroorder(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
+    }
+
+    riceParamMask  = (drflac_uint32)~((~0UL) << riceParam);
+    pSamplesOutEnd = pSamplesOut + (count & ~3);
+
+    if (bitsPerSample+shift > 32) {
+        while (pSamplesOut < pSamplesOutEnd) {
+            /*
+            Rice extraction. It's faster to do this one at a time against local variables than it is to use the x4 version
+            against an array. Not sure why, but perhaps it's making more efficient use of registers?
+            */
+            if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart0, &riceParamPart0) ||
+                !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart1, &riceParamPart1) ||
+                !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart2, &riceParamPart2) ||
+                !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart3, &riceParamPart3)) {
+                return DRFLAC_FALSE;
+            }
+
+            riceParamPart0 &= riceParamMask;
+            riceParamPart1 &= riceParamMask;
+            riceParamPart2 &= riceParamMask;
+            riceParamPart3 &= riceParamMask;
+
+            riceParamPart0 |= (zeroCountPart0 << riceParam);
+            riceParamPart1 |= (zeroCountPart1 << riceParam);
+            riceParamPart2 |= (zeroCountPart2 << riceParam);
+            riceParamPart3 |= (zeroCountPart3 << riceParam);
+
+            riceParamPart0  = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01];
+            riceParamPart1  = (riceParamPart1 >> 1) ^ t[riceParamPart1 & 0x01];
+            riceParamPart2  = (riceParamPart2 >> 1) ^ t[riceParamPart2 & 0x01];
+            riceParamPart3  = (riceParamPart3 >> 1) ^ t[riceParamPart3 & 0x01];
+
+            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 0);
+            pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 1);
+            pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 2);
+            pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 3);
+
+            pSamplesOut += 4;
+        }
+    } else {
+        while (pSamplesOut < pSamplesOutEnd) {
+            if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart0, &riceParamPart0) ||
+                !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart1, &riceParamPart1) ||
+                !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart2, &riceParamPart2) ||
+                !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart3, &riceParamPart3)) {
+                return DRFLAC_FALSE;
+            }
+
+            riceParamPart0 &= riceParamMask;
+            riceParamPart1 &= riceParamMask;
+            riceParamPart2 &= riceParamMask;
+            riceParamPart3 &= riceParamMask;
+
+            riceParamPart0 |= (zeroCountPart0 << riceParam);
+            riceParamPart1 |= (zeroCountPart1 << riceParam);
+            riceParamPart2 |= (zeroCountPart2 << riceParam);
+            riceParamPart3 |= (zeroCountPart3 << riceParam);
+
+            riceParamPart0  = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01];
+            riceParamPart1  = (riceParamPart1 >> 1) ^ t[riceParamPart1 & 0x01];
+            riceParamPart2  = (riceParamPart2 >> 1) ^ t[riceParamPart2 & 0x01];
+            riceParamPart3  = (riceParamPart3 >> 1) ^ t[riceParamPart3 & 0x01];
+
+            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 0);
+            pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 1);
+            pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 2);
+            pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 3);
+
+            pSamplesOut += 4;
+        }
+    }
+
+    i = (count & ~3);
+    while (i < count) {
+        /* Rice extraction. */
+        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart0, &riceParamPart0)) {
+            return DRFLAC_FALSE;
+        }
+
+        /* Rice reconstruction. */
+        riceParamPart0 &= riceParamMask;
+        riceParamPart0 |= (zeroCountPart0 << riceParam);
+        riceParamPart0  = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01];
+        /*riceParamPart0  = (riceParamPart0 >> 1) ^ (~(riceParamPart0 & 0x01) + 1);*/
+
+        /* Sample reconstruction. */
+        if (bitsPerSample+shift > 32) {
+            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 0);
+        } else {
+            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 0);
+        }
+
+        i += 1;
+        pSamplesOut += 1;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE __m128i drflac__mm_packs_interleaved_epi32(__m128i a, __m128i b)
+{
+    __m128i r;
+
+    /* Pack. */
+    r = _mm_packs_epi32(a, b);
+
+    /* a3a2 a1a0 b3b2 b1b0 -> a3a2 b3b2 a1a0 b1b0 */
+    r = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 1, 2, 0));
+
+    /* a3a2 b3b2 a1a0 b1b0 -> a3b3 a2b2 a1b1 a0b0 */
+    r = _mm_shufflehi_epi16(r, _MM_SHUFFLE(3, 1, 2, 0));
+    r = _mm_shufflelo_epi16(r, _MM_SHUFFLE(3, 1, 2, 0));
+
+    return r;
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_SSE41)
+static DRFLAC_INLINE __m128i drflac__mm_not_si128(__m128i a)
+{
+    return _mm_xor_si128(a, _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128()));
+}
+
+static DRFLAC_INLINE __m128i drflac__mm_hadd_epi32(__m128i x)
+{
+    __m128i x64 = _mm_add_epi32(x, _mm_shuffle_epi32(x, _MM_SHUFFLE(1, 0, 3, 2)));
+    __m128i x32 = _mm_shufflelo_epi16(x64, _MM_SHUFFLE(1, 0, 3, 2));
+    return _mm_add_epi32(x64, x32);
+}
+
+static DRFLAC_INLINE __m128i drflac__mm_hadd_epi64(__m128i x)
+{
+    return _mm_add_epi64(x, _mm_shuffle_epi32(x, _MM_SHUFFLE(1, 0, 3, 2)));
+}
+
+static DRFLAC_INLINE __m128i drflac__mm_srai_epi64(__m128i x, int count)
+{
+    /*
+    To simplify this we are assuming count < 32. This restriction allows us to work on a low side and a high side. The low side
+    is shifted with zero bits, whereas the right side is shifted with sign bits.
+    */
+    __m128i lo = _mm_srli_epi64(x, count);
+    __m128i hi = _mm_srai_epi32(x, count);
+
+    hi = _mm_and_si128(hi, _mm_set_epi32(0xFFFFFFFF, 0, 0xFFFFFFFF, 0));    /* The high part needs to have the low part cleared. */
+
+    return _mm_or_si128(lo+(5*GetBass()), hi);
+}
+
+static drflac_bool32 drflac__decode_samples_with_residual__rice__sse41_32(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+{
+    int i;
+    drflac_uint32 riceParamMask;
+    drflac_int32* pDecodedSamples    = pSamplesOut;
+    drflac_int32* pDecodedSamplesEnd = pSamplesOut + (count & ~3);
+    drflac_uint32 zeroCountParts0 = 0;
+    drflac_uint32 zeroCountParts1 = 0;
+    drflac_uint32 zeroCountParts2 = 0;
+    drflac_uint32 zeroCountParts3 = 0;
+    drflac_uint32 riceParamParts0 = 0;
+    drflac_uint32 riceParamParts1 = 0;
+    drflac_uint32 riceParamParts2 = 0;
+    drflac_uint32 riceParamParts3 = 0;
+    __m128i coefficients128_0;
+    __m128i coefficients128_4;
+    __m128i coefficients128_8;
+    __m128i samples128_0;
+    __m128i samples128_4;
+    __m128i samples128_8;
+    __m128i riceParamMask128;
+
+    const drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF};
+
+    riceParamMask    = (drflac_uint32)~((~0UL) << riceParam);
+    riceParamMask128 = _mm_set1_epi32(riceParamMask);
+
+    /* Pre-load. */
+    coefficients128_0 = _mm_setzero_si128();
+    coefficients128_4 = _mm_setzero_si128();
+    coefficients128_8 = _mm_setzero_si128();
+
+    samples128_0 = _mm_setzero_si128();
+    samples128_4 = _mm_setzero_si128();
+    samples128_8 = _mm_setzero_si128();
+
+    /*
+    Pre-loading the coefficients and prior samples is annoying because we need to ensure we don't try reading more than
+    what's available in the input buffers. It would be convenient to use a fall-through switch to do this, but this results
+    in strict aliasing warnings with GCC. To work around this I'm just doing something hacky. This feels a bit convoluted
+    so I think there's opportunity for this to be simplified.
+    */
+#if 1
+    {
+        int runningOrder = order;
+
+        /* 0 - 3. */
+        if (runningOrder >= 4) {
+            coefficients128_0 = _mm_loadu_si128((const __m128i*)(coefficients + 0));
+            samples128_0      = _mm_loadu_si128((const __m128i*)(pSamplesOut  - 4));
+            runningOrder -= 4;
+        } else {
+            switch (runningOrder) {
+                case 3: coefficients128_0 = _mm_set_epi32(0, coefficients[2], coefficients[1], coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], pSamplesOut[-2], pSamplesOut[-3], 0); break;
+                case 2: coefficients128_0 = _mm_set_epi32(0, 0,               coefficients[1], coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], pSamplesOut[-2], 0,               0); break;
+                case 1: coefficients128_0 = _mm_set_epi32(0, 0,               0,               coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], 0,               0,               0); break;
+            }
+            runningOrder = 0;
+        }
+
+        /* 4 - 7 */
+        if (runningOrder >= 4) {
+            coefficients128_4 = _mm_loadu_si128((const __m128i*)(coefficients + 4));
+            samples128_4      = _mm_loadu_si128((const __m128i*)(pSamplesOut  - 8));
+            runningOrder -= 4;
+        } else {
+            switch (runningOrder) {
+                case 3: coefficients128_4 = _mm_set_epi32(0, coefficients[6], coefficients[5], coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], pSamplesOut[-6], pSamplesOut[-7], 0); break;
+                case 2: coefficients128_4 = _mm_set_epi32(0, 0,               coefficients[5], coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], pSamplesOut[-6], 0,               0); break;
+                case 1: coefficients128_4 = _mm_set_epi32(0, 0,               0,               coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], 0,               0,               0); break;
+            }
+            runningOrder = 0;
+        }
+
+        /* 8 - 11 */
+        if (runningOrder == 4) {
+            coefficients128_8 = _mm_loadu_si128((const __m128i*)(coefficients + 8));
+            samples128_8      = _mm_loadu_si128((const __m128i*)(pSamplesOut  - 12));
+            runningOrder -= 4;
+        } else {
+            switch (runningOrder) {
+                case 3: coefficients128_8 = _mm_set_epi32(0, coefficients[10], coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], pSamplesOut[-11], 0); break;
+                case 2: coefficients128_8 = _mm_set_epi32(0, 0,                coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], 0,                0); break;
+                case 1: coefficients128_8 = _mm_set_epi32(0, 0,                0,               coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], 0,                0,                0); break;
+            }
+            runningOrder = 0;
+        }
+
+        /* Coefficients need to be shuffled for our streaming algorithm below to work. Samples are already in the correct order from the loading routine above. */
+        coefficients128_0 = _mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(0, 1, 2, 3));
+        coefficients128_4 = _mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(0, 1, 2, 3));
+        coefficients128_8 = _mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(0, 1, 2, 3));
+    }
+#else
+    /* This causes strict-aliasing warnings with GCC. */
+    switch (order)
+    {
+    case 12: ((drflac_int32*)&coefficients128_8)[0] = coefficients[11]; ((drflac_int32*)&samples128_8)[0] = pDecodedSamples[-12];
+    case 11: ((drflac_int32*)&coefficients128_8)[1] = coefficients[10]; ((drflac_int32*)&samples128_8)[1] = pDecodedSamples[-11];
+    case 10: ((drflac_int32*)&coefficients128_8)[2] = coefficients[ 9]; ((drflac_int32*)&samples128_8)[2] = pDecodedSamples[-10];
+    case 9:  ((drflac_int32*)&coefficients128_8)[3] = coefficients[ 8]; ((drflac_int32*)&samples128_8)[3] = pDecodedSamples[- 9];
+    case 8:  ((drflac_int32*)&coefficients128_4)[0] = coefficients[ 7]; ((drflac_int32*)&samples128_4)[0] = pDecodedSamples[- 8];
+    case 7:  ((drflac_int32*)&coefficients128_4)[1] = coefficients[ 6]; ((drflac_int32*)&samples128_4)[1] = pDecodedSamples[- 7];
+    case 6:  ((drflac_int32*)&coefficients128_4)[2] = coefficients[ 5]; ((drflac_int32*)&samples128_4)[2] = pDecodedSamples[- 6];
+    case 5:  ((drflac_int32*)&coefficients128_4)[3] = coefficients[ 4]; ((drflac_int32*)&samples128_4)[3] = pDecodedSamples[- 5];
+    case 4:  ((drflac_int32*)&coefficients128_0)[0] = coefficients[ 3]; ((drflac_int32*)&samples128_0)[0] = pDecodedSamples[- 4];
+    case 3:  ((drflac_int32*)&coefficients128_0)[1] = coefficients[ 2]; ((drflac_int32*)&samples128_0)[1] = pDecodedSamples[- 3];
+    case 2:  ((drflac_int32*)&coefficients128_0)[2] = coefficients[ 1]; ((drflac_int32*)&samples128_0)[2] = pDecodedSamples[- 2];
+    case 1:  ((drflac_int32*)&coefficients128_0)[3] = coefficients[ 0]; ((drflac_int32*)&samples128_0)[3] = pDecodedSamples[- 1];
+    }
+#endif
+
+    /* For this version we are doing one sample at a time. */
+    while (pDecodedSamples < pDecodedSamplesEnd) {
+        __m128i prediction128;
+        __m128i zeroCountPart128;
+        __m128i riceParamPart128;
+
+        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts0, &riceParamParts0) ||
+            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts1, &riceParamParts1) ||
+            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts2, &riceParamParts2) ||
+            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts3, &riceParamParts3)) {
+            return DRFLAC_FALSE;
+        }
+
+        zeroCountPart128 = _mm_set_epi32(zeroCountParts3, zeroCountParts2, zeroCountParts1, zeroCountParts0);
+        riceParamPart128 = _mm_set_epi32(riceParamParts3, riceParamParts2, riceParamParts1, riceParamParts0);
+
+        riceParamPart128 = _mm_and_si128(riceParamPart128, riceParamMask128);
+        riceParamPart128 = _mm_or_si128(riceParamPart128, _mm_slli_epi32(zeroCountPart128, riceParam));
+        riceParamPart128 = _mm_xor_si128(_mm_srli_epi32(riceParamPart128, 1), _mm_add_epi32(drflac__mm_not_si128(_mm_and_si128(riceParamPart128, _mm_set1_epi32(0x01))), _mm_set1_epi32(0x01)));  /* <-- SSE2 compatible */
+        /*riceParamPart128 = _mm_xor_si128(_mm_srli_epi32(riceParamPart128, 1), _mm_mullo_epi32(_mm_and_si128(riceParamPart128, _mm_set1_epi32(0x01)), _mm_set1_epi32(0xFFFFFFFF)));*/   /* <-- Only supported from SSE4.1 and is slower in my testing... */
+
+        if (order <= 4) {
+            for (i = 0; i < 4; i += 1) {
+                prediction128 = _mm_mullo_epi32(coefficients128_0, samples128_0);
+
+                /* Horizontal add and shift. */
+                prediction128 = drflac__mm_hadd_epi32(prediction128);
+                prediction128 = _mm_srai_epi32(prediction128, shift);
+                prediction128 = _mm_add_epi32(riceParamPart128, prediction128);
+
+                samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4);
+                riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4);
+            }
+        } else if (order <= 8) {
+            for (i = 0; i < 4; i += 1) {
+                prediction128 =                              _mm_mullo_epi32(coefficients128_4, samples128_4);
+                prediction128 = _mm_add_epi32(prediction128, _mm_mullo_epi32(coefficients128_0, samples128_0));
+
+                /* Horizontal add and shift. */
+                prediction128 = drflac__mm_hadd_epi32(prediction128);
+                prediction128 = _mm_srai_epi32(prediction128, shift);
+                prediction128 = _mm_add_epi32(riceParamPart128, prediction128);
+
+                samples128_4 = _mm_alignr_epi8(samples128_0,  samples128_4, 4);
+                samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4);
+                riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4);
+            }
+        } else {
+            for (i = 0; i < 4; i += 1) {
+                prediction128 =                              _mm_mullo_epi32(coefficients128_8, samples128_8);
+                prediction128 = _mm_add_epi32(prediction128, _mm_mullo_epi32(coefficients128_4, samples128_4));
+                prediction128 = _mm_add_epi32(prediction128, _mm_mullo_epi32(coefficients128_0, samples128_0));
+
+                /* Horizontal add and shift. */
+                prediction128 = drflac__mm_hadd_epi32(prediction128);
+                prediction128 = _mm_srai_epi32(prediction128, shift);
+                prediction128 = _mm_add_epi32(riceParamPart128, prediction128);
+
+                samples128_8 = _mm_alignr_epi8(samples128_4,  samples128_8, 4);
+                samples128_4 = _mm_alignr_epi8(samples128_0,  samples128_4, 4);
+                samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4);
+                riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4);
+            }
+        }
+
+        /* We store samples in groups of 4. */
+        _mm_storeu_si128((__m128i*)pDecodedSamples, samples128_0);
+        pDecodedSamples += 4;
+    }
+
+    /* Make sure we process the last few samples. */
+    i = (count & ~3);
+    while (i < (int)count) {
+        /* Rice extraction. */
+        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts0, &riceParamParts0)) {
+            return DRFLAC_FALSE;
+        }
+
+        /* Rice reconstruction. */
+        riceParamParts0 &= riceParamMask;
+        riceParamParts0 |= (zeroCountParts0 << riceParam);
+        riceParamParts0  = (riceParamParts0 >> 1) ^ t[riceParamParts0 & 0x01];
+
+        /* Sample reconstruction. */
+        pDecodedSamples[0] = riceParamParts0 + drflac__calculate_prediction_32(order, shift, coefficients, pDecodedSamples);
+
+        i += 1;
+        pDecodedSamples += 1;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__decode_samples_with_residual__rice__sse41_64(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+{
+    int i;
+    drflac_uint32 riceParamMask;
+    drflac_int32* pDecodedSamples    = pSamplesOut;
+    drflac_int32* pDecodedSamplesEnd = pSamplesOut + (count & ~3);
+    drflac_uint32 zeroCountParts0 = 0;
+    drflac_uint32 zeroCountParts1 = 0;
+    drflac_uint32 zeroCountParts2 = 0;
+    drflac_uint32 zeroCountParts3 = 0;
+    drflac_uint32 riceParamParts0 = 0;
+    drflac_uint32 riceParamParts1 = 0;
+    drflac_uint32 riceParamParts2 = 0;
+    drflac_uint32 riceParamParts3 = 0;
+    __m128i coefficients128_0;
+    __m128i coefficients128_4;
+    __m128i coefficients128_8;
+    __m128i samples128_0;
+    __m128i samples128_4;
+    __m128i samples128_8;
+    __m128i prediction128;
+    __m128i riceParamMask128;
+
+    const drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF};
+
+    DRFLAC_ASSERT(order <= 12);
+
+    riceParamMask    = (drflac_uint32)~((~0UL) << riceParam);
+    riceParamMask128 = _mm_set1_epi32(riceParamMask);
+
+    prediction128 = _mm_setzero_si128();
+
+    /* Pre-load. */
+    coefficients128_0  = _mm_setzero_si128();
+    coefficients128_4  = _mm_setzero_si128();
+    coefficients128_8  = _mm_setzero_si128();
+
+    samples128_0  = _mm_setzero_si128();
+    samples128_4  = _mm_setzero_si128();
+    samples128_8  = _mm_setzero_si128();
+
+#if 1
+    {
+        int runningOrder = order;
+
+        /* 0 - 3. */
+        if (runningOrder >= 4) {
+            coefficients128_0 = _mm_loadu_si128((const __m128i*)(coefficients + 0));
+            samples128_0      = _mm_loadu_si128((const __m128i*)(pSamplesOut  - 4));
+            runningOrder -= 4;
+        } else {
+            switch (runningOrder) {
+                case 3: coefficients128_0 = _mm_set_epi32(0, coefficients[2], coefficients[1], coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], pSamplesOut[-2], pSamplesOut[-3], 0); break;
+                case 2: coefficients128_0 = _mm_set_epi32(0, 0,               coefficients[1], coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], pSamplesOut[-2], 0,               0); break;
+                case 1: coefficients128_0 = _mm_set_epi32(0, 0,               0,               coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], 0,               0,               0); break;
+            }
+            runningOrder = 0;
+        }
+
+        /* 4 - 7 */
+        if (runningOrder >= 4) {
+            coefficients128_4 = _mm_loadu_si128((const __m128i*)(coefficients + 4));
+            samples128_4      = _mm_loadu_si128((const __m128i*)(pSamplesOut  - 8));
+            runningOrder -= 4;
+        } else {
+            switch (runningOrder) {
+                case 3: coefficients128_4 = _mm_set_epi32(0, coefficients[6], coefficients[5], coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], pSamplesOut[-6], pSamplesOut[-7], 0); break;
+                case 2: coefficients128_4 = _mm_set_epi32(0, 0,               coefficients[5], coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], pSamplesOut[-6], 0,               0); break;
+                case 1: coefficients128_4 = _mm_set_epi32(0, 0,               0,               coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], 0,               0,               0); break;
+            }
+            runningOrder = 0;
+        }
+
+        /* 8 - 11 */
+        if (runningOrder == 4) {
+            coefficients128_8 = _mm_loadu_si128((const __m128i*)(coefficients + 8));
+            samples128_8      = _mm_loadu_si128((const __m128i*)(pSamplesOut  - 12));
+            runningOrder -= 4;
+        } else {
+            switch (runningOrder) {
+                case 3: coefficients128_8 = _mm_set_epi32(0, coefficients[10], coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], pSamplesOut[-11], 0); break;
+                case 2: coefficients128_8 = _mm_set_epi32(0, 0,                coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], 0,                0); break;
+                case 1: coefficients128_8 = _mm_set_epi32(0, 0,                0,               coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], 0,                0,                0); break;
+            }
+            runningOrder = 0;
+        }
+
+        /* Coefficients need to be shuffled for our streaming algorithm below to work. Samples are already in the correct order from the loading routine above. */
+        coefficients128_0 = _mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(0, 1, 2, 3));
+        coefficients128_4 = _mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(0, 1, 2, 3));
+        coefficients128_8 = _mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(0, 1, 2, 3));
+    }
+#else
+    switch (order)
+    {
+    case 12: ((drflac_int32*)&coefficients128_8)[0] = coefficients[11]; ((drflac_int32*)&samples128_8)[0] = pDecodedSamples[-12];
+    case 11: ((drflac_int32*)&coefficients128_8)[1] = coefficients[10]; ((drflac_int32*)&samples128_8)[1] = pDecodedSamples[-11];
+    case 10: ((drflac_int32*)&coefficients128_8)[2] = coefficients[ 9]; ((drflac_int32*)&samples128_8)[2] = pDecodedSamples[-10];
+    case 9:  ((drflac_int32*)&coefficients128_8)[3] = coefficients[ 8]; ((drflac_int32*)&samples128_8)[3] = pDecodedSamples[- 9];
+    case 8:  ((drflac_int32*)&coefficients128_4)[0] = coefficients[ 7]; ((drflac_int32*)&samples128_4)[0] = pDecodedSamples[- 8];
+    case 7:  ((drflac_int32*)&coefficients128_4)[1] = coefficients[ 6]; ((drflac_int32*)&samples128_4)[1] = pDecodedSamples[- 7];
+    case 6:  ((drflac_int32*)&coefficients128_4)[2] = coefficients[ 5]; ((drflac_int32*)&samples128_4)[2] = pDecodedSamples[- 6];
+    case 5:  ((drflac_int32*)&coefficients128_4)[3] = coefficients[ 4]; ((drflac_int32*)&samples128_4)[3] = pDecodedSamples[- 5];
+    case 4:  ((drflac_int32*)&coefficients128_0)[0] = coefficients[ 3]; ((drflac_int32*)&samples128_0)[0] = pDecodedSamples[- 4];
+    case 3:  ((drflac_int32*)&coefficients128_0)[1] = coefficients[ 2]; ((drflac_int32*)&samples128_0)[1] = pDecodedSamples[- 3];
+    case 2:  ((drflac_int32*)&coefficients128_0)[2] = coefficients[ 1]; ((drflac_int32*)&samples128_0)[2] = pDecodedSamples[- 2];
+    case 1:  ((drflac_int32*)&coefficients128_0)[3] = coefficients[ 0]; ((drflac_int32*)&samples128_0)[3] = pDecodedSamples[- 1];
+    }
+#endif
+
+    /* For this version we are doing one sample at a time. */
+    while (pDecodedSamples < pDecodedSamplesEnd) {
+        __m128i zeroCountPart128;
+        __m128i riceParamPart128;
+
+        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts0, &riceParamParts0) ||
+            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts1, &riceParamParts1) ||
+            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts2, &riceParamParts2) ||
+            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts3, &riceParamParts3)) {
+            return DRFLAC_FALSE;
+        }
+
+        zeroCountPart128 = _mm_set_epi32(zeroCountParts3, zeroCountParts2, zeroCountParts1, zeroCountParts0);
+        riceParamPart128 = _mm_set_epi32(riceParamParts3, riceParamParts2, riceParamParts1, riceParamParts0);
+
+        riceParamPart128 = _mm_and_si128(riceParamPart128, riceParamMask128);
+        riceParamPart128 = _mm_or_si128(riceParamPart128, _mm_slli_epi32(zeroCountPart128, riceParam));
+        riceParamPart128 = _mm_xor_si128(_mm_srli_epi32(riceParamPart128, 1), _mm_add_epi32(drflac__mm_not_si128(_mm_and_si128(riceParamPart128, _mm_set1_epi32(1))), _mm_set1_epi32(1)));
+
+        for (i = 0; i < 4; i += 1) {
+            prediction128 = _mm_xor_si128(prediction128, prediction128);    /* Reset to 0. */
+
+            switch (order)
+            {
+            case 12:
+            case 11: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(1, 1, 0, 0)), _mm_shuffle_epi32(samples128_8, _MM_SHUFFLE(1, 1, 0, 0))));
+            case 10:
+            case  9: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(3, 3, 2, 2)), _mm_shuffle_epi32(samples128_8, _MM_SHUFFLE(3, 3, 2, 2))));
+            case  8:
+            case  7: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(1, 1, 0, 0)), _mm_shuffle_epi32(samples128_4, _MM_SHUFFLE(1, 1, 0, 0))));
+            case  6:
+            case  5: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(3, 3, 2, 2)), _mm_shuffle_epi32(samples128_4, _MM_SHUFFLE(3, 3, 2, 2))));
+            case  4:
+            case  3: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(1, 1, 0, 0)), _mm_shuffle_epi32(samples128_0, _MM_SHUFFLE(1, 1, 0, 0))));
+            case  2:
+            case  1: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(3, 3, 2, 2)), _mm_shuffle_epi32(samples128_0, _MM_SHUFFLE(3, 3, 2, 2))));
+            }
+
+            /* Horizontal add and shift. */
+            prediction128 = drflac__mm_hadd_epi64(prediction128);
+            prediction128 = drflac__mm_srai_epi64(prediction128, shift);
+            prediction128 = _mm_add_epi32(riceParamPart128, prediction128);
+
+            /* Our value should be sitting in prediction128[0]. We need to combine this with our SSE samples. */
+            samples128_8 = _mm_alignr_epi8(samples128_4,  samples128_8, 4);
+            samples128_4 = _mm_alignr_epi8(samples128_0,  samples128_4, 4);
+            samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4);
+
+            /* Slide our rice parameter down so that the value in position 0 contains the next one to process. */
+            riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4);
+        }
+
+        /* We store samples in groups of 4. */
+        _mm_storeu_si128((__m128i*)pDecodedSamples, samples128_0);
+        pDecodedSamples += 4;
+    }
+
+    /* Make sure we process the last few samples. */
+    i = (count & ~3);
+    while (i < (int)count) {
+        /* Rice extraction. */
+        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts0, &riceParamParts0)) {
+            return DRFLAC_FALSE;
+        }
+
+        /* Rice reconstruction. */
+        riceParamParts0 &= riceParamMask;
+        riceParamParts0 |= (zeroCountParts0 << riceParam);
+        riceParamParts0  = (riceParamParts0 >> 1) ^ t[riceParamParts0 & 0x01];
+
+        /* Sample reconstruction. */
+        pDecodedSamples[0] = riceParamParts0 + drflac__calculate_prediction_64(order, shift, coefficients, pDecodedSamples);
+
+        i += 1;
+        pDecodedSamples += 1;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__decode_samples_with_residual__rice__sse41(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+{
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(count > 0);
+    DRFLAC_ASSERT(pSamplesOut != NULL);
+
+    /* In my testing the order is rarely > 12, so in this case I'm going to simplify the SSE implementation by only handling order <= 12. */
+    if (order > 0 && order <= 12) {
+        if (bitsPerSample+shift > 32) {
+            return drflac__decode_samples_with_residual__rice__sse41_64(bs, count, riceParam, order, shift, coefficients, pSamplesOut);
+        } else {
+            return drflac__decode_samples_with_residual__rice__sse41_32(bs, count, riceParam, order, shift, coefficients, pSamplesOut);
+        }
+    } else {
+        return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac__vst2q_s32(drflac_int32* p, int32x4x2_t x)
+{
+    vst1q_s32(p+0, x.val[0]);
+    vst1q_s32(p+4, x.val[1]);
+}
+
+static DRFLAC_INLINE void drflac__vst2q_u32(drflac_uint32* p, uint32x4x2_t x)
+{
+    vst1q_u32(p+0, x.val[0]);
+    vst1q_u32(p+4, x.val[1]);
+}
+
+static DRFLAC_INLINE void drflac__vst2q_f32(double* p, float32x4x2_t x)
+{
+    vst1q_f32(p+0, x.val[0]);
+    vst1q_f32(p+4, x.val[1]);
+}
+
+static DRFLAC_INLINE void drflac__vst2q_s16(drflac_int16* p, int16x4x2_t x)
+{
+    vst1q_s16(p, vcombine_s16(x.val[0], x.val[1]));
+}
+
+static DRFLAC_INLINE void drflac__vst2q_u16(drflac_uint16* p, uint16x4x2_t x)
+{
+    vst1q_u16(p, vcombine_u16(x.val[0], x.val[1]));
+}
+
+static DRFLAC_INLINE int32x4_t drflac__vdupq_n_s32x4(drflac_int32 x3, drflac_int32 x2, drflac_int32 x1, drflac_int32 x0)
+{
+    drflac_int32 x[4];
+    x[3] = x3;
+    x[2] = x2;
+    x[1] = x1;
+    x[0] = x0;
+    return vld1q_s32(x);
+}
+
+static DRFLAC_INLINE int32x4_t drflac__valignrq_s32_1(int32x4_t a, int32x4_t b)
+{
+    /* Equivalent to SSE's _mm_alignr_epi8(a, b, 4) */
+
+    /* Reference */
+    /*return drflac__vdupq_n_s32x4(
+        vgetq_lane_s32(a, 0),
+        vgetq_lane_s32(b, 3),
+        vgetq_lane_s32(b, 2),
+        vgetq_lane_s32(b, 1)
+    );*/
+
+    return vextq_s32(b, a, 1);
+}
+
+static DRFLAC_INLINE uint32x4_t drflac__valignrq_u32_1(uint32x4_t a, uint32x4_t b)
+{
+    /* Equivalent to SSE's _mm_alignr_epi8(a, b, 4) */
+
+    /* Reference */
+    /*return drflac__vdupq_n_s32x4(
+        vgetq_lane_s32(a, 0),
+        vgetq_lane_s32(b, 3),
+        vgetq_lane_s32(b, 2),
+        vgetq_lane_s32(b, 1)
+    );*/
+
+    return vextq_u32(b, a, 1);
+}
+
+static DRFLAC_INLINE int32x2_t drflac__vhaddq_s32(int32x4_t x)
+{
+    /* The sum must end up in position 0. */
+
+    /* Reference */
+    /*return vdupq_n_s32(
+        vgetq_lane_s32(x, 3) +
+        vgetq_lane_s32(x, 2) +
+        vgetq_lane_s32(x, 1) +
+        vgetq_lane_s32(x, 0)
+    );*/
+
+    int32x2_t r = vadd_s32(vget_high_s32(x), vget_low_s32(x));
+    return vpadd_s32(r, r);
+}
+
+static DRFLAC_INLINE int64x1_t drflac__vhaddq_s64(int64x2_t x)
+{
+    return vadd_s64(vget_high_s64(x), vget_low_s64(x));
+}
+
+static DRFLAC_INLINE int32x4_t drflac__vrevq_s32(int32x4_t x)
+{
+    /* Reference */
+    /*return drflac__vdupq_n_s32x4(
+        vgetq_lane_s32(x, 0),
+        vgetq_lane_s32(x, 1),
+        vgetq_lane_s32(x, 2),
+        vgetq_lane_s32(x, 3)
+    );*/
+
+    return vrev64q_s32(vcombine_s32(vget_high_s32(x), vget_low_s32(x)));
+}
+
+static DRFLAC_INLINE int32x4_t drflac__vnotq_s32(int32x4_t x)
+{
+    return veorq_s32(x, vdupq_n_s32(0xFFFFFFFF));
+}
+
+static DRFLAC_INLINE uint32x4_t drflac__vnotq_u32(uint32x4_t x)
+{
+    return veorq_u32(x, vdupq_n_u32(0xFFFFFFFF));
+}
+
+static drflac_bool32 drflac__decode_samples_with_residual__rice__neon_32(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+{
+    int i;
+    drflac_uint32 riceParamMask;
+    drflac_int32* pDecodedSamples    = pSamplesOut;
+    drflac_int32* pDecodedSamplesEnd = pSamplesOut + (count & ~3);
+    drflac_uint32 zeroCountParts[4];
+    drflac_uint32 riceParamParts[4];
+    int32x4_t coefficients128_0;
+    int32x4_t coefficients128_4;
+    int32x4_t coefficients128_8;
+    int32x4_t samples128_0;
+    int32x4_t samples128_4;
+    int32x4_t samples128_8;
+    uint32x4_t riceParamMask128;
+    int32x4_t riceParam128;
+    int32x2_t shift64;
+    uint32x4_t one128;
+
+    const drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF};
+
+    riceParamMask    = ~((~0UL) << riceParam);
+    riceParamMask128 = vdupq_n_u32(riceParamMask);
+
+    riceParam128 = vdupq_n_s32(riceParam);
+    shift64 = vdup_n_s32(-shift); /* Negate the shift because we'll be doing a variable shift using vshlq_s32(). */
+    one128 = vdupq_n_u32(1);
+
+    /*
+    Pre-loading the coefficients and prior samples is annoying because we need to ensure we don't try reading more than
+    what's available in the input buffers. It would be conenient to use a fall-through switch to do this, but this results
+    in strict aliasing warnings with GCC. To work around this I'm just doing something hacky. This feels a bit convoluted
+    so I think there's opportunity for this to be simplified.
+    */
+    {
+        int runningOrder = order;
+        drflac_int32 tempC[4] = {0, 0, 0, 0};
+        drflac_int32 tempS[4] = {0, 0, 0, 0};
+
+        /* 0 - 3. */
+        if (runningOrder >= 4) {
+            coefficients128_0 = vld1q_s32(coefficients + 0);
+            samples128_0      = vld1q_s32(pSamplesOut  - 4);
+            runningOrder -= 4;
+        } else {
+            switch (runningOrder) {
+                case 3: tempC[2] = coefficients[2]; tempS[1] = pSamplesOut[-3]; /* fallthrough */
+                case 2: tempC[1] = coefficients[1]; tempS[2] = pSamplesOut[-2]; /* fallthrough */
+                case 1: tempC[0] = coefficients[0]; tempS[3] = pSamplesOut[-1]; /* fallthrough */
+            }
+
+            coefficients128_0 = vld1q_s32(tempC);
+            samples128_0      = vld1q_s32(tempS);
+            runningOrder = 0;
+        }
+
+        /* 4 - 7 */
+        if (runningOrder >= 4) {
+            coefficients128_4 = vld1q_s32(coefficients + 4);
+            samples128_4      = vld1q_s32(pSamplesOut  - 8);
+            runningOrder -= 4;
+        } else {
+            switch (runningOrder) {
+                case 3: tempC[2] = coefficients[6]; tempS[1] = pSamplesOut[-7]; /* fallthrough */
+                case 2: tempC[1] = coefficients[5]; tempS[2] = pSamplesOut[-6]; /* fallthrough */
+                case 1: tempC[0] = coefficients[4]; tempS[3] = pSamplesOut[-5]; /* fallthrough */
+            }
+
+            coefficients128_4 = vld1q_s32(tempC);
+            samples128_4      = vld1q_s32(tempS);
+            runningOrder = 0;
+        }
+
+        /* 8 - 11 */
+        if (runningOrder == 4) {
+            coefficients128_8 = vld1q_s32(coefficients + 8);
+            samples128_8      = vld1q_s32(pSamplesOut  - 12);
+            runningOrder -= 4;
+        } else {
+            switch (runningOrder) {
+                case 3: tempC[2] = coefficients[10]; tempS[1] = pSamplesOut[-11]; /* fallthrough */
+                case 2: tempC[1] = coefficients[ 9]; tempS[2] = pSamplesOut[-10]; /* fallthrough */
+                case 1: tempC[0] = coefficients[ 8]; tempS[3] = pSamplesOut[- 9]; /* fallthrough */
+            }
+
+            coefficients128_8 = vld1q_s32(tempC);
+            samples128_8      = vld1q_s32(tempS);
+            runningOrder = 0;
+        }
+
+        /* Coefficients need to be shuffled for our streaming algorithm below to work. Samples are already in the correct order from the loading routine above. */
+        coefficients128_0 = drflac__vrevq_s32(coefficients128_0);
+        coefficients128_4 = drflac__vrevq_s32(coefficients128_4);
+        coefficients128_8 = drflac__vrevq_s32(coefficients128_8);
+    }
+
+    /* For this version we are doing one sample at a time. */
+    while (pDecodedSamples < pDecodedSamplesEnd) {
+        int32x4_t prediction128;
+        int32x2_t prediction64;
+        uint32x4_t zeroCountPart128;
+        uint32x4_t riceParamPart128;
+
+        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[0], &riceParamParts[0]) ||
+            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[1], &riceParamParts[1]) ||
+            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[2], &riceParamParts[2]) ||
+            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[3], &riceParamParts[3])) {
+            return DRFLAC_FALSE;
+        }
+
+        zeroCountPart128 = vld1q_u32(zeroCountParts);
+        riceParamPart128 = vld1q_u32(riceParamParts);
+
+        riceParamPart128 = vandq_u32(riceParamPart128, riceParamMask128);
+        riceParamPart128 = vorrq_u32(riceParamPart128, vshlq_u32(zeroCountPart128, riceParam128));
+        riceParamPart128 = veorq_u32(vshrq_n_u32(riceParamPart128, 1), vaddq_u32(drflac__vnotq_u32(vandq_u32(riceParamPart128, one128)), one128));
+
+        if (order <= 4) {
+            for (i = 0; i < 4; i += 1) {
+                prediction128 = vmulq_s32(coefficients128_0, samples128_0);
+
+                /* Horizontal add and shift. */
+                prediction64 = drflac__vhaddq_s32(prediction128);
+                prediction64 = vshl_s32(prediction64, shift64);
+                prediction64 = vadd_s32(prediction64, vget_low_s32(vreinterpretq_s32_u32(riceParamPart128)));
+
+                samples128_0 = drflac__valignrq_s32_1(vcombine_s32(prediction64, vdup_n_s32(0)), samples128_0);
+                riceParamPart128 = drflac__valignrq_u32_1(vdupq_n_u32(0), riceParamPart128);
+            }
+        } else if (order <= 8) {
+            for (i = 0; i < 4; i += 1) {
+                prediction128 =                vmulq_s32(coefficients128_4, samples128_4);
+                prediction128 = vmlaq_s32(prediction128, coefficients128_0, samples128_0);
+
+                /* Horizontal add and shift. */
+                prediction64 = drflac__vhaddq_s32(prediction128);
+                prediction64 = vshl_s32(prediction64, shift64);
+                prediction64 = vadd_s32(prediction64, vget_low_s32(vreinterpretq_s32_u32(riceParamPart128)));
+
+                samples128_4 = drflac__valignrq_s32_1(samples128_0, samples128_4);
+                samples128_0 = drflac__valignrq_s32_1(vcombine_s32(prediction64, vdup_n_s32(0)), samples128_0);
+                riceParamPart128 = drflac__valignrq_u32_1(vdupq_n_u32(0), riceParamPart128);
+            }
+        } else {
+            for (i = 0; i < 4; i += 1) {
+                prediction128 =                vmulq_s32(coefficients128_8, samples128_8);
+                prediction128 = vmlaq_s32(prediction128, coefficients128_4, samples128_4);
+                prediction128 = vmlaq_s32(prediction128, coefficients128_0, samples128_0);
+
+                /* Horizontal add and shift. */
+                prediction64 = drflac__vhaddq_s32(prediction128);
+                prediction64 = vshl_s32(prediction64, shift64);
+                prediction64 = vadd_s32(prediction64, vget_low_s32(vreinterpretq_s32_u32(riceParamPart128)));
+
+                samples128_8 = drflac__valignrq_s32_1(samples128_4, samples128_8);
+                samples128_4 = drflac__valignrq_s32_1(samples128_0, samples128_4);
+                samples128_0 = drflac__valignrq_s32_1(vcombine_s32(prediction64, vdup_n_s32(0)), samples128_0);
+                riceParamPart128 = drflac__valignrq_u32_1(vdupq_n_u32(0), riceParamPart128);
+            }
+        }
+
+        /* We store samples in groups of 4. */
+        vst1q_s32(pDecodedSamples, samples128_0);
+        pDecodedSamples += 4;
+    }
+
+    /* Make sure we process the last few samples. */
+    i = (count & ~3);
+    while (i < (int)count) {
+        /* Rice extraction. */
+        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[0], &riceParamParts[0])) {
+            return DRFLAC_FALSE;
+        }
+
+        /* Rice reconstruction. */
+        riceParamParts[0] &= riceParamMask;
+        riceParamParts[0] |= (zeroCountParts[0] << riceParam);
+        riceParamParts[0]  = (riceParamParts[0] >> 1) ^ t[riceParamParts[0] & 0x01];
+
+        /* Sample reconstruction. */
+        pDecodedSamples[0] = riceParamParts[0] + drflac__calculate_prediction_32(order, shift, coefficients, pDecodedSamples);
+
+        i += 1;
+        pDecodedSamples += 1;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__decode_samples_with_residual__rice__neon_64(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+{
+    int i;
+    drflac_uint32 riceParamMask;
+    drflac_int32* pDecodedSamples    = pSamplesOut;
+    drflac_int32* pDecodedSamplesEnd = pSamplesOut + (count & ~3);
+    drflac_uint32 zeroCountParts[4];
+    drflac_uint32 riceParamParts[4];
+    int32x4_t coefficients128_0;
+    int32x4_t coefficients128_4;
+    int32x4_t coefficients128_8;
+    int32x4_t samples128_0;
+    int32x4_t samples128_4;
+    int32x4_t samples128_8;
+    uint32x4_t riceParamMask128;
+    int32x4_t riceParam128;
+    int64x1_t shift64;
+    uint32x4_t one128;
+
+    const drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF};
+
+    riceParamMask    = ~((~0UL) << riceParam);
+    riceParamMask128 = vdupq_n_u32(riceParamMask);
+
+    riceParam128 = vdupq_n_s32(riceParam);
+    shift64 = vdup_n_s64(-shift); /* Negate the shift because we'll be doing a variable shift using vshlq_s32(). */
+    one128 = vdupq_n_u32(1);
+
+    /*
+    Pre-loading the coefficients and prior samples is annoying because we need to ensure we don't try reading more than
+    what's available in the input buffers. It would be conenient to use a fall-through switch to do this, but this results
+    in strict aliasing warnings with GCC. To work around this I'm just doing something hacky. This feels a bit convoluted
+    so I think there's opportunity for this to be simplified.
+    */
+    {
+        int runningOrder = order;
+        drflac_int32 tempC[4] = {0, 0, 0, 0};
+        drflac_int32 tempS[4] = {0, 0, 0, 0};
+
+        /* 0 - 3. */
+        if (runningOrder >= 4) {
+            coefficients128_0 = vld1q_s32(coefficients + 0);
+            samples128_0      = vld1q_s32(pSamplesOut  - 4);
+            runningOrder -= 4;
+        } else {
+            switch (runningOrder) {
+                case 3: tempC[2] = coefficients[2]; tempS[1] = pSamplesOut[-3]; /* fallthrough */
+                case 2: tempC[1] = coefficients[1]; tempS[2] = pSamplesOut[-2]; /* fallthrough */
+                case 1: tempC[0] = coefficients[0]; tempS[3] = pSamplesOut[-1]; /* fallthrough */
+            }
+
+            coefficients128_0 = vld1q_s32(tempC);
+            samples128_0      = vld1q_s32(tempS);
+            runningOrder = 0;
+        }
+
+        /* 4 - 7 */
+        if (runningOrder >= 4) {
+            coefficients128_4 = vld1q_s32(coefficients + 4);
+            samples128_4      = vld1q_s32(pSamplesOut  - 8);
+            runningOrder -= 4;
+        } else {
+            switch (runningOrder) {
+                case 3: tempC[2] = coefficients[6]; tempS[1] = pSamplesOut[-7]; /* fallthrough */
+                case 2: tempC[1] = coefficients[5]; tempS[2] = pSamplesOut[-6]; /* fallthrough */
+                case 1: tempC[0] = coefficients[4]; tempS[3] = pSamplesOut[-5]; /* fallthrough */
+            }
+
+            coefficients128_4 = vld1q_s32(tempC);
+            samples128_4      = vld1q_s32(tempS);
+            runningOrder = 0;
+        }
+
+        /* 8 - 11 */
+        if (runningOrder == 4) {
+            coefficients128_8 = vld1q_s32(coefficients + 8);
+            samples128_8      = vld1q_s32(pSamplesOut  - 12);
+            runningOrder -= 4;
+        } else {
+            switch (runningOrder) {
+                case 3: tempC[2] = coefficients[10]; tempS[1] = pSamplesOut[-11]; /* fallthrough */
+                case 2: tempC[1] = coefficients[ 9]; tempS[2] = pSamplesOut[-10]; /* fallthrough */
+                case 1: tempC[0] = coefficients[ 8]; tempS[3] = pSamplesOut[- 9]; /* fallthrough */
+            }
+
+            coefficients128_8 = vld1q_s32(tempC);
+            samples128_8      = vld1q_s32(tempS);
+            runningOrder = 0;
+        }
+
+        /* Coefficients need to be shuffled for our streaming algorithm below to work. Samples are already in the correct order from the loading routine above. */
+        coefficients128_0 = drflac__vrevq_s32(coefficients128_0);
+        coefficients128_4 = drflac__vrevq_s32(coefficients128_4);
+        coefficients128_8 = drflac__vrevq_s32(coefficients128_8);
+    }
+
+    /* For this version we are doing one sample at a time. */
+    while (pDecodedSamples < pDecodedSamplesEnd) {
+        int64x2_t prediction128;
+        uint32x4_t zeroCountPart128;
+        uint32x4_t riceParamPart128;
+
+        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[0], &riceParamParts[0]) ||
+            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[1], &riceParamParts[1]) ||
+            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[2], &riceParamParts[2]) ||
+            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[3], &riceParamParts[3])) {
+            return DRFLAC_FALSE;
+        }
+
+        zeroCountPart128 = vld1q_u32(zeroCountParts);
+        riceParamPart128 = vld1q_u32(riceParamParts);
+
+        riceParamPart128 = vandq_u32(riceParamPart128, riceParamMask128);
+        riceParamPart128 = vorrq_u32(riceParamPart128, vshlq_u32(zeroCountPart128, riceParam128));
+        riceParamPart128 = veorq_u32(vshrq_n_u32(riceParamPart128, 1), vaddq_u32(drflac__vnotq_u32(vandq_u32(riceParamPart128, one128)), one128));
+
+        for (i = 0; i < 4; i += 1) {
+            int64x1_t prediction64;
+
+            prediction128 = veorq_s64(prediction128, prediction128);    /* Reset to 0. */
+            switch (order)
+            {
+            case 12:
+            case 11: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_low_s32(coefficients128_8), vget_low_s32(samples128_8)));
+            case 10:
+            case  9: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_high_s32(coefficients128_8), vget_high_s32(samples128_8)));
+            case  8:
+            case  7: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_low_s32(coefficients128_4), vget_low_s32(samples128_4)));
+            case  6:
+            case  5: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_high_s32(coefficients128_4), vget_high_s32(samples128_4)));
+            case  4:
+            case  3: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_low_s32(coefficients128_0), vget_low_s32(samples128_0)));
+            case  2:
+            case  1: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_high_s32(coefficients128_0), vget_high_s32(samples128_0)));
+            }
+
+            /* Horizontal add and shift. */
+            prediction64 = drflac__vhaddq_s64(prediction128);
+            prediction64 = vshl_s64(prediction64, shift64);
+            prediction64 = vadd_s64(prediction64, vdup_n_s64(vgetq_lane_u32(riceParamPart128, 0)));
+
+            /* Our value should be sitting in prediction64[0]. We need to combine this with our SSE samples. */
+            samples128_8 = drflac__valignrq_s32_1(samples128_4, samples128_8);
+            samples128_4 = drflac__valignrq_s32_1(samples128_0, samples128_4);
+            samples128_0 = drflac__valignrq_s32_1(vcombine_s32(vreinterpret_s32_s64(prediction64), vdup_n_s32(0)), samples128_0);
+
+            /* Slide our rice parameter down so that the value in position 0 contains the next one to process. */
+            riceParamPart128 = drflac__valignrq_u32_1(vdupq_n_u32(0), riceParamPart128);
+        }
+
+        /* We store samples in groups of 4. */
+        vst1q_s32(pDecodedSamples, samples128_0);
+        pDecodedSamples += 4;
+    }
+
+    /* Make sure we process the last few samples. */
+    i = (count & ~3);
+    while (i < (int)count) {
+        /* Rice extraction. */
+        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[0], &riceParamParts[0])) {
+            return DRFLAC_FALSE;
+        }
+
+        /* Rice reconstruction. */
+        riceParamParts[0] &= riceParamMask;
+        riceParamParts[0] |= (zeroCountParts[0] << riceParam);
+        riceParamParts[0]  = (riceParamParts[0] >> 1) ^ t[riceParamParts[0] & 0x01];
+
+        /* Sample reconstruction. */
+        pDecodedSamples[0] = riceParamParts[0] + drflac__calculate_prediction_64(order, shift, coefficients, pDecodedSamples);
+
+        i += 1;
+        pDecodedSamples += 1;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__decode_samples_with_residual__rice__neon(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+{
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(count > 0);
+    DRFLAC_ASSERT(pSamplesOut != NULL);
+
+    /* In my testing the order is rarely > 12, so in this case I'm going to simplify the NEON implementation by only handling order <= 12. */
+    if (order > 0 && order <= 12) {
+        if (bitsPerSample+shift > 32) {
+            return drflac__decode_samples_with_residual__rice__neon_64(bs, count, riceParam, order, shift, coefficients, pSamplesOut);
+        } else {
+            return drflac__decode_samples_with_residual__rice__neon_32(bs, count, riceParam, order, shift, coefficients, pSamplesOut);
+        }
+    } else {
+        return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
+    }
+}
+#endif
+
+static drflac_bool32 drflac__decode_samples_with_residual__rice(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+{
+#if defined(DRFLAC_SUPPORT_SSE41)
+    if (drflac__gIsSSE41Supported) {
+        return drflac__decode_samples_with_residual__rice__sse41(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported) {
+        return drflac__decode_samples_with_residual__rice__neon(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+    #if 0
+        return drflac__decode_samples_with_residual__rice__reference(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
+    #else
+        return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
+    #endif
+    }
+}
+
+/* Reads and seeks past a string of residual values as Rice codes. The decoder should be sitting on the first bit of the Rice codes. */
+static drflac_bool32 drflac__read_and_seek_residual__rice(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam)
+{
+    drflac_uint32 i;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(count > 0);
+
+    for (i = 0; i < count; ++i) {
+        if (!drflac__seek_rice_parts(bs, riceParam)) {
+            return DRFLAC_FALSE;
+        }
+    }
+
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__decode_samples_with_residual__unencoded(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 unencodedBitsPerSample, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+{
+    drflac_uint32 i;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(count > 0);
+    DRFLAC_ASSERT(unencodedBitsPerSample <= 31);    /* <-- unencodedBitsPerSample is a 5 bit number, so cannot exceed 31. */
+    DRFLAC_ASSERT(pSamplesOut != NULL);
+
+    for (i = 0; i < count; ++i) {
+        if (unencodedBitsPerSample > 0) {
+            if (!drflac__read_int32(bs, unencodedBitsPerSample, pSamplesOut + i)) {
+                return DRFLAC_FALSE;
+            }
+        } else {
+            pSamplesOut[i] = 0;
+        }
+
+        if (bitsPerSample >= 24) {
+            pSamplesOut[i] += drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + i);
+        } else {
+            pSamplesOut[i] += drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + i);
+        }
+    }
+
+    return DRFLAC_TRUE;
+}
+
+
+/*
+Reads and decodes the residual for the sub-frame the decoder is currently sitting on. This function should be called
+when the decoder is sitting at the very start of the RESIDUAL block. The first <order> residuals will be ignored. The
+<blockSize> and <order> parameters are used to determine how many residual values need to be decoded.
+*/
+static drflac_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 blockSize, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pDecodedSamples)
+{
+    drflac_uint8 residualMethod;
+    drflac_uint8 partitionOrder;
+    drflac_uint32 samplesInPartition;
+    drflac_uint32 partitionsRemaining;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(blockSize != 0);
+    DRFLAC_ASSERT(pDecodedSamples != NULL);       /* <-- Should we allow NULL, in which case we just seek past the residual rather than do a full decode? */
+
+    if (!drflac__read_uint8(bs, 2, &residualMethod)) {
+        return DRFLAC_FALSE;
+    }
+
+    if (residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE && residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) {
+        return DRFLAC_FALSE;    /* Unknown or unsupported residual coding method. */
+    }
+
+    /* Ignore the first <order> values. */
+    pDecodedSamples += order;
+
+    if (!drflac__read_uint8(bs, 4, &partitionOrder)) {
+        return DRFLAC_FALSE;
+    }
+
+    /*
+    From the FLAC spec:
+      The Rice partition order in a Rice-coded residual section must be less than or equal to 8.
+    */
+    if (partitionOrder > 8) {
+        return DRFLAC_FALSE;
+    }
+
+    /* Validation check. */
+    if ((blockSize / (1 << partitionOrder)) <= order) {
+        return DRFLAC_FALSE;
+    }
+
+    samplesInPartition = (blockSize / (1 << partitionOrder)) - order;
+    partitionsRemaining = (1 << partitionOrder);
+    for (;;) {
+        drflac_uint8 riceParam = 0;
+        if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE) {
+            if (!drflac__read_uint8(bs, 4, &riceParam)) {
+                return DRFLAC_FALSE;
+            }
+            if (riceParam == 15) {
+                riceParam = 0xFF;
+            }
+        } else if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) {
+            if (!drflac__read_uint8(bs, 5, &riceParam)) {
+                return DRFLAC_FALSE;
+            }
+            if (riceParam == 31) {
+                riceParam = 0xFF;
+            }
+        }
+
+        if (riceParam != 0xFF) {
+            if (!drflac__decode_samples_with_residual__rice(bs, bitsPerSample, samplesInPartition, riceParam, order, shift, coefficients, pDecodedSamples)) {
+                return DRFLAC_FALSE;
+            }
+        } else {
+            drflac_uint8 unencodedBitsPerSample = 0;
+            if (!drflac__read_uint8(bs, 5, &unencodedBitsPerSample)) {
+                return DRFLAC_FALSE;
+            }
+
+            if (!drflac__decode_samples_with_residual__unencoded(bs, bitsPerSample, samplesInPartition, unencodedBitsPerSample, order, shift, coefficients, pDecodedSamples)) {
+                return DRFLAC_FALSE;
+            }
+        }
+
+        pDecodedSamples += samplesInPartition;
+
+        if (partitionsRemaining == 1) {
+            break;
+        }
+
+        partitionsRemaining -= 1;
+
+        if (partitionOrder != 0) {
+            samplesInPartition = blockSize / (1 << partitionOrder);
+        }
+    }
+
+    return DRFLAC_TRUE;
+}
+
+/*
+Reads and seeks past the residual for the sub-frame the decoder is currently sitting on. This function should be called
+when the decoder is sitting at the very start of the RESIDUAL block. The first <order> residuals will be set to 0. The
+<blockSize> and <order> parameters are used to determine how many residual values need to be decoded.
+*/
+static drflac_bool32 drflac__read_and_seek_residual(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 order)
+{
+    drflac_uint8 residualMethod;
+    drflac_uint8 partitionOrder;
+    drflac_uint32 samplesInPartition;
+    drflac_uint32 partitionsRemaining;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(blockSize != 0);
+
+    if (!drflac__read_uint8(bs, 2, &residualMethod)) {
+        return DRFLAC_FALSE;
+    }
+
+    if (residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE && residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) {
+        return DRFLAC_FALSE;    /* Unknown or unsupported residual coding method. */
+    }
+
+    if (!drflac__read_uint8(bs, 4, &partitionOrder)) {
+        return DRFLAC_FALSE;
+    }
+
+    /*
+    From the FLAC spec:
+      The Rice partition order in a Rice-coded residual section must be less than or equal to 8.
+    */
+    if (partitionOrder > 8) {
+        return DRFLAC_FALSE;
+    }
+
+    /* Validation check. */
+    if ((blockSize / (1 << partitionOrder)) <= order) {
+        return DRFLAC_FALSE;
+    }
+
+    samplesInPartition = (blockSize / (1 << partitionOrder)) - order;
+    partitionsRemaining = (1 << partitionOrder);
+    for (;;)
+    {
+        drflac_uint8 riceParam = 0;
+        if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE) {
+            if (!drflac__read_uint8(bs, 4, &riceParam)) {
+                return DRFLAC_FALSE;
+            }
+            if (riceParam == 15) {
+                riceParam = 0xFF;
+            }
+        } else if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) {
+            if (!drflac__read_uint8(bs, 5, &riceParam)) {
+                return DRFLAC_FALSE;
+            }
+            if (riceParam == 31) {
+                riceParam = 0xFF;
+            }
+        }
+
+        if (riceParam != 0xFF) {
+            if (!drflac__read_and_seek_residual__rice(bs, samplesInPartition, riceParam)) {
+                return DRFLAC_FALSE;
+            }
+        } else {
+            drflac_uint8 unencodedBitsPerSample = 0;
+            if (!drflac__read_uint8(bs, 5, &unencodedBitsPerSample)) {
+                return DRFLAC_FALSE;
+            }
+
+            if (!drflac__seek_bits(bs, unencodedBitsPerSample * samplesInPartition)) {
+                return DRFLAC_FALSE;
+            }
+        }
+
+
+        if (partitionsRemaining == 1) {
+            break;
+        }
+
+        partitionsRemaining -= 1;
+        samplesInPartition = blockSize / (1 << partitionOrder);
+    }
+
+    return DRFLAC_TRUE;
+}
+
+
+static drflac_bool32 drflac__decode_samples__constant(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 subframeBitsPerSample, drflac_int32* pDecodedSamples)
+{
+    drflac_uint32 i;
+
+    /* Only a single sample needs to be decoded here. */
+    drflac_int32 sample;
+    if (!drflac__read_int32(bs, subframeBitsPerSample, &sample)) {
+        return DRFLAC_FALSE;
+    }
+
+    /*
+    We don't really need to expand this, but it does simplify the process of reading samples. If this becomes a performance issue (unlikely)
+    we'll want to look at a more efficient way.
+    */
+    for (i = 0; i < blockSize; ++i) {
+        pDecodedSamples[i] = sample;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__decode_samples__verbatim(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 subframeBitsPerSample, drflac_int32* pDecodedSamples)
+{
+    drflac_uint32 i;
+
+    for (i = 0; i < blockSize; ++i) {
+        drflac_int32 sample;
+        if (!drflac__read_int32(bs, subframeBitsPerSample, &sample)) {
+            return DRFLAC_FALSE;
+        }
+
+        pDecodedSamples[i] = sample;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__decode_samples__fixed(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 subframeBitsPerSample, drflac_uint8 lpcOrder, drflac_int32* pDecodedSamples)
+{
+    drflac_uint32 i;
+
+    static drflac_int32 lpcCoefficientsTable[5][4] = {
+        {0,  0, 0,  0},
+        {1,  0, 0,  0},
+        {2, -1, 0,  0},
+        {3, -3, 1,  0},
+        {4, -6, 4, -1}
+    };
+
+    /* Warm up samples and coefficients. */
+    for (i = 0; i < lpcOrder; ++i) {
+        drflac_int32 sample;
+        if (!drflac__read_int32(bs, subframeBitsPerSample, &sample)) {
+            return DRFLAC_FALSE;
+        }
+
+        pDecodedSamples[i] = sample;
+    }
+
+    if (!drflac__decode_samples_with_residual(bs, subframeBitsPerSample, blockSize, lpcOrder, 0, lpcCoefficientsTable[lpcOrder], pDecodedSamples)) {
+        return DRFLAC_FALSE;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__decode_samples__lpc(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 bitsPerSample, drflac_uint8 lpcOrder, drflac_int32* pDecodedSamples)
+{
+    drflac_uint8 i;
+    drflac_uint8 lpcPrecision;
+    drflac_int8 lpcShift;
+    drflac_int32 coefficients[32];
+
+    /* Warm up samples. */
+    for (i = 0; i < lpcOrder; ++i) {
+        drflac_int32 sample;
+        if (!drflac__read_int32(bs, bitsPerSample, &sample)) {
+            return DRFLAC_FALSE;
+        }
+
+        pDecodedSamples[i] = sample;
+    }
+
+    if (!drflac__read_uint8(bs, 4, &lpcPrecision)) {
+        return DRFLAC_FALSE;
+    }
+    if (lpcPrecision == 15) {
+        return DRFLAC_FALSE;    /* Invalid. */
+    }
+    lpcPrecision += 1;
+
+    if (!drflac__read_int8(bs, 5, &lpcShift)) {
+        return DRFLAC_FALSE;
+    }
+
+    DRFLAC_ZERO_MEMORY(coefficients, sizeof(coefficients));
+    for (i = 0; i < lpcOrder; ++i) {
+        if (!drflac__read_int32(bs, lpcPrecision, coefficients + i)) {
+            return DRFLAC_FALSE;
+        }
+    }
+
+    if (!drflac__decode_samples_with_residual(bs, bitsPerSample, blockSize, lpcOrder, lpcShift, coefficients, pDecodedSamples)) {
+        return DRFLAC_FALSE;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+
+static drflac_bool32 drflac__read_next_flac_frame_header(drflac_bs* bs, drflac_uint8 streaminfoBitsPerSample, drflac_frame_header* header)
+{
+    const drflac_uint32 sampleRateTable[15]  = {0, 88200, 176400, 192000, 8000, 16000, 22050, 24000, 32000, 44100, 48000, 60000, 64000, 66000, 96000};
+    const drflac_uint8 bitsPerSampleTable[8] = {0, 8, 12, (drflac_uint8)-1, 16, 20, 24, (drflac_uint8)-1};   /* -1 = reserved. */
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(header != NULL);
+
+    /* Keep looping until we find a valid sync code. */
+    for (;;) {
+        drflac_uint8 crc8 = 0xCE; /* 0xCE = drflac_crc8(0, 0x3FFE, 14); */
+        drflac_uint8 reserved = 0;
+        drflac_uint8 blockingStrategy = 0;
+        drflac_uint8 blockSize = 0;
+        drflac_uint8 sampleRate = 0;
+        drflac_uint8 channelAssignment = 0;
+        drflac_uint8 bitsPerSample = 0;
+        drflac_bool32 isVariableBlockSize;
+
+        if (!drflac__find_and_seek_to_next_sync_code(bs)) {
+            return DRFLAC_FALSE;
+        }
+
+        if (!drflac__read_uint8(bs, 1, &reserved)) {
+            return DRFLAC_FALSE;
+        }
+        if (reserved == 1) {
+            continue;
+        }
+        crc8 = drflac_crc8(crc8, reserved, 1);
+
+        if (!drflac__read_uint8(bs, 1, &blockingStrategy)) {
+            return DRFLAC_FALSE;
+        }
+        crc8 = drflac_crc8(crc8, blockingStrategy, 1);
+
+        if (!drflac__read_uint8(bs, 4, &blockSize)) {
+            return DRFLAC_FALSE;
+        }
+        if (blockSize == 0) {
+            continue;
+        }
+        crc8 = drflac_crc8(crc8, blockSize, 4);
+
+        if (!drflac__read_uint8(bs, 4, &sampleRate)) {
+            return DRFLAC_FALSE;
+        }
+        crc8 = drflac_crc8(crc8, sampleRate, 4);
+
+        if (!drflac__read_uint8(bs, 4, &channelAssignment)) {
+            return DRFLAC_FALSE;
+        }
+        if (channelAssignment > 10) {
+            continue;
+        }
+        crc8 = drflac_crc8(crc8, channelAssignment, 4);
+
+        if (!drflac__read_uint8(bs, 3, &bitsPerSample)) {
+            return DRFLAC_FALSE;
+        }
+        if (bitsPerSample == 3 || bitsPerSample == 7) {
+            continue;
+        }
+        crc8 = drflac_crc8(crc8, bitsPerSample, 3);
+
+
+        if (!drflac__read_uint8(bs, 1, &reserved)) {
+            return DRFLAC_FALSE;
+        }
+        if (reserved == 1) {
+            continue;
+        }
+        crc8 = drflac_crc8(crc8, reserved, 1);
+
+
+        isVariableBlockSize = blockingStrategy == 1;
+        if (isVariableBlockSize) {
+            drflac_uint64 pcmFrameNumber;
+            drflac_result result = drflac__read_utf8_coded_number(bs, &pcmFrameNumber, &crc8);
+            if (result != DRFLAC_SUCCESS) {
+                if (result == DRFLAC_AT_END) {
+                    return DRFLAC_FALSE;
+                } else {
+                    continue;
+                }
+            }
+            header->flacFrameNumber  = 0;
+            header->pcmFrameNumber = pcmFrameNumber;
+        } else {
+            drflac_uint64 flacFrameNumber = 0;
+            drflac_result result = drflac__read_utf8_coded_number(bs, &flacFrameNumber, &crc8);
+            if (result != DRFLAC_SUCCESS) {
+                if (result == DRFLAC_AT_END) {
+                    return DRFLAC_FALSE;
+                } else {
+                    continue;
+                }
+            }
+            header->flacFrameNumber  = (drflac_uint32)flacFrameNumber;   /* <-- Safe cast. */
+            header->pcmFrameNumber = 0;
+        }
+
+
+        DRFLAC_ASSERT(blockSize > 0);
+        if (blockSize == 1) {
+            header->blockSizeInPCMFrames = 192;
+        } else if (blockSize >= 2 && blockSize <= 5) {
+            header->blockSizeInPCMFrames = 576 * (1 << (blockSize - 2));
+        } else if (blockSize == 6) {
+            if (!drflac__read_uint16(bs, 8, &header->blockSizeInPCMFrames)) {
+                return DRFLAC_FALSE;
+            }
+            crc8 = drflac_crc8(crc8, header->blockSizeInPCMFrames, 8);
+            header->blockSizeInPCMFrames += 1;
+        } else if (blockSize == 7) {
+            if (!drflac__read_uint16(bs, 16, &header->blockSizeInPCMFrames)) {
+                return DRFLAC_FALSE;
+            }
+            crc8 = drflac_crc8(crc8, header->blockSizeInPCMFrames, 16);
+            header->blockSizeInPCMFrames += 1;
+        } else {
+            DRFLAC_ASSERT(blockSize >= 8);
+            header->blockSizeInPCMFrames = 256 * (1 << (blockSize - 8));
+        }
+
+
+        if (sampleRate <= 11) {
+            header->sampleRate = sampleRateTable[sampleRate];
+        } else if (sampleRate == 12) {
+            if (!drflac__read_uint32(bs, 8, &header->sampleRate)) {
+                return DRFLAC_FALSE;
+            }
+            crc8 = drflac_crc8(crc8, header->sampleRate, 8);
+            header->sampleRate *= 1000;
+        } else if (sampleRate == 13) {
+            if (!drflac__read_uint32(bs, 16, &header->sampleRate)) {
+                return DRFLAC_FALSE;
+            }
+            crc8 = drflac_crc8(crc8, header->sampleRate, 16);
+        } else if (sampleRate == 14) {
+            if (!drflac__read_uint32(bs, 16, &header->sampleRate)) {
+                return DRFLAC_FALSE;
+            }
+            crc8 = drflac_crc8(crc8, header->sampleRate, 16);
+            header->sampleRate *= 10;
+        } else {
+            continue;  /* Invalid. Assume an invalid block. */
+        }
+
+
+        header->channelAssignment = channelAssignment;
+
+        header->bitsPerSample = bitsPerSampleTable[bitsPerSample];
+        if (header->bitsPerSample == 0) {
+            header->bitsPerSample = streaminfoBitsPerSample;
+        }
+
+        if (!drflac__read_uint8(bs, 8, &header->crc8)) {
+            return DRFLAC_FALSE;
+        }
+
+#ifndef DR_FLAC_NO_CRC
+        if (header->crc8 != crc8) {
+            continue;    /* CRC mismatch. Loop back to the top and find the next sync code. */
+        }
+#endif
+        return DRFLAC_TRUE;
+    }
+}
+
+static drflac_bool32 drflac__read_subframe_header(drflac_bs* bs, drflac_subframe* pSubframe)
+{
+    drflac_uint8 header;
+    int type;
+
+    if (!drflac__read_uint8(bs, 8, &header)) {
+        return DRFLAC_FALSE;
+    }
+
+    /* First bit should always be 0. */
+    if ((header & 0x80) != 0) {
+        return DRFLAC_FALSE;
+    }
+
+    type = (header & 0x7E) >> 1;
+    if (type == 0) {
+        pSubframe->subframeType = DRFLAC_SUBFRAME_CONSTANT;
+    } else if (type == 1) {
+        pSubframe->subframeType = DRFLAC_SUBFRAME_VERBATIM;
+    } else {
+        if ((type & 0x20) != 0) {
+            pSubframe->subframeType = DRFLAC_SUBFRAME_LPC;
+            pSubframe->lpcOrder = (drflac_uint8)(type & 0x1F) + 1;
+        } else if ((type & 0x08) != 0) {
+            pSubframe->subframeType = DRFLAC_SUBFRAME_FIXED;
+            pSubframe->lpcOrder = (drflac_uint8)(type & 0x07);
+            if (pSubframe->lpcOrder > 4) {
+                pSubframe->subframeType = DRFLAC_SUBFRAME_RESERVED;
+                pSubframe->lpcOrder = 0;
+            }
+        } else {
+            pSubframe->subframeType = DRFLAC_SUBFRAME_RESERVED;
+        }
+    }
+
+    if (pSubframe->subframeType == DRFLAC_SUBFRAME_RESERVED) {
+        return DRFLAC_FALSE;
+    }
+
+    /* Wasted bits per sample. */
+    pSubframe->wastedBitsPerSample = 0;
+    if ((header & 0x01) == 1) {
+        unsigned int wastedBitsPerSample;
+        if (!drflac__seek_past_next_set_bit(bs, &wastedBitsPerSample)) {
+            return DRFLAC_FALSE;
+        }
+        pSubframe->wastedBitsPerSample = (drflac_uint8)wastedBitsPerSample + 1;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__decode_subframe(drflac_bs* bs, drflac_frame* frame, int subframeIndex, drflac_int32* pDecodedSamplesOut)
+{
+    drflac_subframe* pSubframe;
+    drflac_uint32 subframeBitsPerSample;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(frame != NULL);
+
+    pSubframe = frame->subframes + subframeIndex;
+    if (!drflac__read_subframe_header(bs, pSubframe)) {
+        return DRFLAC_FALSE;
+    }
+
+    /* Side channels require an extra bit per sample. Took a while to figure that one out... */
+    subframeBitsPerSample = frame->header.bitsPerSample;
+    if ((frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE || frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE) && subframeIndex == 1) {
+        subframeBitsPerSample += 1;
+    } else if (frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE && subframeIndex == 0) {
+        subframeBitsPerSample += 1;
+    }
+
+    /* Need to handle wasted bits per sample. */
+    if (pSubframe->wastedBitsPerSample >= subframeBitsPerSample) {
+        return DRFLAC_FALSE;
+    }
+    subframeBitsPerSample -= pSubframe->wastedBitsPerSample;
+
+    pSubframe->pSamplesS32 = pDecodedSamplesOut;
+
+    switch (pSubframe->subframeType)
+    {
+        case DRFLAC_SUBFRAME_CONSTANT:
+        {
+            drflac__decode_samples__constant(bs, frame->header.blockSizeInPCMFrames, subframeBitsPerSample, pSubframe->pSamplesS32);
+        } break;
+
+        case DRFLAC_SUBFRAME_VERBATIM:
+        {
+            drflac__decode_samples__verbatim(bs, frame->header.blockSizeInPCMFrames, subframeBitsPerSample, pSubframe->pSamplesS32);
+        } break;
+
+        case DRFLAC_SUBFRAME_FIXED:
+        {
+            drflac__decode_samples__fixed(bs, frame->header.blockSizeInPCMFrames, subframeBitsPerSample, pSubframe->lpcOrder, pSubframe->pSamplesS32);
+        } break;
+
+        case DRFLAC_SUBFRAME_LPC:
+        {
+            drflac__decode_samples__lpc(bs, frame->header.blockSizeInPCMFrames, subframeBitsPerSample, pSubframe->lpcOrder, pSubframe->pSamplesS32);
+        } break;
+
+        default: return DRFLAC_FALSE;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__seek_subframe(drflac_bs* bs, drflac_frame* frame, int subframeIndex)
+{
+    drflac_subframe* pSubframe;
+    drflac_uint32 subframeBitsPerSample;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(frame != NULL);
+
+    pSubframe = frame->subframes + subframeIndex;
+    if (!drflac__read_subframe_header(bs, pSubframe)) {
+        return DRFLAC_FALSE;
+    }
+
+    /* Side channels require an extra bit per sample. Took a while to figure that one out... */
+    subframeBitsPerSample = frame->header.bitsPerSample;
+    if ((frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE || frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE) && subframeIndex == 1) {
+        subframeBitsPerSample += 1;
+    } else if (frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE && subframeIndex == 0) {
+        subframeBitsPerSample += 1;
+    }
+
+    /* Need to handle wasted bits per sample. */
+    if (pSubframe->wastedBitsPerSample >= subframeBitsPerSample) {
+        return DRFLAC_FALSE;
+    }
+    subframeBitsPerSample -= pSubframe->wastedBitsPerSample;
+
+    pSubframe->pSamplesS32 = NULL;
+
+    switch (pSubframe->subframeType)
+    {
+        case DRFLAC_SUBFRAME_CONSTANT:
+        {
+            if (!drflac__seek_bits(bs, subframeBitsPerSample)) {
+                return DRFLAC_FALSE;
+            }
+        } break;
+
+        case DRFLAC_SUBFRAME_VERBATIM:
+        {
+            unsigned int bitsToSeek = frame->header.blockSizeInPCMFrames * subframeBitsPerSample;
+            if (!drflac__seek_bits(bs, bitsToSeek)) {
+                return DRFLAC_FALSE;
+            }
+        } break;
+
+        case DRFLAC_SUBFRAME_FIXED:
+        {
+            unsigned int bitsToSeek = pSubframe->lpcOrder * subframeBitsPerSample;
+            if (!drflac__seek_bits(bs, bitsToSeek)) {
+                return DRFLAC_FALSE;
+            }
+
+            if (!drflac__read_and_seek_residual(bs, frame->header.blockSizeInPCMFrames, pSubframe->lpcOrder)) {
+                return DRFLAC_FALSE;
+            }
+        } break;
+
+        case DRFLAC_SUBFRAME_LPC:
+        {
+            drflac_uint8 lpcPrecision;
+
+            unsigned int bitsToSeek = pSubframe->lpcOrder * subframeBitsPerSample;
+            if (!drflac__seek_bits(bs, bitsToSeek)) {
+                return DRFLAC_FALSE;
+            }
+
+            if (!drflac__read_uint8(bs, 4, &lpcPrecision)) {
+                return DRFLAC_FALSE;
+            }
+            if (lpcPrecision == 15) {
+                return DRFLAC_FALSE;    /* Invalid. */
+            }
+            lpcPrecision += 1;
+
+
+            bitsToSeek = (pSubframe->lpcOrder * lpcPrecision) + 5;    /* +5 for shift. */
+            if (!drflac__seek_bits(bs, bitsToSeek)) {
+                return DRFLAC_FALSE;
+            }
+
+            if (!drflac__read_and_seek_residual(bs, frame->header.blockSizeInPCMFrames, pSubframe->lpcOrder)) {
+                return DRFLAC_FALSE;
+            }
+        } break;
+
+        default: return DRFLAC_FALSE;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+
+static DRFLAC_INLINE drflac_uint8 drflac__get_channel_count_from_channel_assignment(drflac_int8 channelAssignment)
+{
+    drflac_uint8 lookup[] = {1, 2, 3, 4, 5, 6, 7, 8, 2, 2, 2};
+
+    DRFLAC_ASSERT(channelAssignment <= 10);
+    return lookup[channelAssignment];
+}
+
+static drflac_result drflac__decode_flac_frame(drflac* pFlac)
+{
+    int channelCount;
+    int i;
+    drflac_uint8 paddingSizeInBits;
+    drflac_uint16 desiredCRC16;
+#ifndef DR_FLAC_NO_CRC
+    drflac_uint16 actualCRC16;
+#endif
+
+    /* This function should be called while the stream is sitting on the first byte after the frame header. */
+    DRFLAC_ZERO_MEMORY(pFlac->currentFLACFrame.subframes, sizeof(pFlac->currentFLACFrame.subframes));
+
+    /* The frame block size must never be larger than the maximum block size defined by the FLAC stream. */
+    if (pFlac->currentFLACFrame.header.blockSizeInPCMFrames > pFlac->maxBlockSizeInPCMFrames) {
+        return DRFLAC_ERROR;
+    }
+
+    /* The number of channels in the frame must match the channel count from the STREAMINFO block. */
+    channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFLACFrame.header.channelAssignment);
+    if (channelCount != (int)pFlac->channels) {
+        return DRFLAC_ERROR;
+    }
+
+    for (i = 0; i < channelCount; ++i) {
+        if (!drflac__decode_subframe(&pFlac->bs, &pFlac->currentFLACFrame, i, pFlac->pDecodedSamples + (pFlac->currentFLACFrame.header.blockSizeInPCMFrames * i))) {
+            return DRFLAC_ERROR;
+        }
+    }
+
+    paddingSizeInBits = (drflac_uint8)(DRFLAC_CACHE_L1_BITS_REMAINING(&pFlac->bs) & 7);
+    if (paddingSizeInBits > 0) {
+        drflac_uint8 padding = 0;
+        if (!drflac__read_uint8(&pFlac->bs, paddingSizeInBits, &padding)) {
+            return DRFLAC_AT_END;
+        }
+    }
+
+#ifndef DR_FLAC_NO_CRC
+    actualCRC16 = drflac__flush_crc16(&pFlac->bs);
+#endif
+    if (!drflac__read_uint16(&pFlac->bs, 16, &desiredCRC16)) {
+        return DRFLAC_AT_END;
+    }
+
+#ifndef DR_FLAC_NO_CRC
+    if (actualCRC16 != desiredCRC16) {
+        return DRFLAC_CRC_MISMATCH;    /* CRC mismatch. */
+    }
+#endif
+
+    pFlac->currentFLACFrame.pcmFramesRemaining = pFlac->currentFLACFrame.header.blockSizeInPCMFrames;
+
+    return DRFLAC_SUCCESS;
+}
+
+static drflac_result drflac__seek_flac_frame(drflac* pFlac)
+{
+    int channelCount;
+    int i;
+    drflac_uint16 desiredCRC16;
+#ifndef DR_FLAC_NO_CRC
+    drflac_uint16 actualCRC16;
+#endif
+
+    channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFLACFrame.header.channelAssignment);
+    for (i = 0; i < channelCount; ++i) {
+        if (!drflac__seek_subframe(&pFlac->bs, &pFlac->currentFLACFrame, i)) {
+            return DRFLAC_ERROR;
+        }
+    }
+
+    /* Padding. */
+    if (!drflac__seek_bits(&pFlac->bs, DRFLAC_CACHE_L1_BITS_REMAINING(&pFlac->bs) & 7)) {
+        return DRFLAC_ERROR;
+    }
+
+    /* CRC. */
+#ifndef DR_FLAC_NO_CRC
+    actualCRC16 = drflac__flush_crc16(&pFlac->bs);
+#endif
+    if (!drflac__read_uint16(&pFlac->bs, 16, &desiredCRC16)) {
+        return DRFLAC_AT_END;
+    }
+
+#ifndef DR_FLAC_NO_CRC
+    if (actualCRC16 != desiredCRC16) {
+        return DRFLAC_CRC_MISMATCH;    /* CRC mismatch. */
+    }
+#endif
+
+    return DRFLAC_SUCCESS;
+}
+
+static drflac_bool32 drflac__read_and_decode_next_flac_frame(drflac* pFlac)
+{
+    DRFLAC_ASSERT(pFlac != NULL);
+
+    for (;;) {
+        drflac_result result;
+
+        if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
+            return DRFLAC_FALSE;
+        }
+
+        result = drflac__decode_flac_frame(pFlac);
+        if (result != DRFLAC_SUCCESS) {
+            if (result == DRFLAC_CRC_MISMATCH) {
+                continue;   /* CRC mismatch. Skip to the next frame. */
+            } else {
+                return DRFLAC_FALSE;
+            }
+        }
+
+        return DRFLAC_TRUE;
+    }
+}
+
+static void drflac__get_pcm_frame_range_of_current_flac_frame(drflac* pFlac, drflac_uint64* pFirstPCMFrame, drflac_uint64* pLastPCMFrame)
+{
+    drflac_uint64 firstPCMFrame;
+    drflac_uint64 lastPCMFrame;
+
+    DRFLAC_ASSERT(pFlac != NULL);
+
+    firstPCMFrame = pFlac->currentFLACFrame.header.pcmFrameNumber;
+    if (firstPCMFrame == 0) {
+        firstPCMFrame = ((drflac_uint64)pFlac->currentFLACFrame.header.flacFrameNumber) * pFlac->maxBlockSizeInPCMFrames;
+    }
+
+    lastPCMFrame = firstPCMFrame + pFlac->currentFLACFrame.header.blockSizeInPCMFrames;
+    if (lastPCMFrame > 0) {
+        lastPCMFrame -= 1; /* Needs to be zero based. */
+    }
+
+    if (pFirstPCMFrame) {
+        *pFirstPCMFrame = firstPCMFrame;
+    }
+    if (pLastPCMFrame) {
+        *pLastPCMFrame = lastPCMFrame;
+    }
+}
+
+static drflac_bool32 drflac__seek_to_first_frame(drflac* pFlac)
+{
+    drflac_bool32 result;
+
+    DRFLAC_ASSERT(pFlac != NULL);
+
+    result = drflac__seek_to_byte(&pFlac->bs, pFlac->firstFLACFramePosInBytes);
+
+    DRFLAC_ZERO_MEMORY(&pFlac->currentFLACFrame, sizeof(pFlac->currentFLACFrame));
+    pFlac->currentPCMFrame = 0;
+
+    return result;
+}
+
+static DRFLAC_INLINE drflac_result drflac__seek_to_next_flac_frame(drflac* pFlac)
+{
+    /* This function should only ever be called while the decoder is sitting on the first byte past the FRAME_HEADER section. */
+    DRFLAC_ASSERT(pFlac != NULL);
+    return drflac__seek_flac_frame(pFlac);
+}
+
+
+static drflac_uint64 drflac__seek_forward_by_pcm_frames(drflac* pFlac, drflac_uint64 pcmFramesToSeek)
+{
+    drflac_uint64 pcmFramesRead = 0;
+    while (pcmFramesToSeek > 0) {
+        if (pFlac->currentFLACFrame.pcmFramesRemaining == 0) {
+            if (!drflac__read_and_decode_next_flac_frame(pFlac)) {
+                break;  /* Couldn't read the next frame, so just break from the loop and return. */
+            }
+        } else {
+            if (pFlac->currentFLACFrame.pcmFramesRemaining > pcmFramesToSeek) {
+                pcmFramesRead   += pcmFramesToSeek;
+                pFlac->currentFLACFrame.pcmFramesRemaining -= (drflac_uint32)pcmFramesToSeek;   /* <-- Safe cast. Will always be < currentFrame.pcmFramesRemaining < 65536. */
+                pcmFramesToSeek  = 0;
+            } else {
+                pcmFramesRead   += pFlac->currentFLACFrame.pcmFramesRemaining;
+                pcmFramesToSeek -= pFlac->currentFLACFrame.pcmFramesRemaining;
+                pFlac->currentFLACFrame.pcmFramesRemaining = 0;
+            }
+        }
+    }
+
+    pFlac->currentPCMFrame += pcmFramesRead;
+    return pcmFramesRead;
+}
+
+
+static drflac_bool32 drflac__seek_to_pcm_frame__brute_force(drflac* pFlac, drflac_uint64 pcmFrameIndex)
+{
+    drflac_bool32 isMidFrame = DRFLAC_FALSE;
+    drflac_uint64 runningPCMFrameCount;
+
+    DRFLAC_ASSERT(pFlac != NULL);
+
+    /* If we are seeking forward we start from the current position. Otherwise we need to start all the way from the start of the file. */
+    if (pcmFrameIndex >= pFlac->currentPCMFrame) {
+        /* Seeking forward. Need to seek from the current position. */
+        runningPCMFrameCount = pFlac->currentPCMFrame;
+
+        /* The frame header for the first frame may not yet have been read. We need to do that if necessary. */
+        if (pFlac->currentPCMFrame == 0 && pFlac->currentFLACFrame.pcmFramesRemaining == 0) {
+            if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
+                return DRFLAC_FALSE;
+            }
+        } else {
+            isMidFrame = DRFLAC_TRUE;
+        }
+    } else {
+        /* Seeking backwards. Need to seek from the start of the file. */
+        runningPCMFrameCount = 0;
+
+        /* Move back to the start. */
+        if (!drflac__seek_to_first_frame(pFlac)) {
+            return DRFLAC_FALSE;
+        }
+
+        /* Decode the first frame in preparation for sample-exact seeking below. */
+        if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
+            return DRFLAC_FALSE;
+        }
+    }
+
+    /*
+    We need to as quickly as possible find the frame that contains the target sample. To do this, we iterate over each frame and inspect its
+    header. If based on the header we can determine that the frame contains the sample, we do a full decode of that frame.
+    */
+    for (;;) {
+        drflac_uint64 pcmFrameCountInThisFLACFrame;
+        drflac_uint64 firstPCMFrameInFLACFrame = 0;
+        drflac_uint64 lastPCMFrameInFLACFrame = 0;
+
+        drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &firstPCMFrameInFLACFrame, &lastPCMFrameInFLACFrame);
+
+        pcmFrameCountInThisFLACFrame = (lastPCMFrameInFLACFrame - firstPCMFrameInFLACFrame) + 1;
+        if (pcmFrameIndex < (runningPCMFrameCount + pcmFrameCountInThisFLACFrame)) {
+            /*
+            The sample should be in this frame. We need to fully decode it, however if it's an invalid frame (a CRC mismatch), we need to pretend
+            it never existed and keep iterating.
+            */
+            drflac_uint64 pcmFramesToDecode = pcmFrameIndex - runningPCMFrameCount;
+
+            if (!isMidFrame) {
+                drflac_result result = drflac__decode_flac_frame(pFlac);
+                if (result == DRFLAC_SUCCESS) {
+                    /* The frame is valid. We just need to skip over some samples to ensure it's sample-exact. */
+                    return drflac__seek_forward_by_pcm_frames(pFlac, pcmFramesToDecode) == pcmFramesToDecode;  /* <-- If this fails, something bad has happened (it should never fail). */
+                } else {
+                    if (result == DRFLAC_CRC_MISMATCH) {
+                        goto next_iteration;   /* CRC mismatch. Pretend this frame never existed. */
+                    } else {
+                        return DRFLAC_FALSE;
+                    }
+                }
+            } else {
+                /* We started seeking mid-frame which means we need to skip the frame decoding part. */
+                return drflac__seek_forward_by_pcm_frames(pFlac, pcmFramesToDecode) == pcmFramesToDecode;
+            }
+        } else {
+            /*
+            It's not in this frame. We need to seek past the frame, but check if there was a CRC mismatch. If so, we pretend this
+            frame never existed and leave the running sample count untouched.
+            */
+            if (!isMidFrame) {
+                drflac_result result = drflac__seek_to_next_flac_frame(pFlac);
+                if (result == DRFLAC_SUCCESS) {
+                    runningPCMFrameCount += pcmFrameCountInThisFLACFrame;
+                } else {
+                    if (result == DRFLAC_CRC_MISMATCH) {
+                        goto next_iteration;   /* CRC mismatch. Pretend this frame never existed. */
+                    } else {
+                        return DRFLAC_FALSE;
+                    }
+                }
+            } else {
+                /*
+                We started seeking mid-frame which means we need to seek by reading to the end of the frame instead of with
+                drflac__seek_to_next_flac_frame() which only works if the decoder is sitting on the byte just after the frame header.
+                */
+                runningPCMFrameCount += pFlac->currentFLACFrame.pcmFramesRemaining;
+                pFlac->currentFLACFrame.pcmFramesRemaining = 0;
+                isMidFrame = DRFLAC_FALSE;
+            }
+
+            /* If we are seeking to the end of the file and we've just hit it, we're done. */
+            if (pcmFrameIndex == pFlac->totalPCMFrameCount && runningPCMFrameCount == pFlac->totalPCMFrameCount) {
+                return DRFLAC_TRUE;
+            }
+        }
+
+    next_iteration:
+        /* Grab the next frame in preparation for the next iteration. */
+        if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
+            return DRFLAC_FALSE;
+        }
+    }
+}
+
+
+#if !defined(DR_FLAC_NO_CRC)
+/*
+We use an average compression ratio to determine our approximate start location. FLAC files are generally about 50%-70% the size of their
+uncompressed counterparts so we'll use this as a basis. I'm going to split the middle and use a factor of 0.6 to determine the starting
+location.
+*/
+#define DRFLAC_BINARY_SEARCH_APPROX_COMPRESSION_RATIO 0.6f
+
+static drflac_bool32 drflac__seek_to_approximate_flac_frame_to_byte(drflac* pFlac, drflac_uint64 targetByte, drflac_uint64 rangeLo, drflac_uint64 rangeHi, drflac_uint64* pLastSuccessfulSeekOffset)
+{
+    DRFLAC_ASSERT(pFlac != NULL);
+    DRFLAC_ASSERT(pLastSuccessfulSeekOffset != NULL);
+    DRFLAC_ASSERT(targetByte >= rangeLo);
+    DRFLAC_ASSERT(targetByte <= rangeHi);
+
+    *pLastSuccessfulSeekOffset = pFlac->firstFLACFramePosInBytes;
+
+    for (;;) {
+        /* When seeking to a byte, failure probably means we've attempted to seek beyond the end of the stream. To counter this we just halve it each attempt. */
+        if (!drflac__seek_to_byte(&pFlac->bs, targetByte)) {
+            /* If we couldn't even seek to the first byte in the stream we have a problem. Just abandon the whole thing. */
+            if (targetByte == 0) {
+                drflac__seek_to_first_frame(pFlac); /* Try to recover. */
+                return DRFLAC_FALSE;
+            }
+
+            /* Halve the byte location and continue. */
+            targetByte = rangeLo + ((rangeHi - rangeLo)/2);
+            rangeHi = targetByte;
+        } else {
+            /* Getting here should mean that we have seeked to an appropriate byte. */
+
+            /* Clear the details of the FLAC frame so we don't misreport data. */
+            DRFLAC_ZERO_MEMORY(&pFlac->currentFLACFrame, sizeof(pFlac->currentFLACFrame));
+
+            /*
+            Now seek to the next FLAC frame. We need to decode the entire frame (not just the header) because it's possible for the header to incorrectly pass the
+            CRC check and return bad data. We need to decode the entire frame to be more certain. Although this seems unlikely, this has happened to me in testing
+            so it needs to stay this way for now.
+            */
+#if 1
+            if (!drflac__read_and_decode_next_flac_frame(pFlac)) {
+                /* Halve the byte location and continue. */
+                targetByte = rangeLo + ((rangeHi - rangeLo)/2);
+                rangeHi = targetByte;
+            } else {
+                break;
+            }
+#else
+            if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
+                /* Halve the byte location and continue. */
+                targetByte = rangeLo + ((rangeHi - rangeLo)/2);
+                rangeHi = targetByte;
+            } else {
+                break;
+            }
+#endif
+        }
+    }
+
+    /* The current PCM frame needs to be updated based on the frame we just seeked to. */
+    drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &pFlac->currentPCMFrame, NULL);
+
+    DRFLAC_ASSERT(targetByte <= rangeHi);
+
+    *pLastSuccessfulSeekOffset = targetByte;
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__decode_flac_frame_and_seek_forward_by_pcm_frames(drflac* pFlac, drflac_uint64 offset)
+{
+    /* This section of code would be used if we were only decoding the FLAC frame header when calling drflac__seek_to_approximate_flac_frame_to_byte(). */
+#if 0
+    if (drflac__decode_flac_frame(pFlac) != DRFLAC_SUCCESS) {
+        /* We failed to decode this frame which may be due to it being corrupt. We'll just use the next valid FLAC frame. */
+        if (drflac__read_and_decode_next_flac_frame(pFlac) == DRFLAC_FALSE) {
+            return DRFLAC_FALSE;
+        }
+    }
+#endif
+
+    return drflac__seek_forward_by_pcm_frames(pFlac, offset) == offset;
+}
+
+
+static drflac_bool32 drflac__seek_to_pcm_frame__binary_search_internal(drflac* pFlac, drflac_uint64 pcmFrameIndex, drflac_uint64 byteRangeLo, drflac_uint64 byteRangeHi)
+{
+    /* This assumes pFlac->currentPCMFrame is sitting on byteRangeLo upon entry. */
+
+    drflac_uint64 targetByte;
+    drflac_uint64 pcmRangeLo = pFlac->totalPCMFrameCount;
+    drflac_uint64 pcmRangeHi = 0;
+    drflac_uint64 lastSuccessfulSeekOffset = (drflac_uint64)-1;
+    drflac_uint64 closestSeekOffsetBeforeTargetPCMFrame = byteRangeLo;
+    drflac_uint32 seekForwardThreshold = (pFlac->maxBlockSizeInPCMFrames != 0) ? pFlac->maxBlockSizeInPCMFrames*2 : 4096;
+
+    targetByte = byteRangeLo + (drflac_uint64)(((drflac_int64)((pcmFrameIndex - pFlac->currentPCMFrame) * pFlac->channels * pFlac->bitsPerSample)/8.0f) * DRFLAC_BINARY_SEARCH_APPROX_COMPRESSION_RATIO);
+    if (targetByte > byteRangeHi) {
+        targetByte = byteRangeHi;
+    }
+
+    for (;;) {
+        if (drflac__seek_to_approximate_flac_frame_to_byte(pFlac, targetByte, byteRangeLo, byteRangeHi, &lastSuccessfulSeekOffset)) {
+            /* We found a FLAC frame. We need to check if it contains the sample we're looking for. */
+            drflac_uint64 newPCMRangeLo;
+            drflac_uint64 newPCMRangeHi;
+            drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &newPCMRangeLo, &newPCMRangeHi);
+
+            /* If we selected the same frame, it means we should be pretty close. Just decode the rest. */
+            if (pcmRangeLo == newPCMRangeLo) {
+                if (!drflac__seek_to_approximate_flac_frame_to_byte(pFlac, closestSeekOffsetBeforeTargetPCMFrame, closestSeekOffsetBeforeTargetPCMFrame, byteRangeHi, &lastSuccessfulSeekOffset)) {
+                    break;  /* Failed to seek to closest frame. */
+                }
+
+                if (drflac__decode_flac_frame_and_seek_forward_by_pcm_frames(pFlac, pcmFrameIndex - pFlac->currentPCMFrame)) {
+                    return DRFLAC_TRUE;
+                } else {
+                    break;  /* Failed to seek forward. */
+                }
+            }
+
+            pcmRangeLo = newPCMRangeLo;
+            pcmRangeHi = newPCMRangeHi;
+
+            if (pcmRangeLo <= pcmFrameIndex && pcmRangeHi >= pcmFrameIndex) {
+                /* The target PCM frame is in this FLAC frame. */
+                if (drflac__decode_flac_frame_and_seek_forward_by_pcm_frames(pFlac, pcmFrameIndex - pFlac->currentPCMFrame) ) {
+                    return DRFLAC_TRUE;
+                } else {
+                    break;  /* Failed to seek to FLAC frame. */
+                }
+            } else {
+                const double approxCompressionRatio = (drflac_int64)(lastSuccessfulSeekOffset - pFlac->firstFLACFramePosInBytes) / ((drflac_int64)(pcmRangeLo * pFlac->channels * pFlac->bitsPerSample)/8.0f);
+
+                if (pcmRangeLo > pcmFrameIndex) {
+                    /* We seeked too far forward. We need to move our target byte backward and try again. */
+                    byteRangeHi = lastSuccessfulSeekOffset;
+                    if (byteRangeLo > byteRangeHi) {
+                        byteRangeLo = byteRangeHi;
+                    }
+
+                    targetByte = byteRangeLo + ((byteRangeHi - byteRangeLo) / 2);
+                    if (targetByte < byteRangeLo) {
+                        targetByte = byteRangeLo;
+                    }
+                } else /*if (pcmRangeHi < pcmFrameIndex)*/ {
+                    /* We didn't seek far enough. We need to move our target byte forward and try again. */
+
+                    /* If we're close enough we can just seek forward. */
+                    if ((pcmFrameIndex - pcmRangeLo) < seekForwardThreshold) {
+                        if (drflac__decode_flac_frame_and_seek_forward_by_pcm_frames(pFlac, pcmFrameIndex - pFlac->currentPCMFrame)) {
+                            return DRFLAC_TRUE;
+                        } else {
+                            break;  /* Failed to seek to FLAC frame. */
+                        }
+                    } else {
+                        byteRangeLo = lastSuccessfulSeekOffset;
+                        if (byteRangeHi < byteRangeLo) {
+                            byteRangeHi = byteRangeLo;
+                        }
+
+                        targetByte = lastSuccessfulSeekOffset + (drflac_uint64)(((drflac_int64)((pcmFrameIndex-pcmRangeLo) * pFlac->channels * pFlac->bitsPerSample)/8.0f) * approxCompressionRatio);
+                        if (targetByte > byteRangeHi) {
+                            targetByte = byteRangeHi;
+                        }
+
+                        if (closestSeekOffsetBeforeTargetPCMFrame < lastSuccessfulSeekOffset) {
+                            closestSeekOffsetBeforeTargetPCMFrame = lastSuccessfulSeekOffset;
+                        }
+                    }
+                }
+            }
+        } else {
+            /* Getting here is really bad. We just recover as best we can, but moving to the first frame in the stream, and then abort. */
+            break;
+        }
+    }
+
+    drflac__seek_to_first_frame(pFlac); /* <-- Try to recover. */
+    return DRFLAC_FALSE;
+}
+
+static drflac_bool32 drflac__seek_to_pcm_frame__binary_search(drflac* pFlac, drflac_uint64 pcmFrameIndex)
+{
+    drflac_uint64 byteRangeLo;
+    drflac_uint64 byteRangeHi;
+    drflac_uint32 seekForwardThreshold = (pFlac->maxBlockSizeInPCMFrames != 0) ? pFlac->maxBlockSizeInPCMFrames*2 : 4096;
+
+    /* Our algorithm currently assumes the FLAC stream is currently sitting at the start. */
+    if (drflac__seek_to_first_frame(pFlac) == DRFLAC_FALSE) {
+        return DRFLAC_FALSE;
+    }
+
+    /* If we're close enough to the start, just move to the start and seek forward. */
+    if (pcmFrameIndex < seekForwardThreshold) {
+        return drflac__seek_forward_by_pcm_frames(pFlac, pcmFrameIndex) == pcmFrameIndex;
+    }
+
+    /*
+    Our starting byte range is the byte position of the first FLAC frame and the approximate end of the file as if it were completely uncompressed. This ensures
+    the entire file is included, even though most of the time it'll exceed the end of the actual stream. This is OK as the frame searching logic will handle it.
+    */
+    byteRangeLo = pFlac->firstFLACFramePosInBytes;
+    byteRangeHi = pFlac->firstFLACFramePosInBytes + (drflac_uint64)((drflac_int64)(pFlac->totalPCMFrameCount * pFlac->channels * pFlac->bitsPerSample)/8.0f);
+
+    return drflac__seek_to_pcm_frame__binary_search_internal(pFlac, pcmFrameIndex, byteRangeLo, byteRangeHi);
+}
+#endif  /* !DR_FLAC_NO_CRC */
+
+static drflac_bool32 drflac__seek_to_pcm_frame__seek_table(drflac* pFlac, drflac_uint64 pcmFrameIndex)
+{
+    drflac_uint32 iClosestSeekpoint = 0;
+    drflac_bool32 isMidFrame = DRFLAC_FALSE;
+    drflac_uint64 runningPCMFrameCount;
+    drflac_uint32 iSeekpoint;
+
+
+    DRFLAC_ASSERT(pFlac != NULL);
+
+    if (pFlac->pSeekpoints == NULL || pFlac->seekpointCount == 0) {
+        return DRFLAC_FALSE;
+    }
+
+    for (iSeekpoint = 0; iSeekpoint < pFlac->seekpointCount; ++iSeekpoint) {
+        if (pFlac->pSeekpoints[iSeekpoint].firstPCMFrame >= pcmFrameIndex) {
+            break;
+        }
+
+        iClosestSeekpoint = iSeekpoint;
+    }
+
+    /* There's been cases where the seek table contains only zeros. We need to do some basic validation on the closest seekpoint. */
+    if (pFlac->pSeekpoints[iClosestSeekpoint].pcmFrameCount == 0 || pFlac->pSeekpoints[iClosestSeekpoint].pcmFrameCount > pFlac->maxBlockSizeInPCMFrames) {
+        return DRFLAC_FALSE;
+    }
+    if (pFlac->pSeekpoints[iClosestSeekpoint].firstPCMFrame > pFlac->totalPCMFrameCount && pFlac->totalPCMFrameCount > 0) {
+        return DRFLAC_FALSE;
+    }
+
+#if !defined(DR_FLAC_NO_CRC)
+    /* At this point we should know the closest seek point. We can use a binary search for this. We need to know the total sample count for this. */
+    if (pFlac->totalPCMFrameCount > 0) {
+        drflac_uint64 byteRangeLo;
+        drflac_uint64 byteRangeHi;
+
+        byteRangeHi = pFlac->firstFLACFramePosInBytes + (drflac_uint64)((drflac_int64)(pFlac->totalPCMFrameCount * pFlac->channels * pFlac->bitsPerSample)/8.0f);
+        byteRangeLo = pFlac->firstFLACFramePosInBytes + pFlac->pSeekpoints[iClosestSeekpoint].flacFrameOffset;
+
+        /*
+        If our closest seek point is not the last one, we only need to search between it and the next one. The section below calculates an appropriate starting
+        value for byteRangeHi which will clamp it appropriately.
+
+        Note that the next seekpoint must have an offset greater than the closest seekpoint because otherwise our binary search algorithm will break down. There
+        have been cases where a seektable consists of seek points where every byte offset is set to 0 which causes problems. If this happens we need to abort.
+        */
+        if (iClosestSeekpoint < pFlac->seekpointCount-1) {
+            drflac_uint32 iNextSeekpoint = iClosestSeekpoint + 1;
+
+            /* Basic validation on the seekpoints to ensure they're usable. */
+            if (pFlac->pSeekpoints[iClosestSeekpoint].flacFrameOffset >= pFlac->pSeekpoints[iNextSeekpoint].flacFrameOffset || pFlac->pSeekpoints[iNextSeekpoint].pcmFrameCount == 0) {
+                return DRFLAC_FALSE;    /* The next seekpoint doesn't look right. The seek table cannot be trusted from here. Abort. */
+            }
+
+            if (pFlac->pSeekpoints[iNextSeekpoint].firstPCMFrame != (((drflac_uint64)0xFFFFFFFF << 32) | 0xFFFFFFFF)) { /* Make sure it's not a placeholder seekpoint. */
+                byteRangeHi = pFlac->firstFLACFramePosInBytes + pFlac->pSeekpoints[iNextSeekpoint].flacFrameOffset - 1; /* byteRangeHi must be zero based. */
+            }
+        }
+
+        if (drflac__seek_to_byte(&pFlac->bs, pFlac->firstFLACFramePosInBytes + pFlac->pSeekpoints[iClosestSeekpoint].flacFrameOffset)) {
+            if (drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
+                drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &pFlac->currentPCMFrame, NULL);
+
+                if (drflac__seek_to_pcm_frame__binary_search_internal(pFlac, pcmFrameIndex, byteRangeLo, byteRangeHi)) {
+                    return DRFLAC_TRUE;
+                }
+            }
+        }
+    }
+#endif  /* !DR_FLAC_NO_CRC */
+
+    /* Getting here means we need to use a slower algorithm because the binary search method failed or cannot be used. */
+
+    /*
+    If we are seeking forward and the closest seekpoint is _before_ the current sample, we just seek forward from where we are. Otherwise we start seeking
+    from the seekpoint's first sample.
+    */
+    if (pcmFrameIndex >= pFlac->currentPCMFrame && pFlac->pSeekpoints[iClosestSeekpoint].firstPCMFrame <= pFlac->currentPCMFrame) {
+        /* Optimized case. Just seek forward from where we are. */
+        runningPCMFrameCount = pFlac->currentPCMFrame;
+
+        /* The frame header for the first frame may not yet have been read. We need to do that if necessary. */
+        if (pFlac->currentPCMFrame == 0 && pFlac->currentFLACFrame.pcmFramesRemaining == 0) {
+            if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
+                return DRFLAC_FALSE;
+            }
+        } else {
+            isMidFrame = DRFLAC_TRUE;
+        }
+    } else {
+        /* Slower case. Seek to the start of the seekpoint and then seek forward from there. */
+        runningPCMFrameCount = pFlac->pSeekpoints[iClosestSeekpoint].firstPCMFrame;
+
+        if (!drflac__seek_to_byte(&pFlac->bs, pFlac->firstFLACFramePosInBytes + pFlac->pSeekpoints[iClosestSeekpoint].flacFrameOffset)) {
+            return DRFLAC_FALSE;
+        }
+
+        /* Grab the frame the seekpoint is sitting on in preparation for the sample-exact seeking below. */
+        if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
+            return DRFLAC_FALSE;
+        }
+    }
+
+    for (;;) {
+        drflac_uint64 pcmFrameCountInThisFLACFrame;
+        drflac_uint64 firstPCMFrameInFLACFrame = 0;
+        drflac_uint64 lastPCMFrameInFLACFrame = 0;
+
+        drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &firstPCMFrameInFLACFrame, &lastPCMFrameInFLACFrame);
+
+        pcmFrameCountInThisFLACFrame = (lastPCMFrameInFLACFrame - firstPCMFrameInFLACFrame) + 1;
+        if (pcmFrameIndex < (runningPCMFrameCount + pcmFrameCountInThisFLACFrame)) {
+            /*
+            The sample should be in this frame. We need to fully decode it, but if it's an invalid frame (a CRC mismatch) we need to pretend
+            it never existed and keep iterating.
+            */
+            drflac_uint64 pcmFramesToDecode = pcmFrameIndex - runningPCMFrameCount;
+
+            if (!isMidFrame) {
+                drflac_result result = drflac__decode_flac_frame(pFlac);
+                if (result == DRFLAC_SUCCESS) {
+                    /* The frame is valid. We just need to skip over some samples to ensure it's sample-exact. */
+                    return drflac__seek_forward_by_pcm_frames(pFlac, pcmFramesToDecode) == pcmFramesToDecode;  /* <-- If this fails, something bad has happened (it should never fail). */
+                } else {
+                    if (result == DRFLAC_CRC_MISMATCH) {
+                        goto next_iteration;   /* CRC mismatch. Pretend this frame never existed. */
+                    } else {
+                        return DRFLAC_FALSE;
+                    }
+                }
+            } else {
+                /* We started seeking mid-frame which means we need to skip the frame decoding part. */
+                return drflac__seek_forward_by_pcm_frames(pFlac, pcmFramesToDecode) == pcmFramesToDecode;
+            }
+        } else {
+            /*
+            It's not in this frame. We need to seek past the frame, but check if there was a CRC mismatch. If so, we pretend this
+            frame never existed and leave the running sample count untouched.
+            */
+            if (!isMidFrame) {
+                drflac_result result = drflac__seek_to_next_flac_frame(pFlac);
+                if (result == DRFLAC_SUCCESS) {
+                    runningPCMFrameCount += pcmFrameCountInThisFLACFrame;
+                } else {
+                    if (result == DRFLAC_CRC_MISMATCH) {
+                        goto next_iteration;   /* CRC mismatch. Pretend this frame never existed. */
+                    } else {
+                        return DRFLAC_FALSE;
+                    }
+                }
+            } else {
+                /*
+                We started seeking mid-frame which means we need to seek by reading to the end of the frame instead of with
+                drflac__seek_to_next_flac_frame() which only works if the decoder is sitting on the byte just after the frame header.
+                */
+                runningPCMFrameCount += pFlac->currentFLACFrame.pcmFramesRemaining;
+                pFlac->currentFLACFrame.pcmFramesRemaining = 0;
+                isMidFrame = DRFLAC_FALSE;
+            }
+
+            /* If we are seeking to the end of the file and we've just hit it, we're done. */
+            if (pcmFrameIndex == pFlac->totalPCMFrameCount && runningPCMFrameCount == pFlac->totalPCMFrameCount) {
+                return DRFLAC_TRUE;
+            }
+        }
+
+    next_iteration:
+        /* Grab the next frame in preparation for the next iteration. */
+        if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
+            return DRFLAC_FALSE;
+        }
+    }
+}
+
+
+#ifndef DR_FLAC_NO_OGG
+typedef struct
+{
+    drflac_uint8 capturePattern[4];  /* Should be "OggS" */
+    drflac_uint8 structureVersion;   /* Always 0. */
+    drflac_uint8 headerType;
+    drflac_uint64 granulePosition;
+    drflac_uint32 serialNumber;
+    drflac_uint32 sequenceNumber;
+    drflac_uint32 checksum;
+    drflac_uint8 segmentCount;
+    drflac_uint8 segmentTable[255];
+} drflac_ogg_page_header;
+#endif
+
+typedef struct
+{
+    drflac_read_proc onRead;
+    drflac_seek_proc onSeek;
+    drflac_meta_proc onMeta;
+    drflac_container container;
+    void* pUserData;
+    void* pUserDataMD;
+    drflac_uint32 sampleRate;
+    drflac_uint8  channels;
+    drflac_uint8  bitsPerSample;
+    drflac_uint64 totalPCMFrameCount;
+    drflac_uint16 maxBlockSizeInPCMFrames;
+    drflac_uint64 runningFilePos;
+    drflac_bool32 hasStreamInfoBlock;
+    drflac_bool32 hasMetadataBlocks;
+    drflac_bs bs;                           /* <-- A bit streamer is required for loading data during initialization. */
+    drflac_frame_header firstFrameHeader;   /* <-- The header of the first frame that was read during relaxed initalization. Only set if there is no STREAMINFO block. */
+
+#ifndef DR_FLAC_NO_OGG
+    drflac_uint32 oggSerial;
+    drflac_uint64 oggFirstBytePos;
+    drflac_ogg_page_header oggBosHeader;
+#endif
+} drflac_init_info;
+
+static DRFLAC_INLINE void drflac__decode_block_header(drflac_uint32 blockHeader, drflac_uint8* isLastBlock, drflac_uint8* blockType, drflac_uint32* blockSize)
+{
+    blockHeader = drflac__be2host_32(blockHeader);
+    *isLastBlock = (drflac_uint8)((blockHeader & 0x80000000UL) >> 31);
+    *blockType   = (drflac_uint8)((blockHeader & 0x7F000000UL) >> 24);
+    *blockSize   =                (blockHeader & 0x00FFFFFFUL);
+}
+
+static DRFLAC_INLINE drflac_bool32 drflac__read_and_decode_block_header(drflac_read_proc onRead, void* pUserData, drflac_uint8* isLastBlock, drflac_uint8* blockType, drflac_uint32* blockSize)
+{
+    drflac_uint32 blockHeader;
+
+    *blockSize = 0;
+    if (onRead(pUserData, &blockHeader, 4) != 4) {
+        return DRFLAC_FALSE;
+    }
+
+    drflac__decode_block_header(blockHeader, isLastBlock, blockType, blockSize);
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__read_streaminfo(drflac_read_proc onRead, void* pUserData, drflac_streaminfo* pStreamInfo)
+{
+    drflac_uint32 blockSizes;
+    drflac_uint64 frameSizes = 0;
+    drflac_uint64 importantProps;
+    drflac_uint8 md5[16];
+
+    /* min/max block size. */
+    if (onRead(pUserData, &blockSizes, 4) != 4) {
+        return DRFLAC_FALSE;
+    }
+
+    /* min/max frame size. */
+    if (onRead(pUserData, &frameSizes, 6) != 6) {
+        return DRFLAC_FALSE;
+    }
+
+    /* Sample rate, channels, bits per sample and total sample count. */
+    if (onRead(pUserData, &importantProps, 8) != 8) {
+        return DRFLAC_FALSE;
+    }
+
+    /* MD5 */
+    if (onRead(pUserData, md5, sizeof(md5)) != sizeof(md5)) {
+        return DRFLAC_FALSE;
+    }
+
+    blockSizes     = drflac__be2host_32(blockSizes);
+    frameSizes     = drflac__be2host_64(frameSizes);
+    importantProps = drflac__be2host_64(importantProps);
+
+    pStreamInfo->minBlockSizeInPCMFrames = (drflac_uint16)((blockSizes & 0xFFFF0000) >> 16);
+    pStreamInfo->maxBlockSizeInPCMFrames = (drflac_uint16) (blockSizes & 0x0000FFFF);
+    pStreamInfo->minFrameSizeInPCMFrames = (drflac_uint32)((frameSizes     &  (((drflac_uint64)0x00FFFFFF << 16) << 24)) >> 40);
+    pStreamInfo->maxFrameSizeInPCMFrames = (drflac_uint32)((frameSizes     &  (((drflac_uint64)0x00FFFFFF << 16) <<  0)) >> 16);
+    pStreamInfo->sampleRate              = (drflac_uint32)((importantProps &  (((drflac_uint64)0x000FFFFF << 16) << 28)) >> 44);
+    pStreamInfo->channels                = (drflac_uint8 )((importantProps &  (((drflac_uint64)0x0000000E << 16) << 24)) >> 41) + 1;
+    pStreamInfo->bitsPerSample           = (drflac_uint8 )((importantProps &  (((drflac_uint64)0x0000001F << 16) << 20)) >> 36) + 1;
+    pStreamInfo->totalPCMFrameCount      =                ((importantProps & ((((drflac_uint64)0x0000000F << 16) << 16) | 0xFFFFFFFF)));
+    DRFLAC_COPY_MEMORY(pStreamInfo->md5, md5, sizeof(md5));
+
+    return DRFLAC_TRUE;
+}
+
+
+static void* drflac__malloc_default(size_t sz, void* pUserData)
+{
+    (void)pUserData;
+    return DRFLAC_MALLOC(sz);
+}
+
+static void* drflac__realloc_default(void* p, size_t sz, void* pUserData)
+{
+    (void)pUserData;
+    return DRFLAC_REALLOC(p, sz);
+}
+
+static void drflac__free_default(void* p, void* pUserData)
+{
+    (void)pUserData;
+    DRFLAC_FREE(p);
+}
+
+
+static void* drflac__malloc_from_callbacks(size_t sz, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pAllocationCallbacks == NULL) {
+        return NULL;
+    }
+
+    if (pAllocationCallbacks->onMalloc != NULL) {
+        return pAllocationCallbacks->onMalloc(sz, pAllocationCallbacks->pUserData);
+    }
+
+    /* Try using realloc(). */
+    if (pAllocationCallbacks->onRealloc != NULL) {
+        return pAllocationCallbacks->onRealloc(NULL, sz, pAllocationCallbacks->pUserData);
+    }
+
+    return NULL;
+}
+
+static void* drflac__realloc_from_callbacks(void* p, size_t szNew, size_t szOld, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pAllocationCallbacks == NULL) {
+        return NULL;
+    }
+
+    if (pAllocationCallbacks->onRealloc != NULL) {
+        return pAllocationCallbacks->onRealloc(p, szNew, pAllocationCallbacks->pUserData);
+    }
+
+    /* Try emulating realloc() in terms of malloc()/free(). */
+    if (pAllocationCallbacks->onMalloc != NULL && pAllocationCallbacks->onFree != NULL) {
+        void* p2;
+
+        p2 = pAllocationCallbacks->onMalloc(szNew, pAllocationCallbacks->pUserData);
+        if (p2 == NULL) {
+            return NULL;
+        }
+
+        if (p != NULL) {
+            DRFLAC_COPY_MEMORY(p2, p, szOld);
+            pAllocationCallbacks->onFree(p, pAllocationCallbacks->pUserData);
+        }
+
+        return p2;
+    }
+
+    return NULL;
+}
+
+static void drflac__free_from_callbacks(void* p, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    if (p == NULL || pAllocationCallbacks == NULL) {
+        return;
+    }
+
+    if (pAllocationCallbacks->onFree != NULL) {
+        pAllocationCallbacks->onFree(p, pAllocationCallbacks->pUserData);
+    }
+}
+
+
+static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, void* pUserDataMD, drflac_uint64* pFirstFramePos, drflac_uint64* pSeektablePos, drflac_uint32* pSeektableSize, drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    /*
+    We want to keep track of the byte position in the stream of the seektable. At the time of calling this function we know that
+    we'll be sitting on byte 42.
+    */
+    drflac_uint64 runningFilePos = 42;
+    drflac_uint64 seektablePos   = 0;
+    drflac_uint32 seektableSize  = 0;
+
+    for (;;) {
+        drflac_metadata metadata;
+        drflac_uint8 isLastBlock = 0;
+        drflac_uint8 blockType;
+        drflac_uint32 blockSize;
+        if (drflac__read_and_decode_block_header(onRead, pUserData, &isLastBlock, &blockType, &blockSize) == DRFLAC_FALSE) {
+            return DRFLAC_FALSE;
+        }
+        runningFilePos += 4;
+
+        metadata.type = blockType;
+        metadata.pRawData = NULL;
+        metadata.rawDataSize = 0;
+
+        switch (blockType)
+        {
+            case DRFLAC_METADATA_BLOCK_TYPE_APPLICATION:
+            {
+                if (blockSize < 4) {
+                    return DRFLAC_FALSE;
+                }
+
+                if (onMeta) {
+                    void* pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks);
+                    if (pRawData == NULL) {
+                        return DRFLAC_FALSE;
+                    }
+
+                    if (onRead(pUserData, pRawData, blockSize) != blockSize) {
+                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                        return DRFLAC_FALSE;
+                    }
+
+                    metadata.pRawData = pRawData;
+                    metadata.rawDataSize = blockSize;
+                    metadata.data.application.id       = drflac__be2host_32(*(drflac_uint32*)pRawData);
+                    metadata.data.application.pData    = (const void*)((drflac_uint8*)pRawData + sizeof(drflac_uint32));
+                    metadata.data.application.dataSize = blockSize - sizeof(drflac_uint32);
+                    onMeta(pUserDataMD, &metadata);
+
+                    drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                }
+            } break;
+
+            case DRFLAC_METADATA_BLOCK_TYPE_SEEKTABLE:
+            {
+                seektablePos  = runningFilePos;
+                seektableSize = blockSize;
+
+                if (onMeta) {
+                    drflac_uint32 iSeekpoint;
+                    void* pRawData;
+
+                    pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks);
+                    if (pRawData == NULL) {
+                        return DRFLAC_FALSE;
+                    }
+
+                    if (onRead(pUserData, pRawData, blockSize) != blockSize) {
+                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                        return DRFLAC_FALSE;
+                    }
+
+                    metadata.pRawData = pRawData;
+                    metadata.rawDataSize = blockSize;
+                    metadata.data.seektable.seekpointCount = blockSize/sizeof(drflac_seekpoint);
+                    metadata.data.seektable.pSeekpoints = (const drflac_seekpoint*)pRawData;
+
+                    /* Endian swap. */
+                    for (iSeekpoint = 0; iSeekpoint < metadata.data.seektable.seekpointCount; ++iSeekpoint) {
+                        drflac_seekpoint* pSeekpoint = (drflac_seekpoint*)pRawData + iSeekpoint;
+                        pSeekpoint->firstPCMFrame   = drflac__be2host_64(pSeekpoint->firstPCMFrame);
+                        pSeekpoint->flacFrameOffset = drflac__be2host_64(pSeekpoint->flacFrameOffset);
+                        pSeekpoint->pcmFrameCount   = drflac__be2host_16(pSeekpoint->pcmFrameCount);
+                    }
+
+                    onMeta(pUserDataMD, &metadata);
+
+                    drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                }
+            } break;
+
+            case DRFLAC_METADATA_BLOCK_TYPE_VORBIS_COMMENT:
+            {
+                if (blockSize < 8) {
+                    return DRFLAC_FALSE;
+                }
+
+                if (onMeta) {
+                    void* pRawData;
+                    const char* pRunningData;
+                    const char* pRunningDataEnd;
+                    drflac_uint32 i;
+
+                    pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks);
+                    if (pRawData == NULL) {
+                        return DRFLAC_FALSE;
+                    }
+
+                    if (onRead(pUserData, pRawData, blockSize) != blockSize) {
+                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                        return DRFLAC_FALSE;
+                    }
+
+                    metadata.pRawData = pRawData;
+                    metadata.rawDataSize = blockSize;
+
+                    pRunningData    = (const char*)pRawData;
+                    pRunningDataEnd = (const char*)pRawData + blockSize;
+
+                    metadata.data.vorbis_comment.vendorLength = drflac__le2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+
+                    /* Need space for the rest of the block */
+                    if ((pRunningDataEnd - pRunningData) - 4 < (drflac_int64)metadata.data.vorbis_comment.vendorLength) { /* <-- Note the order of operations to avoid overflow to a valid value */
+                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                        return DRFLAC_FALSE;
+                    }
+                    metadata.data.vorbis_comment.vendor       = pRunningData;                                            pRunningData += metadata.data.vorbis_comment.vendorLength;
+                    metadata.data.vorbis_comment.commentCount = drflac__le2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+
+                    /* Need space for 'commentCount' comments after the block, which at minimum is a drflac_uint32 per comment */
+                    if ((pRunningDataEnd - pRunningData) / sizeof(drflac_uint32) < metadata.data.vorbis_comment.commentCount) { /* <-- Note the order of operations to avoid overflow to a valid value */
+                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                        return DRFLAC_FALSE;
+                    }
+                    metadata.data.vorbis_comment.pComments    = pRunningData;
+
+                    /* Check that the comments section is valid before passing it to the callback */
+                    for (i = 0; i < metadata.data.vorbis_comment.commentCount; ++i) {
+                        drflac_uint32 commentLength;
+
+                        if (pRunningDataEnd - pRunningData < 4) {
+                            drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                            return DRFLAC_FALSE;
+                        }
+
+                        commentLength = drflac__le2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+                        if (pRunningDataEnd - pRunningData < (drflac_int64)commentLength) { /* <-- Note the order of operations to avoid overflow to a valid value */
+                            drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                            return DRFLAC_FALSE;
+                        }
+                        pRunningData += commentLength;
+                    }
+
+                    onMeta(pUserDataMD, &metadata);
+
+                    drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                }
+            } break;
+
+            case DRFLAC_METADATA_BLOCK_TYPE_CUESHEET:
+            {
+                if (blockSize < 396) {
+                    return DRFLAC_FALSE;
+                }
+
+                if (onMeta) {
+                    void* pRawData;
+                    const char* pRunningData;
+                    const char* pRunningDataEnd;
+                    drflac_uint8 iTrack;
+                    drflac_uint8 iIndex;
+
+                    pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks);
+                    if (pRawData == NULL) {
+                        return DRFLAC_FALSE;
+                    }
+
+                    if (onRead(pUserData, pRawData, blockSize) != blockSize) {
+                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                        return DRFLAC_FALSE;
+                    }
+
+                    metadata.pRawData = pRawData;
+                    metadata.rawDataSize = blockSize;
+
+                    pRunningData    = (const char*)pRawData;
+                    pRunningDataEnd = (const char*)pRawData + blockSize;
+
+                    DRFLAC_COPY_MEMORY(metadata.data.cuesheet.catalog, pRunningData, 128);                              pRunningData += 128;
+                    metadata.data.cuesheet.leadInSampleCount = drflac__be2host_64(*(const drflac_uint64*)pRunningData); pRunningData += 8;
+                    metadata.data.cuesheet.isCD              = (pRunningData[0] & 0x80) != 0;                           pRunningData += 259;
+                    metadata.data.cuesheet.trackCount        = pRunningData[0];                                         pRunningData += 1;
+                    metadata.data.cuesheet.pTrackData        = pRunningData;
+
+                    /* Check that the cuesheet tracks are valid before passing it to the callback */
+                    for (iTrack = 0; iTrack < metadata.data.cuesheet.trackCount; ++iTrack) {
+                        drflac_uint8 indexCount;
+                        drflac_uint32 indexPointSize;
+
+                        if (pRunningDataEnd - pRunningData < 36) {
+                            drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                            return DRFLAC_FALSE;
+                        }
+
+                        /* Skip to the index point count */
+                        pRunningData += 35;
+                        indexCount = pRunningData[0]; pRunningData += 1;
+                        indexPointSize = indexCount * sizeof(drflac_cuesheet_track_index);
+                        if (pRunningDataEnd - pRunningData < (drflac_int64)indexPointSize) {
+                            drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                            return DRFLAC_FALSE;
+                        }
+
+                        /* Endian swap. */
+                        for (iIndex = 0; iIndex < indexCount; ++iIndex) {
+                            drflac_cuesheet_track_index* pTrack = (drflac_cuesheet_track_index*)pRunningData;
+                            pRunningData += sizeof(drflac_cuesheet_track_index);
+                            pTrack->offset = drflac__be2host_64(pTrack->offset);
+                        }
+                    }
+
+                    onMeta(pUserDataMD, &metadata);
+
+                    drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                }
+            } break;
+
+            case DRFLAC_METADATA_BLOCK_TYPE_PICTURE:
+            {
+                if (blockSize < 32) {
+                    return DRFLAC_FALSE;
+                }
+
+                if (onMeta) {
+                    void* pRawData;
+                    const char* pRunningData;
+                    const char* pRunningDataEnd;
+
+                    pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks);
+                    if (pRawData == NULL) {
+                        return DRFLAC_FALSE;
+                    }
+
+                    if (onRead(pUserData, pRawData, blockSize) != blockSize) {
+                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                        return DRFLAC_FALSE;
+                    }
+
+                    metadata.pRawData = pRawData;
+                    metadata.rawDataSize = blockSize;
+
+                    pRunningData    = (const char*)pRawData;
+                    pRunningDataEnd = (const char*)pRawData + blockSize;
+
+                    metadata.data.picture.type       = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+                    metadata.data.picture.mimeLength = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+
+                    /* Need space for the rest of the block */
+                    if ((pRunningDataEnd - pRunningData) - 24 < (drflac_int64)metadata.data.picture.mimeLength) { /* <-- Note the order of operations to avoid overflow to a valid value */
+                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                        return DRFLAC_FALSE;
+                    }
+                    metadata.data.picture.mime              = pRunningData;                                            pRunningData += metadata.data.picture.mimeLength;
+                    metadata.data.picture.descriptionLength = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+
+                    /* Need space for the rest of the block */
+                    if ((pRunningDataEnd - pRunningData) - 20 < (drflac_int64)metadata.data.picture.descriptionLength) { /* <-- Note the order of operations to avoid overflow to a valid value */
+                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                        return DRFLAC_FALSE;
+                    }
+                    metadata.data.picture.description     = pRunningData;                                            pRunningData += metadata.data.picture.descriptionLength;
+                    metadata.data.picture.width           = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+                    metadata.data.picture.height          = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+                    metadata.data.picture.colorDepth      = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+                    metadata.data.picture.indexColorCount = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+                    metadata.data.picture.pictureDataSize = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+                    metadata.data.picture.pPictureData    = (const drflac_uint8*)pRunningData;
+
+                    /* Need space for the picture after the block */
+                    if (pRunningDataEnd - pRunningData < (drflac_int64)metadata.data.picture.pictureDataSize) { /* <-- Note the order of operations to avoid overflow to a valid value */
+                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                        return DRFLAC_FALSE;
+                    }
+
+                    onMeta(pUserDataMD, &metadata);
+
+                    drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                }
+            } break;
+
+            case DRFLAC_METADATA_BLOCK_TYPE_PADDING:
+            {
+                if (onMeta) {
+                    metadata.data.padding.unused = 0;
+
+                    /* Padding doesn't have anything meaningful in it, so just skip over it, but make sure the caller is aware of it by firing the callback. */
+                    if (!onSeek(pUserData, blockSize, drflac_seek_origin_current)) {
+                        isLastBlock = DRFLAC_TRUE;  /* An error occurred while seeking. Attempt to recover by treating this as the last block which will in turn terminate the loop. */
+                    } else {
+                        onMeta(pUserDataMD, &metadata);
+                    }
+                }
+            } break;
+
+            case DRFLAC_METADATA_BLOCK_TYPE_INVALID:
+            {
+                /* Invalid chunk. Just skip over this one. */
+                if (onMeta) {
+                    if (!onSeek(pUserData, blockSize, drflac_seek_origin_current)) {
+                        isLastBlock = DRFLAC_TRUE;  /* An error occurred while seeking. Attempt to recover by treating this as the last block which will in turn terminate the loop. */
+                    }
+                }
+            } break;
+
+            default:
+            {
+                /*
+                It's an unknown chunk, but not necessarily invalid. There's a chance more metadata blocks might be defined later on, so we
+                can at the very least report the chunk to the application and let it look at the raw data.
+                */
+                if (onMeta) {
+                    void* pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks);
+                    if (pRawData == NULL) {
+                        return DRFLAC_FALSE;
+                    }
+
+                    if (onRead(pUserData, pRawData, blockSize) != blockSize) {
+                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                        return DRFLAC_FALSE;
+                    }
+
+                    metadata.pRawData = pRawData;
+                    metadata.rawDataSize = blockSize;
+                    onMeta(pUserDataMD, &metadata);
+
+                    drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                }
+            } break;
+        }
+
+        /* If we're not handling metadata, just skip over the block. If we are, it will have been handled earlier in the switch statement above. */
+        if (onMeta == NULL && blockSize > 0) {
+            if (!onSeek(pUserData, blockSize, drflac_seek_origin_current)) {
+                isLastBlock = DRFLAC_TRUE;
+            }
+        }
+
+        runningFilePos += blockSize;
+        if (isLastBlock) {
+            break;
+        }
+    }
+
+    *pSeektablePos = seektablePos;
+    *pSeektableSize = seektableSize;
+    *pFirstFramePos = runningFilePos;
+
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__init_private__native(drflac_init_info* pInit, drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, void* pUserDataMD, drflac_bool32 relaxed)
+{
+    /* Pre Condition: The bit stream should be sitting just past the 4-byte id header. */
+
+    drflac_uint8 isLastBlock;
+    drflac_uint8 blockType;
+    drflac_uint32 blockSize;
+
+    (void)onSeek;
+
+    pInit->container = drflac_container_native;
+
+    /* The first metadata block should be the STREAMINFO block. */
+    if (!drflac__read_and_decode_block_header(onRead, pUserData, &isLastBlock, &blockType, &blockSize)) {
+        return DRFLAC_FALSE;
+    }
+
+    if (blockType != DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO || blockSize != 34) {
+        if (!relaxed) {
+            /* We're opening in strict mode and the first block is not the STREAMINFO block. Error. */
+            return DRFLAC_FALSE;
+        } else {
+            /*
+            Relaxed mode. To open from here we need to just find the first frame and set the sample rate, etc. to whatever is defined
+            for that frame.
+            */
+            pInit->hasStreamInfoBlock = DRFLAC_FALSE;
+            pInit->hasMetadataBlocks  = DRFLAC_FALSE;
+
+            if (!drflac__read_next_flac_frame_header(&pInit->bs, 0, &pInit->firstFrameHeader)) {
+                return DRFLAC_FALSE;    /* Couldn't find a frame. */
+            }
+
+            if (pInit->firstFrameHeader.bitsPerSample == 0) {
+                return DRFLAC_FALSE;    /* Failed to initialize because the first frame depends on the STREAMINFO block, which does not exist. */
+            }
+
+            pInit->sampleRate              = pInit->firstFrameHeader.sampleRate;
+            pInit->channels                = drflac__get_channel_count_from_channel_assignment(pInit->firstFrameHeader.channelAssignment);
+            pInit->bitsPerSample           = pInit->firstFrameHeader.bitsPerSample;
+            pInit->maxBlockSizeInPCMFrames = 65535;   /* <-- See notes here: https://xiph.org/flac/format.html#metadata_block_streaminfo */
+            return DRFLAC_TRUE;
+        }
+    } else {
+        drflac_streaminfo streaminfo;
+        if (!drflac__read_streaminfo(onRead, pUserData, &streaminfo)) {
+            return DRFLAC_FALSE;
+        }
+
+        pInit->hasStreamInfoBlock      = DRFLAC_TRUE;
+        pInit->sampleRate              = streaminfo.sampleRate;
+        pInit->channels                = streaminfo.channels;
+        pInit->bitsPerSample           = streaminfo.bitsPerSample;
+        pInit->totalPCMFrameCount      = streaminfo.totalPCMFrameCount;
+        pInit->maxBlockSizeInPCMFrames = streaminfo.maxBlockSizeInPCMFrames;    /* Don't care about the min block size - only the max (used for determining the size of the memory allocation). */
+        pInit->hasMetadataBlocks       = !isLastBlock;
+
+        if (onMeta) {
+            drflac_metadata metadata;
+            metadata.type = DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO;
+            metadata.pRawData = NULL;
+            metadata.rawDataSize = 0;
+            metadata.data.streaminfo = streaminfo;
+            onMeta(pUserDataMD, &metadata);
+        }
+
+        return DRFLAC_TRUE;
+    }
+}
+
+#ifndef DR_FLAC_NO_OGG
+#define DRFLAC_OGG_MAX_PAGE_SIZE            65307
+#define DRFLAC_OGG_CAPTURE_PATTERN_CRC32    1605413199  /* CRC-32 of "OggS". */
+
+typedef enum
+{
+    drflac_ogg_recover_on_crc_mismatch,
+    drflac_ogg_fail_on_crc_mismatch
+} drflac_ogg_crc_mismatch_recovery;
+
+#ifndef DR_FLAC_NO_CRC
+static drflac_uint32 drflac__crc32_table[] = {
+    0x00000000L, 0x04C11DB7L, 0x09823B6EL, 0x0D4326D9L,
+    0x130476DCL, 0x17C56B6BL, 0x1A864DB2L, 0x1E475005L,
+    0x2608EDB8L, 0x22C9F00FL, 0x2F8AD6D6L, 0x2B4BCB61L,
+    0x350C9B64L, 0x31CD86D3L, 0x3C8EA00AL, 0x384FBDBDL,
+    0x4C11DB70L, 0x48D0C6C7L, 0x4593E01EL, 0x4152FDA9L,
+    0x5F15ADACL, 0x5BD4B01BL, 0x569796C2L, 0x52568B75L,
+    0x6A1936C8L, 0x6ED82B7FL, 0x639B0DA6L, 0x675A1011L,
+    0x791D4014L, 0x7DDC5DA3L, 0x709F7B7AL, 0x745E66CDL,
+    0x9823B6E0L, 0x9CE2AB57L, 0x91A18D8EL, 0x95609039L,
+    0x8B27C03CL, 0x8FE6DD8BL, 0x82A5FB52L, 0x8664E6E5L,
+    0xBE2B5B58L, 0xBAEA46EFL, 0xB7A96036L, 0xB3687D81L,
+    0xAD2F2D84L, 0xA9EE3033L, 0xA4AD16EAL, 0xA06C0B5DL,
+    0xD4326D90L, 0xD0F37027L, 0xDDB056FEL, 0xD9714B49L,
+    0xC7361B4CL, 0xC3F706FBL, 0xCEB42022L, 0xCA753D95L,
+    0xF23A8028L, 0xF6FB9D9FL, 0xFBB8BB46L, 0xFF79A6F1L,
+    0xE13EF6F4L, 0xE5FFEB43L, 0xE8BCCD9AL, 0xEC7DD02DL,
+    0x34867077L, 0x30476DC0L, 0x3D044B19L, 0x39C556AEL,
+    0x278206ABL, 0x23431B1CL, 0x2E003DC5L, 0x2AC12072L,
+    0x128E9DCFL, 0x164F8078L, 0x1B0CA6A1L, 0x1FCDBB16L,
+    0x018AEB13L, 0x054BF6A4L, 0x0808D07DL, 0x0CC9CDCAL,
+    0x7897AB07L, 0x7C56B6B0L, 0x71159069L, 0x75D48DDEL,
+    0x6B93DDDBL, 0x6F52C06CL, 0x6211E6B5L, 0x66D0FB02L,
+    0x5E9F46BFL, 0x5A5E5B08L, 0x571D7DD1L, 0x53DC6066L,
+    0x4D9B3063L, 0x495A2DD4L, 0x44190B0DL, 0x40D816BAL,
+    0xACA5C697L, 0xA864DB20L, 0xA527FDF9L, 0xA1E6E04EL,
+    0xBFA1B04BL, 0xBB60ADFCL, 0xB6238B25L, 0xB2E29692L,
+    0x8AAD2B2FL, 0x8E6C3698L, 0x832F1041L, 0x87EE0DF6L,
+    0x99A95DF3L, 0x9D684044L, 0x902B669DL, 0x94EA7B2AL,
+    0xE0B41DE7L, 0xE4750050L, 0xE9362689L, 0xEDF73B3EL,
+    0xF3B06B3BL, 0xF771768CL, 0xFA325055L, 0xFEF34DE2L,
+    0xC6BCF05FL, 0xC27DEDE8L, 0xCF3ECB31L, 0xCBFFD686L,
+    0xD5B88683L, 0xD1799B34L, 0xDC3ABDEDL, 0xD8FBA05AL,
+    0x690CE0EEL, 0x6DCDFD59L, 0x608EDB80L, 0x644FC637L,
+    0x7A089632L, 0x7EC98B85L, 0x738AAD5CL, 0x774BB0EBL,
+    0x4F040D56L, 0x4BC510E1L, 0x46863638L, 0x42472B8FL,
+    0x5C007B8AL, 0x58C1663DL, 0x558240E4L, 0x51435D53L,
+    0x251D3B9EL, 0x21DC2629L, 0x2C9F00F0L, 0x285E1D47L,
+    0x36194D42L, 0x32D850F5L, 0x3F9B762CL, 0x3B5A6B9BL,
+    0x0315D626L, 0x07D4CB91L, 0x0A97ED48L, 0x0E56F0FFL,
+    0x1011A0FAL, 0x14D0BD4DL, 0x19939B94L, 0x1D528623L,
+    0xF12F560EL, 0xF5EE4BB9L, 0xF8AD6D60L, 0xFC6C70D7L,
+    0xE22B20D2L, 0xE6EA3D65L, 0xEBA91BBCL, 0xEF68060BL,
+    0xD727BBB6L, 0xD3E6A601L, 0xDEA580D8L, 0xDA649D6FL,
+    0xC423CD6AL, 0xC0E2D0DDL, 0xCDA1F604L, 0xC960EBB3L,
+    0xBD3E8D7EL, 0xB9FF90C9L, 0xB4BCB610L, 0xB07DABA7L,
+    0xAE3AFBA2L, 0xAAFBE615L, 0xA7B8C0CCL, 0xA379DD7BL,
+    0x9B3660C6L, 0x9FF77D71L, 0x92B45BA8L, 0x9675461FL,
+    0x8832161AL, 0x8CF30BADL, 0x81B02D74L, 0x857130C3L,
+    0x5D8A9099L, 0x594B8D2EL, 0x5408ABF7L, 0x50C9B640L,
+    0x4E8EE645L, 0x4A4FFBF2L, 0x470CDD2BL, 0x43CDC09CL,
+    0x7B827D21L, 0x7F436096L, 0x7200464FL, 0x76C15BF8L,
+    0x68860BFDL, 0x6C47164AL, 0x61043093L, 0x65C52D24L,
+    0x119B4BE9L, 0x155A565EL, 0x18197087L, 0x1CD86D30L,
+    0x029F3D35L, 0x065E2082L, 0x0B1D065BL, 0x0FDC1BECL,
+    0x3793A651L, 0x3352BBE6L, 0x3E119D3FL, 0x3AD08088L,
+    0x2497D08DL, 0x2056CD3AL, 0x2D15EBE3L, 0x29D4F654L,
+    0xC5A92679L, 0xC1683BCEL, 0xCC2B1D17L, 0xC8EA00A0L,
+    0xD6AD50A5L, 0xD26C4D12L, 0xDF2F6BCBL, 0xDBEE767CL,
+    0xE3A1CBC1L, 0xE760D676L, 0xEA23F0AFL, 0xEEE2ED18L,
+    0xF0A5BD1DL, 0xF464A0AAL, 0xF9278673L, 0xFDE69BC4L,
+    0x89B8FD09L, 0x8D79E0BEL, 0x803AC667L, 0x84FBDBD0L,
+    0x9ABC8BD5L, 0x9E7D9662L, 0x933EB0BBL, 0x97FFAD0CL,
+    0xAFB010B1L, 0xAB710D06L, 0xA6322BDFL, 0xA2F33668L,
+    0xBCB4666DL, 0xB8757BDAL, 0xB5365D03L, 0xB1F740B4L
+};
+#endif
+
+static DRFLAC_INLINE drflac_uint32 drflac_crc32_byte(drflac_uint32 crc32, drflac_uint8 data)
+{
+#ifndef DR_FLAC_NO_CRC
+    return (crc32 << 8) ^ drflac__crc32_table[(drflac_uint8)((crc32 >> 24) & 0xFF) ^ data];
+#else
+    (void)data;
+    return crc32;
+#endif
+}
+
+#if 0
+static DRFLAC_INLINE drflac_uint32 drflac_crc32_uint32(drflac_uint32 crc32, drflac_uint32 data)
+{
+    crc32 = drflac_crc32_byte(crc32, (drflac_uint8)((data >> 24) & 0xFF));
+    crc32 = drflac_crc32_byte(crc32, (drflac_uint8)((data >> 16) & 0xFF));
+    crc32 = drflac_crc32_byte(crc32, (drflac_uint8)((data >>  8) & 0xFF));
+    crc32 = drflac_crc32_byte(crc32, (drflac_uint8)((data >>  0) & 0xFF));
+    return crc32;
+}
+
+static DRFLAC_INLINE drflac_uint32 drflac_crc32_uint64(drflac_uint32 crc32, drflac_uint64 data)
+{
+    crc32 = drflac_crc32_uint32(crc32, (drflac_uint32)((data >> 32) & 0xFFFFFFFF));
+    crc32 = drflac_crc32_uint32(crc32, (drflac_uint32)((data >>  0) & 0xFFFFFFFF));
+    return crc32;
+}
+#endif
+
+static DRFLAC_INLINE drflac_uint32 drflac_crc32_buffer(drflac_uint32 crc32, drflac_uint8* pData, drflac_uint32 dataSize)
+{
+    /* This can be optimized. */
+    drflac_uint32 i;
+    for (i = 0; i < dataSize; ++i) {
+        crc32 = drflac_crc32_byte(crc32, pData[i]);
+    }
+    return crc32;
+}
+
+
+static DRFLAC_INLINE drflac_bool32 drflac_ogg__is_capture_pattern(drflac_uint8 pattern[4])
+{
+    return pattern[0] == 'O' && pattern[1] == 'g' && pattern[2] == 'g' && pattern[3] == 'S';
+}
+
+static DRFLAC_INLINE drflac_uint32 drflac_ogg__get_page_header_size(drflac_ogg_page_header* pHeader)
+{
+    return 27 + pHeader->segmentCount;
+}
+
+static DRFLAC_INLINE drflac_uint32 drflac_ogg__get_page_body_size(drflac_ogg_page_header* pHeader)
+{
+    drflac_uint32 pageBodySize = 0;
+    int i;
+
+    for (i = 0; i < pHeader->segmentCount; ++i) {
+        pageBodySize += pHeader->segmentTable[i];
+    }
+
+    return pageBodySize;
+}
+
+static drflac_result drflac_ogg__read_page_header_after_capture_pattern(drflac_read_proc onRead, void* pUserData, drflac_ogg_page_header* pHeader, drflac_uint32* pBytesRead, drflac_uint32* pCRC32)
+{
+    drflac_uint8 data[23];
+    drflac_uint32 i;
+
+    DRFLAC_ASSERT(*pCRC32 == DRFLAC_OGG_CAPTURE_PATTERN_CRC32);
+
+    if (onRead(pUserData, data, 23) != 23) {
+        return DRFLAC_AT_END;
+    }
+    *pBytesRead += 23;
+
+    /*
+    It's not actually used, but set the capture pattern to 'OggS' for completeness. Not doing this will cause static analysers to complain about
+    us trying to access uninitialized data. We could alternatively just comment out this member of the drflac_ogg_page_header structure, but I
+    like to have it map to the structure of the underlying data.
+    */
+    pHeader->capturePattern[0] = 'O';
+    pHeader->capturePattern[1] = 'g';
+    pHeader->capturePattern[2] = 'g';
+    pHeader->capturePattern[3] = 'S';
+
+    pHeader->structureVersion = data[0];
+    pHeader->headerType       = data[1];
+    DRFLAC_COPY_MEMORY(&pHeader->granulePosition, &data[ 2], 8);
+    DRFLAC_COPY_MEMORY(&pHeader->serialNumber,    &data[10], 4);
+    DRFLAC_COPY_MEMORY(&pHeader->sequenceNumber,  &data[14], 4);
+    DRFLAC_COPY_MEMORY(&pHeader->checksum,        &data[18], 4);
+    pHeader->segmentCount     = data[22];
+
+    /* Calculate the CRC. Note that for the calculation the checksum part of the page needs to be set to 0. */
+    data[18] = 0;
+    data[19] = 0;
+    data[20] = 0;
+    data[21] = 0;
+
+    for (i = 0; i < 23; ++i) {
+        *pCRC32 = drflac_crc32_byte(*pCRC32, data[i]);
+    }
+
+
+    if (onRead(pUserData, pHeader->segmentTable, pHeader->segmentCount) != pHeader->segmentCount) {
+        return DRFLAC_AT_END;
+    }
+    *pBytesRead += pHeader->segmentCount;
+
+    for (i = 0; i < pHeader->segmentCount; ++i) {
+        *pCRC32 = drflac_crc32_byte(*pCRC32, pHeader->segmentTable[i]);
+    }
+
+    return DRFLAC_SUCCESS;
+}
+
+static drflac_result drflac_ogg__read_page_header(drflac_read_proc onRead, void* pUserData, drflac_ogg_page_header* pHeader, drflac_uint32* pBytesRead, drflac_uint32* pCRC32)
+{
+    drflac_uint8 id[4];
+
+    *pBytesRead = 0;
+
+    if (onRead(pUserData, id, 4) != 4) {
+        return DRFLAC_AT_END;
+    }
+    *pBytesRead += 4;
+
+    /* We need to read byte-by-byte until we find the OggS capture pattern. */
+    for (;;) {
+        if (drflac_ogg__is_capture_pattern(id)) {
+            drflac_result result;
+
+            *pCRC32 = DRFLAC_OGG_CAPTURE_PATTERN_CRC32;
+
+            result = drflac_ogg__read_page_header_after_capture_pattern(onRead, pUserData, pHeader, pBytesRead, pCRC32);
+            if (result == DRFLAC_SUCCESS) {
+                return DRFLAC_SUCCESS;
+            } else {
+                if (result == DRFLAC_CRC_MISMATCH) {
+                    continue;
+                } else {
+                    return result;
+                }
+            }
+        } else {
+            /* The first 4 bytes did not equal the capture pattern. Read the next byte and try again. */
+            id[0] = id[1];
+            id[1] = id[2];
+            id[2] = id[3];
+            if (onRead(pUserData, &id[3], 1) != 1) {
+                return DRFLAC_AT_END;
+            }
+            *pBytesRead += 1;
+        }
+    }
+}
+
+
+/*
+The main part of the Ogg encapsulation is the conversion from the physical Ogg bitstream to the native FLAC bitstream. It works
+in three general stages: Ogg Physical Bitstream -> Ogg/FLAC Logical Bitstream -> FLAC Native Bitstream. dr_flac is designed
+in such a way that the core sections assume everything is delivered in native format. Therefore, for each encapsulation type
+dr_flac is supporting there needs to be a layer sitting on top of the onRead and onSeek callbacks that ensures the bits read from
+the physical Ogg bitstream are converted and delivered in native FLAC format.
+*/
+typedef struct
+{
+    drflac_read_proc onRead;                /* The original onRead callback from drflac_open() and family. */
+    drflac_seek_proc onSeek;                /* The original onSeek callback from drflac_open() and family. */
+    void* pUserData;                        /* The user data passed on onRead and onSeek. This is the user data that was passed on drflac_open() and family. */
+    drflac_uint64 currentBytePos;           /* The position of the byte we are sitting on in the physical byte stream. Used for efficient seeking. */
+    drflac_uint64 firstBytePos;             /* The position of the first byte in the physical bitstream. Points to the start of the "OggS" identifier of the FLAC bos page. */
+    drflac_uint32 serialNumber;             /* The serial number of the FLAC audio pages. This is determined by the initial header page that was read during initialization. */
+    drflac_ogg_page_header bosPageHeader;   /* Used for seeking. */
+    drflac_ogg_page_header currentPageHeader;
+    drflac_uint32 bytesRemainingInPage;
+    drflac_uint32 pageDataSize;
+    drflac_uint8 pageData[DRFLAC_OGG_MAX_PAGE_SIZE];
+} drflac_oggbs; /* oggbs = Ogg Bitstream */
+
+static size_t drflac_oggbs__read_physical(drflac_oggbs* oggbs, void* bufferOut, size_t bytesToRead)
+{
+    size_t bytesActuallyRead = oggbs->onRead(oggbs->pUserData, bufferOut, bytesToRead);
+    oggbs->currentBytePos += bytesActuallyRead;
+
+    return bytesActuallyRead;
+}
+
+static drflac_bool32 drflac_oggbs__seek_physical(drflac_oggbs* oggbs, drflac_uint64 offset, drflac_seek_origin origin)
+{
+    if (origin == drflac_seek_origin_start) {
+        if (offset <= 0x7FFFFFFF) {
+            if (!oggbs->onSeek(oggbs->pUserData, (int)offset, drflac_seek_origin_start)) {
+                return DRFLAC_FALSE;
+            }
+            oggbs->currentBytePos = offset;
+
+            return DRFLAC_TRUE;
+        } else {
+            if (!oggbs->onSeek(oggbs->pUserData, 0x7FFFFFFF, drflac_seek_origin_start)) {
+                return DRFLAC_FALSE;
+            }
+            oggbs->currentBytePos = offset;
+
+            return drflac_oggbs__seek_physical(oggbs, offset - 0x7FFFFFFF, drflac_seek_origin_current);
+        }
+    } else {
+        while (offset > 0x7FFFFFFF) {
+            if (!oggbs->onSeek(oggbs->pUserData, 0x7FFFFFFF, drflac_seek_origin_current)) {
+                return DRFLAC_FALSE;
+            }
+            oggbs->currentBytePos += 0x7FFFFFFF;
+            offset -= 0x7FFFFFFF;
+        }
+
+        if (!oggbs->onSeek(oggbs->pUserData, (int)offset, drflac_seek_origin_current)) {    /* <-- Safe cast thanks to the loop above. */
+            return DRFLAC_FALSE;
+        }
+        oggbs->currentBytePos += offset;
+
+        return DRFLAC_TRUE;
+    }
+}
+
+static drflac_bool32 drflac_oggbs__goto_next_page(drflac_oggbs* oggbs, drflac_ogg_crc_mismatch_recovery recoveryMethod)
+{
+    drflac_ogg_page_header header;
+    for (;;) {
+        drflac_uint32 crc32 = 0;
+        drflac_uint32 bytesRead;
+        drflac_uint32 pageBodySize;
+#ifndef DR_FLAC_NO_CRC
+        drflac_uint32 actualCRC32;
+#endif
+
+        if (drflac_ogg__read_page_header(oggbs->onRead, oggbs->pUserData, &header, &bytesRead, &crc32) != DRFLAC_SUCCESS) {
+            return DRFLAC_FALSE;
+        }
+        oggbs->currentBytePos += bytesRead;
+
+        pageBodySize = drflac_ogg__get_page_body_size(&header);
+        if (pageBodySize > DRFLAC_OGG_MAX_PAGE_SIZE) {
+            continue;   /* Invalid page size. Assume it's corrupted and just move to the next page. */
+        }
+
+        if (header.serialNumber != oggbs->serialNumber) {
+            /* It's not a FLAC page. Skip it. */
+            if (pageBodySize > 0 && !drflac_oggbs__seek_physical(oggbs, pageBodySize, drflac_seek_origin_current)) {
+                return DRFLAC_FALSE;
+            }
+            continue;
+        }
+
+
+        /* We need to read the entire page and then do a CRC check on it. If there's a CRC mismatch we need to skip this page. */
+        if (drflac_oggbs__read_physical(oggbs, oggbs->pageData, pageBodySize) != pageBodySize) {
+            return DRFLAC_FALSE;
+        }
+        oggbs->pageDataSize = pageBodySize;
+
+#ifndef DR_FLAC_NO_CRC
+        actualCRC32 = drflac_crc32_buffer(crc32, oggbs->pageData, oggbs->pageDataSize);
+        if (actualCRC32 != header.checksum) {
+            if (recoveryMethod == drflac_ogg_recover_on_crc_mismatch) {
+                continue;   /* CRC mismatch. Skip this page. */
+            } else {
+                /*
+                Even though we are failing on a CRC mismatch, we still want our stream to be in a good state. Therefore we
+                go to the next valid page to ensure we're in a good state, but return false to let the caller know that the
+                seek did not fully complete.
+                */
+                drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch);
+                return DRFLAC_FALSE;
+            }
+        }
+#else
+        (void)recoveryMethod;   /* <-- Silence a warning. */
+#endif
+
+        oggbs->currentPageHeader = header;
+        oggbs->bytesRemainingInPage = pageBodySize;
+        return DRFLAC_TRUE;
+    }
+}
+
+/* Function below is unused at the moment, but I might be re-adding it later. */
+#if 0
+static drflac_uint8 drflac_oggbs__get_current_segment_index(drflac_oggbs* oggbs, drflac_uint8* pBytesRemainingInSeg)
+{
+    drflac_uint32 bytesConsumedInPage = drflac_ogg__get_page_body_size(&oggbs->currentPageHeader) - oggbs->bytesRemainingInPage;
+    drflac_uint8 iSeg = 0;
+    drflac_uint32 iByte = 0;
+    while (iByte < bytesConsumedInPage) {
+        drflac_uint8 segmentSize = oggbs->currentPageHeader.segmentTable[iSeg];
+        if (iByte + segmentSize > bytesConsumedInPage) {
+            break;
+        } else {
+            iSeg += 1;
+            iByte += segmentSize;
+        }
+    }
+
+    *pBytesRemainingInSeg = oggbs->currentPageHeader.segmentTable[iSeg] - (drflac_uint8)(bytesConsumedInPage - iByte);
+    return iSeg;
+}
+
+static drflac_bool32 drflac_oggbs__seek_to_next_packet(drflac_oggbs* oggbs)
+{
+    /* The current packet ends when we get to the segment with a lacing value of < 255 which is not at the end of a page. */
+    for (;;) {
+        drflac_bool32 atEndOfPage = DRFLAC_FALSE;
+
+        drflac_uint8 bytesRemainingInSeg;
+        drflac_uint8 iFirstSeg = drflac_oggbs__get_current_segment_index(oggbs, &bytesRemainingInSeg);
+
+        drflac_uint32 bytesToEndOfPacketOrPage = bytesRemainingInSeg;
+        for (drflac_uint8 iSeg = iFirstSeg; iSeg < oggbs->currentPageHeader.segmentCount; ++iSeg) {
+            drflac_uint8 segmentSize = oggbs->currentPageHeader.segmentTable[iSeg];
+            if (segmentSize < 255) {
+                if (iSeg == oggbs->currentPageHeader.segmentCount-1) {
+                    atEndOfPage = DRFLAC_TRUE;
+                }
+
+                break;
+            }
+
+            bytesToEndOfPacketOrPage += segmentSize;
+        }
+
+        /*
+        At this point we will have found either the packet or the end of the page. If were at the end of the page we'll
+        want to load the next page and keep searching for the end of the packet.
+        */
+        drflac_oggbs__seek_physical(oggbs, bytesToEndOfPacketOrPage, drflac_seek_origin_current);
+        oggbs->bytesRemainingInPage -= bytesToEndOfPacketOrPage;
+
+        if (atEndOfPage) {
+            /*
+            We're potentially at the next packet, but we need to check the next page first to be sure because the packet may
+            straddle pages.
+            */
+            if (!drflac_oggbs__goto_next_page(oggbs)) {
+                return DRFLAC_FALSE;
+            }
+
+            /* If it's a fresh packet it most likely means we're at the next packet. */
+            if ((oggbs->currentPageHeader.headerType & 0x01) == 0) {
+                return DRFLAC_TRUE;
+            }
+        } else {
+            /* We're at the next packet. */
+            return DRFLAC_TRUE;
+        }
+    }
+}
+
+static drflac_bool32 drflac_oggbs__seek_to_next_frame(drflac_oggbs* oggbs)
+{
+    /* The bitstream should be sitting on the first byte just after the header of the frame. */
+
+    /* What we're actually doing here is seeking to the start of the next packet. */
+    return drflac_oggbs__seek_to_next_packet(oggbs);
+}
+#endif
+
+static size_t drflac__on_read_ogg(void* pUserData, void* bufferOut, size_t bytesToRead)
+{
+    drflac_oggbs* oggbs = (drflac_oggbs*)pUserData;
+    drflac_uint8* pRunningBufferOut = (drflac_uint8*)bufferOut;
+    size_t bytesRead = 0;
+
+    DRFLAC_ASSERT(oggbs != NULL);
+    DRFLAC_ASSERT(pRunningBufferOut != NULL);
+
+    /* Reading is done page-by-page. If we've run out of bytes in the page we need to move to the next one. */
+    while (bytesRead < bytesToRead) {
+        size_t bytesRemainingToRead = bytesToRead - bytesRead;
+
+        if (oggbs->bytesRemainingInPage >= bytesRemainingToRead) {
+            DRFLAC_COPY_MEMORY(pRunningBufferOut, oggbs->pageData + (oggbs->pageDataSize - oggbs->bytesRemainingInPage), bytesRemainingToRead);
+            bytesRead += bytesRemainingToRead;
+            oggbs->bytesRemainingInPage -= (drflac_uint32)bytesRemainingToRead;
+            break;
+        }
+
+        /* If we get here it means some of the requested data is contained in the next pages. */
+        if (oggbs->bytesRemainingInPage > 0) {
+            DRFLAC_COPY_MEMORY(pRunningBufferOut, oggbs->pageData + (oggbs->pageDataSize - oggbs->bytesRemainingInPage), oggbs->bytesRemainingInPage);
+            bytesRead += oggbs->bytesRemainingInPage;
+            pRunningBufferOut += oggbs->bytesRemainingInPage;
+            oggbs->bytesRemainingInPage = 0;
+        }
+
+        DRFLAC_ASSERT(bytesRemainingToRead > 0);
+        if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch)) {
+            break;  /* Failed to go to the next page. Might have simply hit the end of the stream. */
+        }
+    }
+
+    return bytesRead;
+}
+
+static drflac_bool32 drflac__on_seek_ogg(void* pUserData, int offset, drflac_seek_origin origin)
+{
+    drflac_oggbs* oggbs = (drflac_oggbs*)pUserData;
+    int bytesSeeked = 0;
+
+    DRFLAC_ASSERT(oggbs != NULL);
+    DRFLAC_ASSERT(offset >= 0);  /* <-- Never seek backwards. */
+
+    /* Seeking is always forward which makes things a lot simpler. */
+    if (origin == drflac_seek_origin_start) {
+        if (!drflac_oggbs__seek_physical(oggbs, (int)oggbs->firstBytePos, drflac_seek_origin_start)) {
+            return DRFLAC_FALSE;
+        }
+
+        if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_fail_on_crc_mismatch)) {
+            return DRFLAC_FALSE;
+        }
+
+        return drflac__on_seek_ogg(pUserData, offset, drflac_seek_origin_current);
+    }
+
+    DRFLAC_ASSERT(origin == drflac_seek_origin_current);
+
+    while (bytesSeeked < offset) {
+        int bytesRemainingToSeek = offset - bytesSeeked;
+        DRFLAC_ASSERT(bytesRemainingToSeek >= 0);
+
+        if (oggbs->bytesRemainingInPage >= (size_t)bytesRemainingToSeek) {
+            bytesSeeked += bytesRemainingToSeek;
+            (void)bytesSeeked;  /* <-- Silence a dead store warning emitted by Clang Static Analyzer. */
+            oggbs->bytesRemainingInPage -= bytesRemainingToSeek;
+            break;
+        }
+
+        /* If we get here it means some of the requested data is contained in the next pages. */
+        if (oggbs->bytesRemainingInPage > 0) {
+            bytesSeeked += (int)oggbs->bytesRemainingInPage;
+            oggbs->bytesRemainingInPage = 0;
+        }
+
+        DRFLAC_ASSERT(bytesRemainingToSeek > 0);
+        if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_fail_on_crc_mismatch)) {
+            /* Failed to go to the next page. We either hit the end of the stream or had a CRC mismatch. */
+            return DRFLAC_FALSE;
+        }
+    }
+
+    return DRFLAC_TRUE;
+}
+
+
+static drflac_bool32 drflac_ogg__seek_to_pcm_frame(drflac* pFlac, drflac_uint64 pcmFrameIndex)
+{
+    drflac_oggbs* oggbs = (drflac_oggbs*)pFlac->_oggbs;
+    drflac_uint64 originalBytePos;
+    drflac_uint64 runningGranulePosition;
+    drflac_uint64 runningFrameBytePos;
+    drflac_uint64 runningPCMFrameCount;
+
+    DRFLAC_ASSERT(oggbs != NULL);
+
+    originalBytePos = oggbs->currentBytePos;   /* For recovery. Points to the OggS identifier. */
+
+    /* First seek to the first frame. */
+    if (!drflac__seek_to_byte(&pFlac->bs, pFlac->firstFLACFramePosInBytes)) {
+        return DRFLAC_FALSE;
+    }
+    oggbs->bytesRemainingInPage = 0;
+
+    runningGranulePosition = 0;
+    for (;;) {
+        if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch)) {
+            drflac_oggbs__seek_physical(oggbs, originalBytePos, drflac_seek_origin_start);
+            return DRFLAC_FALSE;   /* Never did find that sample... */
+        }
+
+        runningFrameBytePos = oggbs->currentBytePos - drflac_ogg__get_page_header_size(&oggbs->currentPageHeader) - oggbs->pageDataSize;
+        if (oggbs->currentPageHeader.granulePosition >= pcmFrameIndex) {
+            break; /* The sample is somewhere in the previous page. */
+        }
+
+        /*
+        At this point we know the sample is not in the previous page. It could possibly be in this page. For simplicity we
+        disregard any pages that do not begin a fresh packet.
+        */
+        if ((oggbs->currentPageHeader.headerType & 0x01) == 0) {    /* <-- Is it a fresh page? */
+            if (oggbs->currentPageHeader.segmentTable[0] >= 2) {
+                drflac_uint8 firstBytesInPage[2];
+                firstBytesInPage[0] = oggbs->pageData[0];
+                firstBytesInPage[1] = oggbs->pageData[1];
+
+                if ((firstBytesInPage[0] == 0xFF) && (firstBytesInPage[1] & 0xFC) == 0xF8) {    /* <-- Does the page begin with a frame's sync code? */
+                    runningGranulePosition = oggbs->currentPageHeader.granulePosition;
+                }
+
+                continue;
+            }
+        }
+    }
+
+    /*
+    We found the page that that is closest to the sample, so now we need to find it. The first thing to do is seek to the
+    start of that page. In the loop above we checked that it was a fresh page which means this page is also the start of
+    a new frame. This property means that after we've seeked to the page we can immediately start looping over frames until
+    we find the one containing the target sample.
+    */
+    if (!drflac_oggbs__seek_physical(oggbs, runningFrameBytePos, drflac_seek_origin_start)) {
+        return DRFLAC_FALSE;
+    }
+    if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch)) {
+        return DRFLAC_FALSE;
+    }
+
+    /*
+    At this point we'll be sitting on the first byte of the frame header of the first frame in the page. We just keep
+    looping over these frames until we find the one containing the sample we're after.
+    */
+    runningPCMFrameCount = runningGranulePosition;
+    for (;;) {
+        /*
+        There are two ways to find the sample and seek past irrelevant frames:
+          1) Use the native FLAC decoder.
+          2) Use Ogg's framing system.
+
+        Both of these options have their own pros and cons. Using the native FLAC decoder is slower because it needs to
+        do a full decode of the frame. Using Ogg's framing system is faster, but more complicated and involves some code
+        duplication for the decoding of frame headers.
+
+        Another thing to consider is that using the Ogg framing system will perform direct seeking of the physical Ogg
+        bitstream. This is important to consider because it means we cannot read data from the drflac_bs object using the
+        standard drflac__*() APIs because that will read in extra data for its own internal caching which in turn breaks
+        the positioning of the read pointer of the physical Ogg bitstream. Therefore, anything that would normally be read
+        using the native FLAC decoding APIs, such as drflac__read_next_flac_frame_header(), need to be re-implemented so as to
+        avoid the use of the drflac_bs object.
+
+        Considering these issues, I have decided to use the slower native FLAC decoding method for the following reasons:
+          1) Seeking is already partially accelerated using Ogg's paging system in the code block above.
+          2) Seeking in an Ogg encapsulated FLAC stream is probably quite uncommon.
+          3) Simplicity.
+        */
+        drflac_uint64 firstPCMFrameInFLACFrame = 0;
+        drflac_uint64 lastPCMFrameInFLACFrame = 0;
+        drflac_uint64 pcmFrameCountInThisFrame;
+
+        if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
+            return DRFLAC_FALSE;
+        }
+
+        drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &firstPCMFrameInFLACFrame, &lastPCMFrameInFLACFrame);
+
+        pcmFrameCountInThisFrame = (lastPCMFrameInFLACFrame - firstPCMFrameInFLACFrame) + 1;
+
+        /* If we are seeking to the end of the file and we've just hit it, we're done. */
+        if (pcmFrameIndex == pFlac->totalPCMFrameCount && (runningPCMFrameCount + pcmFrameCountInThisFrame) == pFlac->totalPCMFrameCount) {
+            drflac_result result = drflac__decode_flac_frame(pFlac);
+            if (result == DRFLAC_SUCCESS) {
+                pFlac->currentPCMFrame = pcmFrameIndex;
+                pFlac->currentFLACFrame.pcmFramesRemaining = 0;
+                return DRFLAC_TRUE;
+            } else {
+                return DRFLAC_FALSE;
+            }
+        }
+
+        if (pcmFrameIndex < (runningPCMFrameCount + pcmFrameCountInThisFrame)) {
+            /*
+            The sample should be in this FLAC frame. We need to fully decode it, however if it's an invalid frame (a CRC mismatch), we need to pretend
+            it never existed and keep iterating.
+            */
+            drflac_result result = drflac__decode_flac_frame(pFlac);
+            if (result == DRFLAC_SUCCESS) {
+                /* The frame is valid. We just need to skip over some samples to ensure it's sample-exact. */
+                drflac_uint64 pcmFramesToDecode = (size_t)(pcmFrameIndex - runningPCMFrameCount);    /* <-- Safe cast because the maximum number of samples in a frame is 65535. */
+                if (pcmFramesToDecode == 0) {
+                    return DRFLAC_TRUE;
+                }
+
+                pFlac->currentPCMFrame = runningPCMFrameCount;
+
+                return drflac__seek_forward_by_pcm_frames(pFlac, pcmFramesToDecode) == pcmFramesToDecode;  /* <-- If this fails, something bad has happened (it should never fail). */
+            } else {
+                if (result == DRFLAC_CRC_MISMATCH) {
+                    continue;   /* CRC mismatch. Pretend this frame never existed. */
+                } else {
+                    return DRFLAC_FALSE;
+                }
+            }
+        } else {
+            /*
+            It's not in this frame. We need to seek past the frame, but check if there was a CRC mismatch. If so, we pretend this
+            frame never existed and leave the running sample count untouched.
+            */
+            drflac_result result = drflac__seek_to_next_flac_frame(pFlac);
+            if (result == DRFLAC_SUCCESS) {
+                runningPCMFrameCount += pcmFrameCountInThisFrame;
+            } else {
+                if (result == DRFLAC_CRC_MISMATCH) {
+                    continue;   /* CRC mismatch. Pretend this frame never existed. */
+                } else {
+                    return DRFLAC_FALSE;
+                }
+            }
+        }
+    }
+}
+
+
+
+static drflac_bool32 drflac__init_private__ogg(drflac_init_info* pInit, drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, void* pUserDataMD, drflac_bool32 relaxed)
+{
+    drflac_ogg_page_header header;
+    drflac_uint32 crc32 = DRFLAC_OGG_CAPTURE_PATTERN_CRC32;
+    drflac_uint32 bytesRead = 0;
+
+    /* Pre Condition: The bit stream should be sitting just past the 4-byte OggS capture pattern. */
+    (void)relaxed;
+
+    pInit->container = drflac_container_ogg;
+    pInit->oggFirstBytePos = 0;
+
+    /*
+    We'll get here if the first 4 bytes of the stream were the OggS capture pattern, however it doesn't necessarily mean the
+    stream includes FLAC encoded audio. To check for this we need to scan the beginning-of-stream page markers and check if
+    any match the FLAC specification. Important to keep in mind that the stream may be multiplexed.
+    */
+    if (drflac_ogg__read_page_header_after_capture_pattern(onRead, pUserData, &header, &bytesRead, &crc32) != DRFLAC_SUCCESS) {
+        return DRFLAC_FALSE;
+    }
+    pInit->runningFilePos += bytesRead;
+
+    for (;;) {
+        int pageBodySize;
+
+        /* Break if we're past the beginning of stream page. */
+        if ((header.headerType & 0x02) == 0) {
+            return DRFLAC_FALSE;
+        }
+
+        /* Check if it's a FLAC header. */
+        pageBodySize = drflac_ogg__get_page_body_size(&header);
+        if (pageBodySize == 51) {   /* 51 = the lacing value of the FLAC header packet. */
+            /* It could be a FLAC page... */
+            drflac_uint32 bytesRemainingInPage = pageBodySize;
+            drflac_uint8 packetType;
+
+            if (onRead(pUserData, &packetType, 1) != 1) {
+                return DRFLAC_FALSE;
+            }
+
+            bytesRemainingInPage -= 1;
+            if (packetType == 0x7F) {
+                /* Increasingly more likely to be a FLAC page... */
+                drflac_uint8 sig[4];
+                if (onRead(pUserData, sig, 4) != 4) {
+                    return DRFLAC_FALSE;
+                }
+
+                bytesRemainingInPage -= 4;
+                if (sig[0] == 'F' && sig[1] == 'L' && sig[2] == 'A' && sig[3] == 'C') {
+                    /* Almost certainly a FLAC page... */
+                    drflac_uint8 mappingVersion[2];
+                    if (onRead(pUserData, mappingVersion, 2) != 2) {
+                        return DRFLAC_FALSE;
+                    }
+
+                    if (mappingVersion[0] != 1) {
+                        return DRFLAC_FALSE;   /* Only supporting version 1.x of the Ogg mapping. */
+                    }
+
+                    /*
+                    The next 2 bytes are the non-audio packets, not including this one. We don't care about this because we're going to
+                    be handling it in a generic way based on the serial number and packet types.
+                    */
+                    if (!onSeek(pUserData, 2, drflac_seek_origin_current)) {
+                        return DRFLAC_FALSE;
+                    }
+
+                    /* Expecting the native FLAC signature "fLaC". */
+                    if (onRead(pUserData, sig, 4) != 4) {
+                        return DRFLAC_FALSE;
+                    }
+
+                    if (sig[0] == 'f' && sig[1] == 'L' && sig[2] == 'a' && sig[3] == 'C') {
+                        /* The remaining data in the page should be the STREAMINFO block. */
+                        drflac_streaminfo streaminfo;
+                        drflac_uint8 isLastBlock;
+                        drflac_uint8 blockType;
+                        drflac_uint32 blockSize;
+                        if (!drflac__read_and_decode_block_header(onRead, pUserData, &isLastBlock, &blockType, &blockSize)) {
+                            return DRFLAC_FALSE;
+                        }
+
+                        if (blockType != DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO || blockSize != 34) {
+                            return DRFLAC_FALSE;    /* Invalid block type. First block must be the STREAMINFO block. */
+                        }
+
+                        if (drflac__read_streaminfo(onRead, pUserData, &streaminfo)) {
+                            /* Success! */
+                            pInit->hasStreamInfoBlock      = DRFLAC_TRUE;
+                            pInit->sampleRate              = streaminfo.sampleRate;
+                            pInit->channels                = streaminfo.channels;
+                            pInit->bitsPerSample           = streaminfo.bitsPerSample;
+                            pInit->totalPCMFrameCount      = streaminfo.totalPCMFrameCount;
+                            pInit->maxBlockSizeInPCMFrames = streaminfo.maxBlockSizeInPCMFrames;
+                            pInit->hasMetadataBlocks       = !isLastBlock;
+
+                            if (onMeta) {
+                                drflac_metadata metadata;
+                                metadata.type = DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO;
+                                metadata.pRawData = NULL;
+                                metadata.rawDataSize = 0;
+                                metadata.data.streaminfo = streaminfo;
+                                onMeta(pUserDataMD, &metadata);
+                            }
+
+                            pInit->runningFilePos  += pageBodySize;
+                            pInit->oggFirstBytePos  = pInit->runningFilePos - 79;   /* Subtracting 79 will place us right on top of the "OggS" identifier of the FLAC bos page. */
+                            pInit->oggSerial        = header.serialNumber;
+                            pInit->oggBosHeader     = header;
+                            break;
+                        } else {
+                            /* Failed to read STREAMINFO block. Aww, so close... */
+                            return DRFLAC_FALSE;
+                        }
+                    } else {
+                        /* Invalid file. */
+                        return DRFLAC_FALSE;
+                    }
+                } else {
+                    /* Not a FLAC header. Skip it. */
+                    if (!onSeek(pUserData, bytesRemainingInPage, drflac_seek_origin_current)) {
+                        return DRFLAC_FALSE;
+                    }
+                }
+            } else {
+                /* Not a FLAC header. Seek past the entire page and move on to the next. */
+                if (!onSeek(pUserData, bytesRemainingInPage, drflac_seek_origin_current)) {
+                    return DRFLAC_FALSE;
+                }
+            }
+        } else {
+            if (!onSeek(pUserData, pageBodySize, drflac_seek_origin_current)) {
+                return DRFLAC_FALSE;
+            }
+        }
+
+        pInit->runningFilePos += pageBodySize;
+
+
+        /* Read the header of the next page. */
+        if (drflac_ogg__read_page_header(onRead, pUserData, &header, &bytesRead, &crc32) != DRFLAC_SUCCESS) {
+            return DRFLAC_FALSE;
+        }
+        pInit->runningFilePos += bytesRead;
+    }
+
+    /*
+    If we get here it means we found a FLAC audio stream. We should be sitting on the first byte of the header of the next page. The next
+    packets in the FLAC logical stream contain the metadata. The only thing left to do in the initialization phase for Ogg is to create the
+    Ogg bistream object.
+    */
+    pInit->hasMetadataBlocks = DRFLAC_TRUE;    /* <-- Always have at least VORBIS_COMMENT metadata block. */
+    return DRFLAC_TRUE;
+}
+#endif
+
+static drflac_bool32 drflac__init_private(drflac_init_info* pInit, drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData, void* pUserDataMD)
+{
+    drflac_bool32 relaxed;
+    drflac_uint8 id[4];
+
+    if (pInit == NULL || onRead == NULL || onSeek == NULL) {
+        return DRFLAC_FALSE;
+    }
+
+    DRFLAC_ZERO_MEMORY(pInit, sizeof(*pInit));
+    pInit->onRead       = onRead;
+    pInit->onSeek       = onSeek;
+    pInit->onMeta       = onMeta;
+    pInit->container    = container;
+    pInit->pUserData    = pUserData;
+    pInit->pUserDataMD  = pUserDataMD;
+
+    pInit->bs.onRead    = onRead;
+    pInit->bs.onSeek    = onSeek;
+    pInit->bs.pUserData = pUserData;
+    drflac__reset_cache(&pInit->bs);
+
+
+    /* If the container is explicitly defined then we can try opening in relaxed mode. */
+    relaxed = container != drflac_container_unknown;
+
+    /* Skip over any ID3 tags. */
+    for (;;) {
+        if (onRead(pUserData, id, 4) != 4) {
+            return DRFLAC_FALSE;    /* Ran out of data. */
+        }
+        pInit->runningFilePos += 4;
+
+        if (id[0] == 'I' && id[1] == 'D' && id[2] == '3') {
+            drflac_uint8 header[6];
+            drflac_uint8 flags;
+            drflac_uint32 headerSize;
+
+            if (onRead(pUserData, header, 6) != 6) {
+                return DRFLAC_FALSE;    /* Ran out of data. */
+            }
+            pInit->runningFilePos += 6;
+
+            flags = header[1];
+
+            DRFLAC_COPY_MEMORY(&headerSize, header+2, 4);
+            headerSize = drflac__unsynchsafe_32(drflac__be2host_32(headerSize));
+            if (flags & 0x10) {
+                headerSize += 10;
+            }
+
+            if (!onSeek(pUserData, headerSize, drflac_seek_origin_current)) {
+                return DRFLAC_FALSE;    /* Failed to seek past the tag. */
+            }
+            pInit->runningFilePos += headerSize;
+        } else {
+            break;
+        }
+    }
+
+    if (id[0] == 'f' && id[1] == 'L' && id[2] == 'a' && id[3] == 'C') {
+        return drflac__init_private__native(pInit, onRead, onSeek, onMeta, pUserData, pUserDataMD, relaxed);
+    }
+#ifndef DR_FLAC_NO_OGG
+    if (id[0] == 'O' && id[1] == 'g' && id[2] == 'g' && id[3] == 'S') {
+        return drflac__init_private__ogg(pInit, onRead, onSeek, onMeta, pUserData, pUserDataMD, relaxed);
+    }
+#endif
+
+    /* If we get here it means we likely don't have a header. Try opening in relaxed mode, if applicable. */
+    if (relaxed) {
+        if (container == drflac_container_native) {
+            return drflac__init_private__native(pInit, onRead, onSeek, onMeta, pUserData, pUserDataMD, relaxed);
+        }
+#ifndef DR_FLAC_NO_OGG
+        if (container == drflac_container_ogg) {
+            return drflac__init_private__ogg(pInit, onRead, onSeek, onMeta, pUserData, pUserDataMD, relaxed);
+        }
+#endif
+    }
+
+    /* Unsupported container. */
+    return DRFLAC_FALSE;
+}
+
+static void drflac__init_from_info(drflac* pFlac, const drflac_init_info* pInit)
+{
+    DRFLAC_ASSERT(pFlac != NULL);
+    DRFLAC_ASSERT(pInit != NULL);
+
+    DRFLAC_ZERO_MEMORY(pFlac, sizeof(*pFlac));
+    pFlac->bs                      = pInit->bs;
+    pFlac->onMeta                  = pInit->onMeta;
+    pFlac->pUserDataMD             = pInit->pUserDataMD;
+    pFlac->maxBlockSizeInPCMFrames = pInit->maxBlockSizeInPCMFrames;
+    pFlac->sampleRate              = pInit->sampleRate;
+    pFlac->channels                = (drflac_uint8)pInit->channels;
+    pFlac->bitsPerSample           = (drflac_uint8)pInit->bitsPerSample;
+    pFlac->totalPCMFrameCount      = pInit->totalPCMFrameCount;
+    pFlac->container               = pInit->container;
+}
+
+
+static drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData, void* pUserDataMD, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    static drflac_init_info init;
+    drflac_uint32 allocationSize;
+    drflac_uint32 wholeSIMDVectorCountPerChannel;
+    drflac_uint32 decodedSamplesAllocationSize;
+#ifndef DR_FLAC_NO_OGG
+    static drflac_oggbs oggbs;
+#endif
+    drflac_uint64 firstFramePos;
+    drflac_uint64 seektablePos;
+    drflac_uint32 seektableSize;
+    drflac_allocation_callbacks allocationCallbacks;
+    drflac* pFlac;
+
+    /* CPU support first. */
+    drflac__init_cpu_caps();
+
+    if (!drflac__init_private(&init, onRead, onSeek, onMeta, container, pUserData, pUserDataMD)) {
+        return NULL;
+    }
+
+    if (pAllocationCallbacks != NULL) {
+        allocationCallbacks = *pAllocationCallbacks;
+        if (allocationCallbacks.onFree == NULL || (allocationCallbacks.onMalloc == NULL && allocationCallbacks.onRealloc == NULL)) {
+            return NULL;    /* Invalid allocation callbacks. */
+        }
+    } else {
+        allocationCallbacks.pUserData = NULL;
+        allocationCallbacks.onMalloc  = drflac__malloc_default;
+        allocationCallbacks.onRealloc = drflac__realloc_default;
+        allocationCallbacks.onFree    = drflac__free_default;
+    }
+
+
+    /*
+    The size of the allocation for the drflac object needs to be large enough to fit the following:
+      1) The main members of the drflac structure
+      2) A block of memory large enough to store the decoded samples of the largest frame in the stream
+      3) If the container is Ogg, a drflac_oggbs object
+
+    The complicated part of the allocation is making sure there's enough room the decoded samples, taking into consideration
+    the different SIMD instruction sets.
+    */
+    allocationSize = sizeof(drflac);
+
+    /*
+    The allocation size for decoded frames depends on the number of 32-bit integers that fit inside the largest SIMD vector
+    we are supporting.
+    */
+    if ((init.maxBlockSizeInPCMFrames % (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32))) == 0) {
+        wholeSIMDVectorCountPerChannel = (init.maxBlockSizeInPCMFrames / (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32)));
+    } else {
+        wholeSIMDVectorCountPerChannel = (init.maxBlockSizeInPCMFrames / (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32))) + 1;
+    }
+
+    decodedSamplesAllocationSize = wholeSIMDVectorCountPerChannel * DRFLAC_MAX_SIMD_VECTOR_SIZE * init.channels;
+
+    allocationSize += decodedSamplesAllocationSize;
+    allocationSize += DRFLAC_MAX_SIMD_VECTOR_SIZE;  /* Allocate extra bytes to ensure we have enough for alignment. */
+
+#ifndef DR_FLAC_NO_OGG
+    /* There's additional data required for Ogg streams. */
+    if (init.container == drflac_container_ogg) {
+        allocationSize += sizeof(drflac_oggbs);
+    }
+
+    DRFLAC_ZERO_MEMORY(&oggbs, sizeof(oggbs));
+    if (init.container == drflac_container_ogg) {
+        oggbs.onRead = onRead;
+        oggbs.onSeek = onSeek;
+        oggbs.pUserData = pUserData;
+        oggbs.currentBytePos = init.oggFirstBytePos;
+        oggbs.firstBytePos = init.oggFirstBytePos;
+        oggbs.serialNumber = init.oggSerial;
+        oggbs.bosPageHeader = init.oggBosHeader;
+        oggbs.bytesRemainingInPage = 0;
+    }
+#endif
+
+    /*
+    This part is a bit awkward. We need to load the seektable so that it can be referenced in-memory, but I want the drflac object to
+    consist of only a single heap allocation. To this, the size of the seek table needs to be known, which we determine when reading
+    and decoding the metadata.
+    */
+    firstFramePos = 42;   /* <-- We know we are at byte 42 at this point. */
+    seektablePos  = 0;
+    seektableSize = 0;
+    if (init.hasMetadataBlocks) {
+        drflac_read_proc onReadOverride = onRead;
+        drflac_seek_proc onSeekOverride = onSeek;
+        void* pUserDataOverride = pUserData;
+
+#ifndef DR_FLAC_NO_OGG
+        if (init.container == drflac_container_ogg) {
+            onReadOverride = drflac__on_read_ogg;
+            onSeekOverride = drflac__on_seek_ogg;
+            pUserDataOverride = (void*)&oggbs;
+        }
+#endif
+
+        if (!drflac__read_and_decode_metadata(onReadOverride, onSeekOverride, onMeta, pUserDataOverride, pUserDataMD, &firstFramePos, &seektablePos, &seektableSize, &allocationCallbacks)) {
+            return NULL;
+        }
+
+        allocationSize += seektableSize;
+    }
+
+
+    pFlac = (drflac*)drflac__malloc_from_callbacks(allocationSize, &allocationCallbacks);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    drflac__init_from_info(pFlac, &init);
+    pFlac->allocationCallbacks = allocationCallbacks;
+    pFlac->pDecodedSamples = (drflac_int32*)drflac_align((size_t)pFlac->pExtraData, DRFLAC_MAX_SIMD_VECTOR_SIZE);
+
+#ifndef DR_FLAC_NO_OGG
+    if (init.container == drflac_container_ogg) {
+        drflac_oggbs* pInternalOggbs = (drflac_oggbs*)((drflac_uint8*)pFlac->pDecodedSamples + decodedSamplesAllocationSize + seektableSize);
+        *pInternalOggbs = oggbs;
+
+        /* The Ogg bistream needs to be layered on top of the original bitstream. */
+        pFlac->bs.onRead = drflac__on_read_ogg;
+        pFlac->bs.onSeek = drflac__on_seek_ogg;
+        pFlac->bs.pUserData = (void*)pInternalOggbs;
+        pFlac->_oggbs = (void*)pInternalOggbs;
+    }
+#endif
+
+    pFlac->firstFLACFramePosInBytes = firstFramePos;
+
+    /* NOTE: Seektables are not currently compatible with Ogg encapsulation (Ogg has its own accelerated seeking system). I may change this later, so I'm leaving this here for now. */
+#ifndef DR_FLAC_NO_OGG
+    if (init.container == drflac_container_ogg)
+    {
+        pFlac->pSeekpoints = NULL;
+        pFlac->seekpointCount = 0;
+    }
+    else
+#endif
+    {
+        /* If we have a seektable we need to load it now, making sure we move back to where we were previously. */
+        if (seektablePos != 0) {
+            pFlac->seekpointCount = seektableSize / sizeof(*pFlac->pSeekpoints);
+            pFlac->pSeekpoints = (drflac_seekpoint*)((drflac_uint8*)pFlac->pDecodedSamples + decodedSamplesAllocationSize);
+
+            DRFLAC_ASSERT(pFlac->bs.onSeek != NULL);
+            DRFLAC_ASSERT(pFlac->bs.onRead != NULL);
+
+            /* Seek to the seektable, then just read directly into our seektable buffer. */
+            if (pFlac->bs.onSeek(pFlac->bs.pUserData, (int)seektablePos, drflac_seek_origin_start)) {
+                if (pFlac->bs.onRead(pFlac->bs.pUserData, pFlac->pSeekpoints, seektableSize) == seektableSize) {
+                    /* Endian swap. */
+                    drflac_uint32 iSeekpoint;
+                    for (iSeekpoint = 0; iSeekpoint < pFlac->seekpointCount; ++iSeekpoint) {
+                        pFlac->pSeekpoints[iSeekpoint].firstPCMFrame   = drflac__be2host_64(pFlac->pSeekpoints[iSeekpoint].firstPCMFrame);
+                        pFlac->pSeekpoints[iSeekpoint].flacFrameOffset = drflac__be2host_64(pFlac->pSeekpoints[iSeekpoint].flacFrameOffset);
+                        pFlac->pSeekpoints[iSeekpoint].pcmFrameCount   = drflac__be2host_16(pFlac->pSeekpoints[iSeekpoint].pcmFrameCount);
+                    }
+                } else {
+                    /* Failed to read the seektable. Pretend we don't have one. */
+                    pFlac->pSeekpoints = NULL;
+                    pFlac->seekpointCount = 0;
+                }
+
+                /* We need to seek back to where we were. If this fails it's a critical error. */
+                if (!pFlac->bs.onSeek(pFlac->bs.pUserData, (int)pFlac->firstFLACFramePosInBytes, drflac_seek_origin_start)) {
+                    drflac__free_from_callbacks(pFlac, &allocationCallbacks);
+                    return NULL;
+                }
+            } else {
+                /* Failed to seek to the seektable. Ominous sign, but for now we can just pretend we don't have one. */
+                pFlac->pSeekpoints = NULL;
+                pFlac->seekpointCount = 0;
+            }
+        }
+    }
+
+
+    /*
+    If we get here, but don't have a STREAMINFO block, it means we've opened the stream in relaxed mode and need to decode
+    the first frame.
+    */
+    if (!init.hasStreamInfoBlock) {
+        pFlac->currentFLACFrame.header = init.firstFrameHeader;
+        for (;;) {
+            drflac_result result = drflac__decode_flac_frame(pFlac);
+            if (result == DRFLAC_SUCCESS) {
+                break;
+            } else {
+                if (result == DRFLAC_CRC_MISMATCH) {
+                    if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
+                        drflac__free_from_callbacks(pFlac, &allocationCallbacks);
+                        return NULL;
+                    }
+                    continue;
+                } else {
+                    drflac__free_from_callbacks(pFlac, &allocationCallbacks);
+                    return NULL;
+                }
+            }
+        }
+    }
+
+    return pFlac;
+}
+
+
+
+#ifndef DR_FLAC_NO_STDIO
+#include <stdio.h>
+#include <wchar.h>      /* For wcslen(), wcsrtombs() */
+
+/* drflac_result_from_errno() is only used for fopen() and wfopen() so putting it inside DR_WAV_NO_STDIO for now. If something else needs this later we can move it out. */
+#include <errno.h>
+static drflac_result drflac_result_from_errno(int e)
+{
+    switch (e)
+    {
+        case 0: return DRFLAC_SUCCESS;
+    #ifdef EPERM
+        case EPERM: return DRFLAC_INVALID_OPERATION;
+    #endif
+    #ifdef ENOENT
+        case ENOENT: return DRFLAC_DOES_NOT_EXIST;
+    #endif
+    #ifdef ESRCH
+        case ESRCH: return DRFLAC_DOES_NOT_EXIST;
+    #endif
+    #ifdef EINTR
+        case EINTR: return DRFLAC_INTERRUPT;
+    #endif
+    #ifdef EIO
+        case EIO: return DRFLAC_IO_ERROR;
+    #endif
+    #ifdef ENXIO
+        case ENXIO: return DRFLAC_DOES_NOT_EXIST;
+    #endif
+    #ifdef E2BIG
+        case E2BIG: return DRFLAC_INVALID_ARGS;
+    #endif
+    #ifdef ENOEXEC
+        case ENOEXEC: return DRFLAC_INVALID_FILE;
+    #endif
+    #ifdef EBADF
+        case EBADF: return DRFLAC_INVALID_FILE;
+    #endif
+    #ifdef ECHILD
+        case ECHILD: return DRFLAC_ERROR;
+    #endif
+    #ifdef EAGAIN
+        case EAGAIN: return DRFLAC_UNAVAILABLE;
+    #endif
+    #ifdef ENOMEM
+        case ENOMEM: return DRFLAC_OUT_OF_MEMORY;
+    #endif
+    #ifdef EACCES
+        case EACCES: return DRFLAC_ACCESS_DENIED;
+    #endif
+    #ifdef EFAULT
+        case EFAULT: return DRFLAC_BAD_ADDRESS;
+    #endif
+    #ifdef ENOTBLK
+        case ENOTBLK: return DRFLAC_ERROR;
+    #endif
+    #ifdef EBUSY
+        case EBUSY: return DRFLAC_BUSY;
+    #endif
+    #ifdef EEXIST
+        case EEXIST: return DRFLAC_ALREADY_EXISTS;
+    #endif
+    #ifdef EXDEV
+        case EXDEV: return DRFLAC_ERROR;
+    #endif
+    #ifdef ENODEV
+        case ENODEV: return DRFLAC_DOES_NOT_EXIST;
+    #endif
+    #ifdef ENOTDIR
+        case ENOTDIR: return DRFLAC_NOT_DIRECTORY;
+    #endif
+    #ifdef EISDIR
+        case EISDIR: return DRFLAC_IS_DIRECTORY;
+    #endif
+    #ifdef EINVAL
+        case EINVAL: return DRFLAC_INVALID_ARGS;
+    #endif
+    #ifdef ENFILE
+        case ENFILE: return DRFLAC_TOO_MANY_OPEN_FILES;
+    #endif
+    #ifdef EMFILE
+        case EMFILE: return DRFLAC_TOO_MANY_OPEN_FILES;
+    #endif
+    #ifdef ENOTTY
+        case ENOTTY: return DRFLAC_INVALID_OPERATION;
+    #endif
+    #ifdef ETXTBSY
+        case ETXTBSY: return DRFLAC_BUSY;
+    #endif
+    #ifdef EFBIG
+        case EFBIG: return DRFLAC_TOO_BIG;
+    #endif
+    #ifdef ENOSPC
+        case ENOSPC: return DRFLAC_NO_SPACE;
+    #endif
+    #ifdef ESPIPE
+        case ESPIPE: return DRFLAC_BAD_SEEK;
+    #endif
+    #ifdef EROFS
+        case EROFS: return DRFLAC_ACCESS_DENIED;
+    #endif
+    #ifdef EMLINK
+        case EMLINK: return DRFLAC_TOO_MANY_LINKS;
+    #endif
+    #ifdef EPIPE
+        case EPIPE: return DRFLAC_BAD_PIPE;
+    #endif
+    #ifdef EDOM
+        case EDOM: return DRFLAC_OUT_OF_RANGE;
+    #endif
+    #ifdef ERANGE
+        case ERANGE: return DRFLAC_OUT_OF_RANGE;
+    #endif
+    #ifdef EDEADLK
+        case EDEADLK: return DRFLAC_DEADLOCK;
+    #endif
+    #ifdef ENAMETOOLONG
+        case ENAMETOOLONG: return DRFLAC_PATH_TOO_LONG;
+    #endif
+    #ifdef ENOLCK
+        case ENOLCK: return DRFLAC_ERROR;
+    #endif
+    #ifdef ENOSYS
+        case ENOSYS: return DRFLAC_NOT_IMPLEMENTED;
+    #endif
+    #ifdef ENOTEMPTY
+        case ENOTEMPTY: return DRFLAC_DIRECTORY_NOT_EMPTY;
+    #endif
+    #ifdef ELOOP
+        case ELOOP: return DRFLAC_TOO_MANY_LINKS;
+    #endif
+    #ifdef ENOMSG
+        case ENOMSG: return DRFLAC_NO_MESSAGE;
+    #endif
+    #ifdef EIDRM
+        case EIDRM: return DRFLAC_ERROR;
+    #endif
+    #ifdef ECHRNG
+        case ECHRNG: return DRFLAC_ERROR;
+    #endif
+    #ifdef EL2NSYNC
+        case EL2NSYNC: return DRFLAC_ERROR;
+    #endif
+    #ifdef EL3HLT
+        case EL3HLT: return DRFLAC_ERROR;
+    #endif
+    #ifdef EL3RST
+        case EL3RST: return DRFLAC_ERROR;
+    #endif
+    #ifdef ELNRNG
+        case ELNRNG: return DRFLAC_OUT_OF_RANGE;
+    #endif
+    #ifdef EUNATCH
+        case EUNATCH: return DRFLAC_ERROR;
+    #endif
+    #ifdef ENOCSI
+        case ENOCSI: return DRFLAC_ERROR;
+    #endif
+    #ifdef EL2HLT
+        case EL2HLT: return DRFLAC_ERROR;
+    #endif
+    #ifdef EBADE
+        case EBADE: return DRFLAC_ERROR;
+    #endif
+    #ifdef EBADR
+        case EBADR: return DRFLAC_ERROR;
+    #endif
+    #ifdef EXFULL
+        case EXFULL: return DRFLAC_ERROR;
+    #endif
+    #ifdef ENOANO
+        case ENOANO: return DRFLAC_ERROR;
+    #endif
+    #ifdef EBADRQC
+        case EBADRQC: return DRFLAC_ERROR;
+    #endif
+    #ifdef EBADSLT
+        case EBADSLT: return DRFLAC_ERROR;
+    #endif
+    #ifdef EBFONT
+        case EBFONT: return DRFLAC_INVALID_FILE;
+    #endif
+    #ifdef ENOSTR
+        case ENOSTR: return DRFLAC_ERROR;
+    #endif
+    #ifdef ENODATA
+        case ENODATA: return DRFLAC_NO_DATA_AVAILABLE;
+    #endif
+    #ifdef ETIME
+        case ETIME: return DRFLAC_TIMEOUT;
+    #endif
+    #ifdef ENOSR
+        case ENOSR: return DRFLAC_NO_DATA_AVAILABLE;
+    #endif
+    #ifdef ENONET
+        case ENONET: return DRFLAC_NO_NETWORK;
+    #endif
+    #ifdef ENOPKG
+        case ENOPKG: return DRFLAC_ERROR;
+    #endif
+    #ifdef EREMOTE
+        case EREMOTE: return DRFLAC_ERROR;
+    #endif
+    #ifdef ENOLINK
+        case ENOLINK: return DRFLAC_ERROR;
+    #endif
+    #ifdef EADV
+        case EADV: return DRFLAC_ERROR;
+    #endif
+    #ifdef ESRMNT
+        case ESRMNT: return DRFLAC_ERROR;
+    #endif
+    #ifdef ECOMM
+        case ECOMM: return DRFLAC_ERROR;
+    #endif
+    #ifdef EPROTO
+        case EPROTO: return DRFLAC_ERROR;
+    #endif
+    #ifdef EMULTIHOP
+        case EMULTIHOP: return DRFLAC_ERROR;
+    #endif
+    #ifdef EDOTDOT
+        case EDOTDOT: return DRFLAC_ERROR;
+    #endif
+    #ifdef EBADMSG
+        case EBADMSG: return DRFLAC_BAD_MESSAGE;
+    #endif
+    #ifdef EOVERFLOW
+        case EOVERFLOW: return DRFLAC_TOO_BIG;
+    #endif
+    #ifdef ENOTUNIQ
+        case ENOTUNIQ: return DRFLAC_NOT_UNIQUE;
+    #endif
+    #ifdef EBADFD
+        case EBADFD: return DRFLAC_ERROR;
+    #endif
+    #ifdef EREMCHG
+        case EREMCHG: return DRFLAC_ERROR;
+    #endif
+    #ifdef ELIBACC
+        case ELIBACC: return DRFLAC_ACCESS_DENIED;
+    #endif
+    #ifdef ELIBBAD
+        case ELIBBAD: return DRFLAC_INVALID_FILE;
+    #endif
+    #ifdef ELIBSCN
+        case ELIBSCN: return DRFLAC_INVALID_FILE;
+    #endif
+    #ifdef ELIBMAX
+        case ELIBMAX: return DRFLAC_ERROR;
+    #endif
+    #ifdef ELIBEXEC
+        case ELIBEXEC: return DRFLAC_ERROR;
+    #endif
+    #ifdef EILSEQ
+        case EILSEQ: return DRFLAC_INVALID_DATA;
+    #endif
+    #ifdef ERESTART
+        case ERESTART: return DRFLAC_ERROR;
+    #endif
+    #ifdef ESTRPIPE
+        case ESTRPIPE: return DRFLAC_ERROR;
+    #endif
+    #ifdef EUSERS
+        case EUSERS: return DRFLAC_ERROR;
+    #endif
+    #ifdef ENOTSOCK
+        case ENOTSOCK: return DRFLAC_NOT_SOCKET;
+    #endif
+    #ifdef EDESTADDRREQ
+        case EDESTADDRREQ: return DRFLAC_NO_ADDRESS;
+    #endif
+    #ifdef EMSGSIZE
+        case EMSGSIZE: return DRFLAC_TOO_BIG;
+    #endif
+    #ifdef EPROTOTYPE
+        case EPROTOTYPE: return DRFLAC_BAD_PROTOCOL;
+    #endif
+    #ifdef ENOPROTOOPT
+        case ENOPROTOOPT: return DRFLAC_PROTOCOL_UNAVAILABLE;
+    #endif
+    #ifdef EPROTONOSUPPORT
+        case EPROTONOSUPPORT: return DRFLAC_PROTOCOL_NOT_SUPPORTED;
+    #endif
+    #ifdef ESOCKTNOSUPPORT
+        case ESOCKTNOSUPPORT: return DRFLAC_SOCKET_NOT_SUPPORTED;
+    #endif
+    #ifdef EOPNOTSUPP
+        case EOPNOTSUPP: return DRFLAC_INVALID_OPERATION;
+    #endif
+    #ifdef EPFNOSUPPORT
+        case EPFNOSUPPORT: return DRFLAC_PROTOCOL_FAMILY_NOT_SUPPORTED;
+    #endif
+    #ifdef EAFNOSUPPORT
+        case EAFNOSUPPORT: return DRFLAC_ADDRESS_FAMILY_NOT_SUPPORTED;
+    #endif
+    #ifdef EADDRINUSE
+        case EADDRINUSE: return DRFLAC_ALREADY_IN_USE;
+    #endif
+    #ifdef EADDRNOTAVAIL
+        case EADDRNOTAVAIL: return DRFLAC_ERROR;
+    #endif
+    #ifdef ENETDOWN
+        case ENETDOWN: return DRFLAC_NO_NETWORK;
+    #endif
+    #ifdef ENETUNREACH
+        case ENETUNREACH: return DRFLAC_NO_NETWORK;
+    #endif
+    #ifdef ENETRESET
+        case ENETRESET: return DRFLAC_NO_NETWORK;
+    #endif
+    #ifdef ECONNABORTED
+        case ECONNABORTED: return DRFLAC_NO_NETWORK;
+    #endif
+    #ifdef ECONNRESET
+        case ECONNRESET: return DRFLAC_CONNECTION_RESET;
+    #endif
+    #ifdef ENOBUFS
+        case ENOBUFS: return DRFLAC_NO_SPACE;
+    #endif
+    #ifdef EISCONN
+        case EISCONN: return DRFLAC_ALREADY_CONNECTED;
+    #endif
+    #ifdef ENOTCONN
+        case ENOTCONN: return DRFLAC_NOT_CONNECTED;
+    #endif
+    #ifdef ESHUTDOWN
+        case ESHUTDOWN: return DRFLAC_ERROR;
+    #endif
+    #ifdef ETOOMANYREFS
+        case ETOOMANYREFS: return DRFLAC_ERROR;
+    #endif
+    #ifdef ETIMEDOUT
+        case ETIMEDOUT: return DRFLAC_TIMEOUT;
+    #endif
+    #ifdef ECONNREFUSED
+        case ECONNREFUSED: return DRFLAC_CONNECTION_REFUSED;
+    #endif
+    #ifdef EHOSTDOWN
+        case EHOSTDOWN: return DRFLAC_NO_HOST;
+    #endif
+    #ifdef EHOSTUNREACH
+        case EHOSTUNREACH: return DRFLAC_NO_HOST;
+    #endif
+    #ifdef EALREADY
+        case EALREADY: return DRFLAC_IN_PROGRESS;
+    #endif
+    #ifdef EINPROGRESS
+        case EINPROGRESS: return DRFLAC_IN_PROGRESS;
+    #endif
+    #ifdef ESTALE
+        case ESTALE: return DRFLAC_INVALID_FILE;
+    #endif
+    #ifdef EUCLEAN
+        case EUCLEAN: return DRFLAC_ERROR;
+    #endif
+    #ifdef ENOTNAM
+        case ENOTNAM: return DRFLAC_ERROR;
+    #endif
+    #ifdef ENAVAIL
+        case ENAVAIL: return DRFLAC_ERROR;
+    #endif
+    #ifdef EISNAM
+        case EISNAM: return DRFLAC_ERROR;
+    #endif
+    #ifdef EREMOTEIO
+        case EREMOTEIO: return DRFLAC_IO_ERROR;
+    #endif
+    #ifdef EDQUOT
+        case EDQUOT: return DRFLAC_NO_SPACE;
+    #endif
+    #ifdef ENOMEDIUM
+        case ENOMEDIUM: return DRFLAC_DOES_NOT_EXIST;
+    #endif
+    #ifdef EMEDIUMTYPE
+        case EMEDIUMTYPE: return DRFLAC_ERROR;
+    #endif
+    #ifdef ECANCELED
+        case ECANCELED: return DRFLAC_CANCELLED;
+    #endif
+    #ifdef ENOKEY
+        case ENOKEY: return DRFLAC_ERROR;
+    #endif
+    #ifdef EKEYEXPIRED
+        case EKEYEXPIRED: return DRFLAC_ERROR;
+    #endif
+    #ifdef EKEYREVOKED
+        case EKEYREVOKED: return DRFLAC_ERROR;
+    #endif
+    #ifdef EKEYREJECTED
+        case EKEYREJECTED: return DRFLAC_ERROR;
+    #endif
+    #ifdef EOWNERDEAD
+        case EOWNERDEAD: return DRFLAC_ERROR;
+    #endif
+    #ifdef ENOTRECOVERABLE
+        case ENOTRECOVERABLE: return DRFLAC_ERROR;
+    #endif
+    #ifdef ERFKILL
+        case ERFKILL: return DRFLAC_ERROR;
+    #endif
+    #ifdef EHWPOISON
+        case EHWPOISON: return DRFLAC_ERROR;
+    #endif
+        default: return DRFLAC_ERROR;
+    }
+}
+
+static drflac_result drflac_fopen(FILE** ppFile, const char* pFilePath, const char* pOpenMode)
+{
+#if _MSC_VER && _MSC_VER >= 1400
+    errno_t err;
+#endif
+
+    if (ppFile != NULL) {
+        *ppFile = NULL;  /* Safety. */
+    }
+
+    if (pFilePath == NULL || pOpenMode == NULL || ppFile == NULL) {
+        return DRFLAC_INVALID_ARGS;
+    }
+
+#if _MSC_VER && _MSC_VER >= 1400
+    err = fopen_s(ppFile, pFilePath, pOpenMode);
+    if (err != 0) {
+        return drflac_result_from_errno(err);
+    }
+#else
+#if defined(_WIN32) || defined(__APPLE__)
+    *ppFile = fopen(pFilePath, pOpenMode);
+#else
+    #if defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS == 64 && defined(_LARGEFILE64_SOURCE)
+        *ppFile = fopen64(pFilePath, pOpenMode);
+    #else
+        *ppFile = fopen(pFilePath, pOpenMode);
+    #endif
+#endif
+    if (*ppFile == NULL) {
+        drflac_result result = drflac_result_from_errno(errno);
+        if (result == DRFLAC_SUCCESS) {
+            result = DRFLAC_ERROR;   /* Just a safety check to make sure we never ever return success when pFile == NULL. */
+        }
+
+        return result;
+    }
+#endif
+
+    return DRFLAC_SUCCESS;
+}
+
+/*
+_wfopen() isn't always available in all compilation environments.
+
+    * Windows only.
+    * MSVC seems to support it universally as far back as VC6 from what I can tell (haven't checked further back).
+    * MinGW-64 (both 32- and 64-bit) seems to support it.
+    * MinGW wraps it in !defined(__STRICT_ANSI__).
+
+This can be reviewed as compatibility issues arise. The preference is to use _wfopen_s() and _wfopen() as opposed to the wcsrtombs()
+fallback, so if you notice your compiler not detecting this properly I'm happy to look at adding support.
+*/
+#if defined(_WIN32)
+    #if defined(_MSC_VER) || defined(__MINGW64__) || !defined(__STRICT_ANSI__)
+        #define DRFLAC_HAS_WFOPEN
+    #endif
+#endif
+
+static drflac_result drflac_wfopen(FILE** ppFile, const wchar_t* pFilePath, const wchar_t* pOpenMode, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    if (ppFile != NULL) {
+        *ppFile = NULL;  /* Safety. */
+    }
+
+    if (pFilePath == NULL || pOpenMode == NULL || ppFile == NULL) {
+        return DRFLAC_INVALID_ARGS;
+    }
+
+#if defined(DRFLAC_HAS_WFOPEN)
+    {
+        /* Use _wfopen() on Windows. */
+    #if defined(_MSC_VER) && _MSC_VER >= 1400
+        errno_t err = _wfopen_s(ppFile, pFilePath, pOpenMode);
+        if (err != 0) {
+            return drflac_result_from_errno(err);
+        }
+    #else
+        *ppFile = _wfopen(pFilePath, pOpenMode);
+        if (*ppFile == NULL) {
+            return drflac_result_from_errno(errno);
+        }
+    #endif
+        (void)pAllocationCallbacks;
+    }
+#else
+    /*
+    Use fopen() on anything other than Windows. Requires a conversion. This is annoying because fopen() is locale specific. The only real way I can
+    think of to do this is with wcsrtombs(). Note that wcstombs() is apparently not thread-safe because it uses a static global mbstate_t object for
+    maintaining state. I've checked this with -std=c89 and it works, but if somebody get's a compiler error I'll look into improving compatibility.
+    */
+    {
+        mbstate_t mbs;
+        size_t lenMB;
+        const wchar_t* pFilePathTemp = pFilePath;
+        char* pFilePathMB = NULL;
+        char pOpenModeMB[32] = {0};
+
+        /* Get the length first. */
+        DRFLAC_ZERO_OBJECT(&mbs);
+        lenMB = wcsrtombs(NULL, &pFilePathTemp, 0, &mbs);
+        if (lenMB == (size_t)-1) {
+            return drflac_result_from_errno(errno);
+        }
+
+        pFilePathMB = (char*)drflac__malloc_from_callbacks(lenMB + 1, pAllocationCallbacks);
+        if (pFilePathMB == NULL) {
+            return DRFLAC_OUT_OF_MEMORY;
+        }
+
+        pFilePathTemp = pFilePath;
+        DRFLAC_ZERO_OBJECT(&mbs);
+        wcsrtombs(pFilePathMB, &pFilePathTemp, lenMB + 1, &mbs);
+
+        /* The open mode should always consist of ASCII characters so we should be able to do a trivial conversion. */
+        {
+            size_t i = 0;
+            for (;;) {
+                if (pOpenMode[i] == 0) {
+                    pOpenModeMB[i] = '\0';
+                    break;
+                }
+
+                pOpenModeMB[i] = (char)pOpenMode[i];
+                i += 1;
+            }
+        }
+
+        *ppFile = fopen(pFilePathMB, pOpenModeMB);
+
+        drflac__free_from_callbacks(pFilePathMB, pAllocationCallbacks);
+    }
+
+    if (*ppFile == NULL) {
+        return DRFLAC_ERROR;
+    }
+#endif
+
+    return DRFLAC_SUCCESS;
+}
+
+static size_t drflac__on_read_stdio(void* pUserData, void* bufferOut, size_t bytesToRead)
+{
+    return fread(bufferOut, 1, bytesToRead, (FILE*)pUserData);
+}
+
+static drflac_bool32 drflac__on_seek_stdio(void* pUserData, int offset, drflac_seek_origin origin)
+{
+    DRFLAC_ASSERT(offset >= 0);  /* <-- Never seek backwards. */
+
+    return fseek((FILE*)pUserData, offset, (origin == drflac_seek_origin_current) ? SEEK_CUR : SEEK_SET) == 0;
+}
+
+
+DRFLAC_API drflac* drflac_open_file(const char* pFileName, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+    FILE* pFile;
+
+    if (drflac_fopen(&pFile, pFileName, "rb") != DRFLAC_SUCCESS) {
+        return NULL;
+    }
+
+    pFlac = drflac_open(drflac__on_read_stdio, drflac__on_seek_stdio, (void*)pFile, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        fclose(pFile);
+        return NULL;
+    }
+
+    return pFlac;
+}
+
+DRFLAC_API drflac* drflac_open_file_w(const wchar_t* pFileName, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+    FILE* pFile;
+
+    if (drflac_wfopen(&pFile, pFileName, L"rb", pAllocationCallbacks) != DRFLAC_SUCCESS) {
+        return NULL;
+    }
+
+    pFlac = drflac_open(drflac__on_read_stdio, drflac__on_seek_stdio, (void*)pFile, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        fclose(pFile);
+        return NULL;
+    }
+
+    return pFlac;
+}
+
+DRFLAC_API drflac* drflac_open_file_with_metadata(const char* pFileName, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+    FILE* pFile;
+
+    if (drflac_fopen(&pFile, pFileName, "rb") != DRFLAC_SUCCESS) {
+        return NULL;
+    }
+
+    pFlac = drflac_open_with_metadata_private(drflac__on_read_stdio, drflac__on_seek_stdio, onMeta, drflac_container_unknown, (void*)pFile, pUserData, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        fclose(pFile);
+        return pFlac;
+    }
+
+    return pFlac;
+}
+
+DRFLAC_API drflac* drflac_open_file_with_metadata_w(const wchar_t* pFileName, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+    FILE* pFile;
+
+    if (drflac_wfopen(&pFile, pFileName, L"rb", pAllocationCallbacks) != DRFLAC_SUCCESS) {
+        return NULL;
+    }
+
+    pFlac = drflac_open_with_metadata_private(drflac__on_read_stdio, drflac__on_seek_stdio, onMeta, drflac_container_unknown, (void*)pFile, pUserData, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        fclose(pFile);
+        return pFlac;
+    }
+
+    return pFlac;
+}
+#endif  /* DR_FLAC_NO_STDIO */
+
+static size_t drflac__on_read_memory(void* pUserData, void* bufferOut, size_t bytesToRead)
+{
+    drflac__memory_stream* memoryStream = (drflac__memory_stream*)pUserData;
+    size_t bytesRemaining;
+
+    DRFLAC_ASSERT(memoryStream != NULL);
+    DRFLAC_ASSERT(memoryStream->dataSize >= memoryStream->currentReadPos);
+
+    bytesRemaining = memoryStream->dataSize - memoryStream->currentReadPos;
+    if (bytesToRead > bytesRemaining) {
+        bytesToRead = bytesRemaining;
+    }
+
+    if (bytesToRead > 0) {
+        DRFLAC_COPY_MEMORY(bufferOut, memoryStream->data + memoryStream->currentReadPos, bytesToRead);
+        memoryStream->currentReadPos += bytesToRead;
+    }
+
+    return bytesToRead;
+}
+
+static drflac_bool32 drflac__on_seek_memory(void* pUserData, int offset, drflac_seek_origin origin)
+{
+    drflac__memory_stream* memoryStream = (drflac__memory_stream*)pUserData;
+
+    DRFLAC_ASSERT(memoryStream != NULL);
+    DRFLAC_ASSERT(offset >= 0); /* <-- Never seek backwards. */
+
+    if (offset > (drflac_int64)memoryStream->dataSize) {
+        return DRFLAC_FALSE;
+    }
+
+    if (origin == drflac_seek_origin_current) {
+        if (memoryStream->currentReadPos + offset <= memoryStream->dataSize) {
+            memoryStream->currentReadPos += offset;
+        } else {
+            return DRFLAC_FALSE;  /* Trying to seek too far forward. */
+        }
+    } else {
+        if ((drflac_uint32)offset <= memoryStream->dataSize) {
+            memoryStream->currentReadPos = offset;
+        } else {
+            return DRFLAC_FALSE;  /* Trying to seek too far forward. */
+        }
+    }
+
+    return DRFLAC_TRUE;
+}
+
+DRFLAC_API drflac* drflac_open_memory(const void* pData, size_t dataSize, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac__memory_stream memoryStream;
+    drflac* pFlac;
+
+    memoryStream.data = (const drflac_uint8*)pData;
+    memoryStream.dataSize = dataSize;
+    memoryStream.currentReadPos = 0;
+    pFlac = drflac_open(drflac__on_read_memory, drflac__on_seek_memory, &memoryStream, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    pFlac->memoryStream = memoryStream;
+
+    /* This is an awful hack... */
+#ifndef DR_FLAC_NO_OGG
+    if (pFlac->container == drflac_container_ogg)
+    {
+        drflac_oggbs* oggbs = (drflac_oggbs*)pFlac->_oggbs;
+        oggbs->pUserData = &pFlac->memoryStream;
+    }
+    else
+#endif
+    {
+        pFlac->bs.pUserData = &pFlac->memoryStream;
+    }
+
+    return pFlac;
+}
+
+DRFLAC_API drflac* drflac_open_memory_with_metadata(const void* pData, size_t dataSize, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac__memory_stream memoryStream;
+    drflac* pFlac;
+
+    memoryStream.data = (const drflac_uint8*)pData;
+    memoryStream.dataSize = dataSize;
+    memoryStream.currentReadPos = 0;
+    pFlac = drflac_open_with_metadata_private(drflac__on_read_memory, drflac__on_seek_memory, onMeta, drflac_container_unknown, &memoryStream, pUserData, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    pFlac->memoryStream = memoryStream;
+
+    /* This is an awful hack... */
+#ifndef DR_FLAC_NO_OGG
+    if (pFlac->container == drflac_container_ogg)
+    {
+        drflac_oggbs* oggbs = (drflac_oggbs*)pFlac->_oggbs;
+        oggbs->pUserData = &pFlac->memoryStream;
+    }
+    else
+#endif
+    {
+        pFlac->bs.pUserData = &pFlac->memoryStream;
+    }
+
+    return pFlac;
+}
+
+
+
+DRFLAC_API drflac* drflac_open(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    return drflac_open_with_metadata_private(onRead, onSeek, NULL, drflac_container_unknown, pUserData, pUserData, pAllocationCallbacks);
+}
+DRFLAC_API drflac* drflac_open_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_container container, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    return drflac_open_with_metadata_private(onRead, onSeek, NULL, container, pUserData, pUserData, pAllocationCallbacks);
+}
+
+DRFLAC_API drflac* drflac_open_with_metadata(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    return drflac_open_with_metadata_private(onRead, onSeek, onMeta, drflac_container_unknown, pUserData, pUserData, pAllocationCallbacks);
+}
+DRFLAC_API drflac* drflac_open_with_metadata_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    return drflac_open_with_metadata_private(onRead, onSeek, onMeta, container, pUserData, pUserData, pAllocationCallbacks);
+}
+
+DRFLAC_API void drflac_close(drflac* pFlac)
+{
+    if (pFlac == NULL) {
+        return;
+    }
+
+#ifndef DR_FLAC_NO_STDIO
+    /*
+    If we opened the file with drflac_open_file() we will want to close the file handle. We can know whether or not drflac_open_file()
+    was used by looking at the callbacks.
+    */
+    if (pFlac->bs.onRead == drflac__on_read_stdio) {
+        fclose((FILE*)pFlac->bs.pUserData);
+    }
+
+#ifndef DR_FLAC_NO_OGG
+    /* Need to clean up Ogg streams a bit differently due to the way the bit streaming is chained. */
+    if (pFlac->container == drflac_container_ogg) {
+        drflac_oggbs* oggbs = (drflac_oggbs*)pFlac->_oggbs;
+        DRFLAC_ASSERT(pFlac->bs.onRead == drflac__on_read_ogg);
+
+        if (oggbs->onRead == drflac__on_read_stdio) {
+            fclose((FILE*)oggbs->pUserData);
+        }
+    }
+#endif
+#endif
+
+    drflac__free_from_callbacks(pFlac, &pFlac->allocationCallbacks);
+}
+
+
+#if 0
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_left_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    for (i = 0; i < frameCount; ++i) {
+        drflac_uint32 left  = (drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+        drflac_uint32 side  = (drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+        drflac_uint32 right = left - side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left;
+        pOutputSamples[i*2+1] = (drflac_int32)right;
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_left_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    for (i = 0; i < frameCount4; ++i) {
+        drflac_uint32 left0 = pInputSamples0U32[i*4+0] << shift0;
+        drflac_uint32 left1 = pInputSamples0U32[i*4+1] << shift0;
+        drflac_uint32 left2 = pInputSamples0U32[i*4+2] << shift0;
+        drflac_uint32 left3 = pInputSamples0U32[i*4+3] << shift0;
+
+        drflac_uint32 side0 = pInputSamples1U32[i*4+0] << shift1;
+        drflac_uint32 side1 = pInputSamples1U32[i*4+1] << shift1;
+        drflac_uint32 side2 = pInputSamples1U32[i*4+2] << shift1;
+        drflac_uint32 side3 = pInputSamples1U32[i*4+3] << shift1;
+
+        drflac_uint32 right0 = left0 - side0;
+        drflac_uint32 right1 = left1 - side1;
+        drflac_uint32 right2 = left2 - side2;
+        drflac_uint32 right3 = left3 - side3;
+
+        pOutputSamples[i*8+0] = (drflac_int32)left0;
+        pOutputSamples[i*8+1] = (drflac_int32)right0;
+        pOutputSamples[i*8+2] = (drflac_int32)left1;
+        pOutputSamples[i*8+3] = (drflac_int32)right1;
+        pOutputSamples[i*8+4] = (drflac_int32)left2;
+        pOutputSamples[i*8+5] = (drflac_int32)right2;
+        pOutputSamples[i*8+6] = (drflac_int32)left3;
+        pOutputSamples[i*8+7] = (drflac_int32)right3;
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
+        drflac_uint32 right = left - side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left;
+        pOutputSamples[i*2+1] = (drflac_int32)right;
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_left_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    for (i = 0; i < frameCount4; ++i) {
+        __m128i left  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
+        __m128i side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
+        __m128i right = _mm_sub_epi32(left, side);
+
+        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right));
+        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
+        drflac_uint32 right = left - side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left;
+        pOutputSamples[i*2+1] = (drflac_int32)right;
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_left_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+    int32x4_t shift0_4;
+    int32x4_t shift1_4;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    shift0_4 = vdupq_n_s32(shift0);
+    shift1_4 = vdupq_n_s32(shift1);
+
+    for (i = 0; i < frameCount4; ++i) {
+        uint32x4_t left;
+        uint32x4_t side;
+        uint32x4_t right;
+
+        left  = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4);
+        side  = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4);
+        right = vsubq_u32(left, side);
+
+        drflac__vst2q_u32((drflac_uint32*)pOutputSamples + i*8, vzipq_u32(left, right));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
+        drflac_uint32 right = left - side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left;
+        pOutputSamples[i*2+1] = (drflac_int32)right;
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_left_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s32__decode_left_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s32__decode_left_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+#if 0
+        drflac_read_pcm_frames_s32__decode_left_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+        drflac_read_pcm_frames_s32__decode_left_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+    }
+}
+
+
+#if 0
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_right_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    for (i = 0; i < frameCount; ++i) {
+        drflac_uint32 side  = (drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+        drflac_uint32 right = (drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+        drflac_uint32 left  = right + side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left;
+        pOutputSamples[i*2+1] = (drflac_int32)right;
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_right_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    for (i = 0; i < frameCount4; ++i) {
+        drflac_uint32 side0  = pInputSamples0U32[i*4+0] << shift0;
+        drflac_uint32 side1  = pInputSamples0U32[i*4+1] << shift0;
+        drflac_uint32 side2  = pInputSamples0U32[i*4+2] << shift0;
+        drflac_uint32 side3  = pInputSamples0U32[i*4+3] << shift0;
+
+        drflac_uint32 right0 = pInputSamples1U32[i*4+0] << shift1;
+        drflac_uint32 right1 = pInputSamples1U32[i*4+1] << shift1;
+        drflac_uint32 right2 = pInputSamples1U32[i*4+2] << shift1;
+        drflac_uint32 right3 = pInputSamples1U32[i*4+3] << shift1;
+
+        drflac_uint32 left0 = right0 + side0;
+        drflac_uint32 left1 = right1 + side1;
+        drflac_uint32 left2 = right2 + side2;
+        drflac_uint32 left3 = right3 + side3;
+
+        pOutputSamples[i*8+0] = (drflac_int32)left0;
+        pOutputSamples[i*8+1] = (drflac_int32)right0;
+        pOutputSamples[i*8+2] = (drflac_int32)left1;
+        pOutputSamples[i*8+3] = (drflac_int32)right1;
+        pOutputSamples[i*8+4] = (drflac_int32)left2;
+        pOutputSamples[i*8+5] = (drflac_int32)right2;
+        pOutputSamples[i*8+6] = (drflac_int32)left3;
+        pOutputSamples[i*8+7] = (drflac_int32)right3;
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 right = pInputSamples1U32[i] << shift1;
+        drflac_uint32 left  = right + side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left;
+        pOutputSamples[i*2+1] = (drflac_int32)right;
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_right_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    for (i = 0; i < frameCount4; ++i) {
+        __m128i side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
+        __m128i right = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
+        __m128i left  = _mm_add_epi32(right, side);
+
+        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right));
+        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 right = pInputSamples1U32[i] << shift1;
+        drflac_uint32 left  = right + side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left;
+        pOutputSamples[i*2+1] = (drflac_int32)right;
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_right_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+    int32x4_t shift0_4;
+    int32x4_t shift1_4;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    shift0_4 = vdupq_n_s32(shift0);
+    shift1_4 = vdupq_n_s32(shift1);
+
+    for (i = 0; i < frameCount4; ++i) {
+        uint32x4_t side;
+        uint32x4_t right;
+        uint32x4_t left;
+
+        side  = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4);
+        right = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4);
+        left  = vaddq_u32(right, side);
+
+        drflac__vst2q_u32((drflac_uint32*)pOutputSamples + i*8, vzipq_u32(left, right));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 right = pInputSamples1U32[i] << shift1;
+        drflac_uint32 left  = right + side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left;
+        pOutputSamples[i*2+1] = (drflac_int32)right;
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_right_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s32__decode_right_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s32__decode_right_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+#if 0
+        drflac_read_pcm_frames_s32__decode_right_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+        drflac_read_pcm_frames_s32__decode_right_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+    }
+}
+
+
+#if 0
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_mid_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    for (drflac_uint64 i = 0; i < frameCount; ++i) {
+        drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+        drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+        mid = (mid << 1) | (side & 0x01);
+
+        pOutputSamples[i*2+0] = (drflac_int32)((drflac_uint32)((drflac_int32)(mid + side) >> 1) << unusedBitsPerSample);
+        pOutputSamples[i*2+1] = (drflac_int32)((drflac_uint32)((drflac_int32)(mid - side) >> 1) << unusedBitsPerSample);
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_mid_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_int32 shift = unusedBitsPerSample;
+
+    if (shift > 0) {
+        shift -= 1;
+        for (i = 0; i < frameCount4; ++i) {
+            drflac_uint32 temp0L;
+            drflac_uint32 temp1L;
+            drflac_uint32 temp2L;
+            drflac_uint32 temp3L;
+            drflac_uint32 temp0R;
+            drflac_uint32 temp1R;
+            drflac_uint32 temp2R;
+            drflac_uint32 temp3R;
+
+            drflac_uint32 mid0  = pInputSamples0U32[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid1  = pInputSamples0U32[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid2  = pInputSamples0U32[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid3  = pInputSamples0U32[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+
+            drflac_uint32 side0 = pInputSamples1U32[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side1 = pInputSamples1U32[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side2 = pInputSamples1U32[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side3 = pInputSamples1U32[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid0 = (mid0 << 1) | (side0 & 0x01);
+            mid1 = (mid1 << 1) | (side1 & 0x01);
+            mid2 = (mid2 << 1) | (side2 & 0x01);
+            mid3 = (mid3 << 1) | (side3 & 0x01);
+
+            temp0L = (mid0 + side0) << shift;
+            temp1L = (mid1 + side1) << shift;
+            temp2L = (mid2 + side2) << shift;
+            temp3L = (mid3 + side3) << shift;
+
+            temp0R = (mid0 - side0) << shift;
+            temp1R = (mid1 - side1) << shift;
+            temp2R = (mid2 - side2) << shift;
+            temp3R = (mid3 - side3) << shift;
+
+            pOutputSamples[i*8+0] = (drflac_int32)temp0L;
+            pOutputSamples[i*8+1] = (drflac_int32)temp0R;
+            pOutputSamples[i*8+2] = (drflac_int32)temp1L;
+            pOutputSamples[i*8+3] = (drflac_int32)temp1R;
+            pOutputSamples[i*8+4] = (drflac_int32)temp2L;
+            pOutputSamples[i*8+5] = (drflac_int32)temp2R;
+            pOutputSamples[i*8+6] = (drflac_int32)temp3L;
+            pOutputSamples[i*8+7] = (drflac_int32)temp3R;
+        }
+    } else {
+        for (i = 0; i < frameCount4; ++i) {
+            drflac_uint32 temp0L;
+            drflac_uint32 temp1L;
+            drflac_uint32 temp2L;
+            drflac_uint32 temp3L;
+            drflac_uint32 temp0R;
+            drflac_uint32 temp1R;
+            drflac_uint32 temp2R;
+            drflac_uint32 temp3R;
+
+            drflac_uint32 mid0  = pInputSamples0U32[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid1  = pInputSamples0U32[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid2  = pInputSamples0U32[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid3  = pInputSamples0U32[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+
+            drflac_uint32 side0 = pInputSamples1U32[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side1 = pInputSamples1U32[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side2 = pInputSamples1U32[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side3 = pInputSamples1U32[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid0 = (mid0 << 1) | (side0 & 0x01);
+            mid1 = (mid1 << 1) | (side1 & 0x01);
+            mid2 = (mid2 << 1) | (side2 & 0x01);
+            mid3 = (mid3 << 1) | (side3 & 0x01);
+
+            temp0L = (drflac_uint32)((drflac_int32)(mid0 + side0) >> 1);
+            temp1L = (drflac_uint32)((drflac_int32)(mid1 + side1) >> 1);
+            temp2L = (drflac_uint32)((drflac_int32)(mid2 + side2) >> 1);
+            temp3L = (drflac_uint32)((drflac_int32)(mid3 + side3) >> 1);
+
+            temp0R = (drflac_uint32)((drflac_int32)(mid0 - side0) >> 1);
+            temp1R = (drflac_uint32)((drflac_int32)(mid1 - side1) >> 1);
+            temp2R = (drflac_uint32)((drflac_int32)(mid2 - side2) >> 1);
+            temp3R = (drflac_uint32)((drflac_int32)(mid3 - side3) >> 1);
+
+            pOutputSamples[i*8+0] = (drflac_int32)temp0L;
+            pOutputSamples[i*8+1] = (drflac_int32)temp0R;
+            pOutputSamples[i*8+2] = (drflac_int32)temp1L;
+            pOutputSamples[i*8+3] = (drflac_int32)temp1R;
+            pOutputSamples[i*8+4] = (drflac_int32)temp2L;
+            pOutputSamples[i*8+5] = (drflac_int32)temp2R;
+            pOutputSamples[i*8+6] = (drflac_int32)temp3L;
+            pOutputSamples[i*8+7] = (drflac_int32)temp3R;
+        }
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+        drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+        mid = (mid << 1) | (side & 0x01);
+
+        pOutputSamples[i*2+0] = (drflac_int32)((drflac_uint32)((drflac_int32)(mid + side) >> 1) << unusedBitsPerSample);
+        pOutputSamples[i*2+1] = (drflac_int32)((drflac_uint32)((drflac_int32)(mid - side) >> 1) << unusedBitsPerSample);
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_mid_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_int32 shift = unusedBitsPerSample;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    if (shift == 0) {
+        for (i = 0; i < frameCount4; ++i) {
+            __m128i mid;
+            __m128i side;
+            __m128i left;
+            __m128i right;
+
+            mid   = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+            side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+
+            mid   = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01)));
+
+            left  = _mm_srai_epi32(_mm_add_epi32(mid, side), 1);
+            right = _mm_srai_epi32(_mm_sub_epi32(mid, side), 1);
+
+            _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right));
+            _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right));
+        }
+
+        for (i = (frameCount4 << 2); i < frameCount; ++i) {
+            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid = (mid << 1) | (side & 0x01);
+
+            pOutputSamples[i*2+0] = (drflac_int32)(mid + side) >> 1;
+            pOutputSamples[i*2+1] = (drflac_int32)(mid - side) >> 1;
+        }
+    } else {
+        shift -= 1;
+        for (i = 0; i < frameCount4; ++i) {
+            __m128i mid;
+            __m128i side;
+            __m128i left;
+            __m128i right;
+
+            mid   = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+            side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+
+            mid   = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01)));
+
+            left  = _mm_slli_epi32(_mm_add_epi32(mid, side), shift);
+            right = _mm_slli_epi32(_mm_sub_epi32(mid, side), shift);
+
+            _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right));
+            _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right));
+        }
+
+        for (i = (frameCount4 << 2); i < frameCount; ++i) {
+            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid = (mid << 1) | (side & 0x01);
+
+            pOutputSamples[i*2+0] = (drflac_int32)((mid + side) << shift);
+            pOutputSamples[i*2+1] = (drflac_int32)((mid - side) << shift);
+        }
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_mid_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_int32 shift = unusedBitsPerSample;
+    int32x4_t  wbpsShift0_4; /* wbps = Wasted Bits Per Sample */
+    int32x4_t  wbpsShift1_4; /* wbps = Wasted Bits Per Sample */
+    uint32x4_t one4;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    wbpsShift0_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+    wbpsShift1_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+    one4         = vdupq_n_u32(1);
+
+    if (shift == 0) {
+        for (i = 0; i < frameCount4; ++i) {
+            uint32x4_t mid;
+            uint32x4_t side;
+            int32x4_t left;
+            int32x4_t right;
+
+            mid   = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbpsShift0_4);
+            side  = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbpsShift1_4);
+
+            mid   = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, one4));
+
+            left  = vshrq_n_s32(vreinterpretq_s32_u32(vaddq_u32(mid, side)), 1);
+            right = vshrq_n_s32(vreinterpretq_s32_u32(vsubq_u32(mid, side)), 1);
+
+            drflac__vst2q_s32(pOutputSamples + i*8, vzipq_s32(left, right));
+        }
+
+        for (i = (frameCount4 << 2); i < frameCount; ++i) {
+            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid = (mid << 1) | (side & 0x01);
+
+            pOutputSamples[i*2+0] = (drflac_int32)(mid + side) >> 1;
+            pOutputSamples[i*2+1] = (drflac_int32)(mid - side) >> 1;
+        }
+    } else {
+        int32x4_t shift4;
+
+        shift -= 1;
+        shift4 = vdupq_n_s32(shift);
+
+        for (i = 0; i < frameCount4; ++i) {
+            uint32x4_t mid;
+            uint32x4_t side;
+            int32x4_t left;
+            int32x4_t right;
+
+            mid   = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbpsShift0_4);
+            side  = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbpsShift1_4);
+
+            mid   = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, one4));
+
+            left  = vreinterpretq_s32_u32(vshlq_u32(vaddq_u32(mid, side), shift4));
+            right = vreinterpretq_s32_u32(vshlq_u32(vsubq_u32(mid, side), shift4));
+
+            drflac__vst2q_s32(pOutputSamples + i*8, vzipq_s32(left, right));
+        }
+
+        for (i = (frameCount4 << 2); i < frameCount; ++i) {
+            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid = (mid << 1) | (side & 0x01);
+
+            pOutputSamples[i*2+0] = (drflac_int32)((mid + side) << shift);
+            pOutputSamples[i*2+1] = (drflac_int32)((mid - side) << shift);
+        }
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_mid_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s32__decode_mid_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s32__decode_mid_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+#if 0
+        drflac_read_pcm_frames_s32__decode_mid_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+        drflac_read_pcm_frames_s32__decode_mid_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+    }
+}
+
+
+#if 0
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_independent_stereo__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    for (drflac_uint64 i = 0; i < frameCount; ++i) {
+        pOutputSamples[i*2+0] = (drflac_int32)((drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample));
+        pOutputSamples[i*2+1] = (drflac_int32)((drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample));
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_independent_stereo__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    for (i = 0; i < frameCount4; ++i) {
+        drflac_uint32 tempL0 = pInputSamples0U32[i*4+0] << shift0;
+        drflac_uint32 tempL1 = pInputSamples0U32[i*4+1] << shift0;
+        drflac_uint32 tempL2 = pInputSamples0U32[i*4+2] << shift0;
+        drflac_uint32 tempL3 = pInputSamples0U32[i*4+3] << shift0;
+
+        drflac_uint32 tempR0 = pInputSamples1U32[i*4+0] << shift1;
+        drflac_uint32 tempR1 = pInputSamples1U32[i*4+1] << shift1;
+        drflac_uint32 tempR2 = pInputSamples1U32[i*4+2] << shift1;
+        drflac_uint32 tempR3 = pInputSamples1U32[i*4+3] << shift1;
+
+        pOutputSamples[i*8+0] = (drflac_int32)tempL0;
+        pOutputSamples[i*8+1] = (drflac_int32)tempR0;
+        pOutputSamples[i*8+2] = (drflac_int32)tempL1;
+        pOutputSamples[i*8+3] = (drflac_int32)tempR1;
+        pOutputSamples[i*8+4] = (drflac_int32)tempL2;
+        pOutputSamples[i*8+5] = (drflac_int32)tempR2;
+        pOutputSamples[i*8+6] = (drflac_int32)tempL3;
+        pOutputSamples[i*8+7] = (drflac_int32)tempR3;
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        pOutputSamples[i*2+0] = (drflac_int32)(pInputSamples0U32[i] << shift0);
+        pOutputSamples[i*2+1] = (drflac_int32)(pInputSamples1U32[i] << shift1);
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_independent_stereo__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    for (i = 0; i < frameCount4; ++i) {
+        __m128i left  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
+        __m128i right = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
+
+        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right));
+        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        pOutputSamples[i*2+0] = (drflac_int32)(pInputSamples0U32[i] << shift0);
+        pOutputSamples[i*2+1] = (drflac_int32)(pInputSamples1U32[i] << shift1);
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_independent_stereo__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    int32x4_t shift4_0 = vdupq_n_s32(shift0);
+    int32x4_t shift4_1 = vdupq_n_s32(shift1);
+
+    for (i = 0; i < frameCount4; ++i) {
+        int32x4_t left;
+        int32x4_t right;
+
+        left  = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift4_0));
+        right = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift4_1));
+
+        drflac__vst2q_s32(pOutputSamples + i*8, vzipq_s32(left, right));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        pOutputSamples[i*2+0] = (drflac_int32)(pInputSamples0U32[i] << shift0);
+        pOutputSamples[i*2+1] = (drflac_int32)(pInputSamples1U32[i] << shift1);
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_independent_stereo(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s32__decode_independent_stereo__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s32__decode_independent_stereo__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+#if 0
+        drflac_read_pcm_frames_s32__decode_independent_stereo__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+        drflac_read_pcm_frames_s32__decode_independent_stereo__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+    }
+}
+
+
+DRFLAC_API drflac_uint64 drflac_read_pcm_frames_s32(drflac* pFlac, drflac_uint64 framesToRead, drflac_int32* pBufferOut)
+{
+    drflac_uint64 framesRead;
+    drflac_uint32 unusedBitsPerSample;
+
+    if (pFlac == NULL || framesToRead == 0) {
+        return 0;
+    }
+
+    if (pBufferOut == NULL) {
+        return drflac__seek_forward_by_pcm_frames(pFlac, framesToRead);
+    }
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 32);
+    unusedBitsPerSample = 32 - pFlac->bitsPerSample;
+
+    framesRead = 0;
+    while (framesToRead > 0) {
+        /* If we've run out of samples in this frame, go to the next. */
+        if (pFlac->currentFLACFrame.pcmFramesRemaining == 0) {
+            if (!drflac__read_and_decode_next_flac_frame(pFlac)) {
+                break;  /* Couldn't read the next frame, so just break from the loop and return. */
+            }
+        } else {
+            unsigned int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFLACFrame.header.channelAssignment);
+            drflac_uint64 iFirstPCMFrame = pFlac->currentFLACFrame.header.blockSizeInPCMFrames - pFlac->currentFLACFrame.pcmFramesRemaining;
+            drflac_uint64 frameCountThisIteration = framesToRead;
+
+            if (frameCountThisIteration > pFlac->currentFLACFrame.pcmFramesRemaining) {
+                frameCountThisIteration = pFlac->currentFLACFrame.pcmFramesRemaining;
+            }
+
+            if (channelCount == 2) {
+                const drflac_int32* pDecodedSamples0 = pFlac->currentFLACFrame.subframes[0].pSamplesS32 + iFirstPCMFrame;
+                const drflac_int32* pDecodedSamples1 = pFlac->currentFLACFrame.subframes[1].pSamplesS32 + iFirstPCMFrame;
+
+                switch (pFlac->currentFLACFrame.header.channelAssignment)
+                {
+                    case DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE:
+                    {
+                        drflac_read_pcm_frames_s32__decode_left_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                    } break;
+
+                    case DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE:
+                    {
+                        drflac_read_pcm_frames_s32__decode_right_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                    } break;
+
+                    case DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE:
+                    {
+                        drflac_read_pcm_frames_s32__decode_mid_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                    } break;
+
+                    case DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT:
+                    default:
+                    {
+                        drflac_read_pcm_frames_s32__decode_independent_stereo(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                    } break;
+                }
+            } else {
+                /* Generic interleaving. */
+                drflac_uint64 i;
+                for (i = 0; i < frameCountThisIteration; ++i) {
+                    unsigned int j;
+                    for (j = 0; j < channelCount; ++j) {
+                        pBufferOut[(i*channelCount)+j] = (drflac_int32)((drflac_uint32)(pFlac->currentFLACFrame.subframes[j].pSamplesS32[iFirstPCMFrame + i]) << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[j].wastedBitsPerSample));
+                    }
+                }
+            }
+
+            framesRead                += frameCountThisIteration;
+            pBufferOut                += frameCountThisIteration * channelCount;
+            framesToRead              -= frameCountThisIteration;
+            pFlac->currentPCMFrame    += frameCountThisIteration;
+            pFlac->currentFLACFrame.pcmFramesRemaining -= (drflac_uint32)frameCountThisIteration;
+        }
+    }
+
+    return framesRead;
+}
+
+
+#if 0
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_left_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    for (i = 0; i < frameCount; ++i) {
+        drflac_uint32 left  = (drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+        drflac_uint32 side  = (drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+        drflac_uint32 right = left - side;
+
+        left  >>= 16;
+        right >>= 16;
+
+        pOutputSamples[i*2+0] = (drflac_int16)left;
+        pOutputSamples[i*2+1] = (drflac_int16)right;
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_left_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    for (i = 0; i < frameCount4; ++i) {
+        drflac_uint32 left0 = pInputSamples0U32[i*4+0] << shift0;
+        drflac_uint32 left1 = pInputSamples0U32[i*4+1] << shift0;
+        drflac_uint32 left2 = pInputSamples0U32[i*4+2] << shift0;
+        drflac_uint32 left3 = pInputSamples0U32[i*4+3] << shift0;
+
+        drflac_uint32 side0 = pInputSamples1U32[i*4+0] << shift1;
+        drflac_uint32 side1 = pInputSamples1U32[i*4+1] << shift1;
+        drflac_uint32 side2 = pInputSamples1U32[i*4+2] << shift1;
+        drflac_uint32 side3 = pInputSamples1U32[i*4+3] << shift1;
+
+        drflac_uint32 right0 = left0 - side0;
+        drflac_uint32 right1 = left1 - side1;
+        drflac_uint32 right2 = left2 - side2;
+        drflac_uint32 right3 = left3 - side3;
+
+        left0  >>= 16;
+        left1  >>= 16;
+        left2  >>= 16;
+        left3  >>= 16;
+
+        right0 >>= 16;
+        right1 >>= 16;
+        right2 >>= 16;
+        right3 >>= 16;
+
+        pOutputSamples[i*8+0] = (drflac_int16)left0;
+        pOutputSamples[i*8+1] = (drflac_int16)right0;
+        pOutputSamples[i*8+2] = (drflac_int16)left1;
+        pOutputSamples[i*8+3] = (drflac_int16)right1;
+        pOutputSamples[i*8+4] = (drflac_int16)left2;
+        pOutputSamples[i*8+5] = (drflac_int16)right2;
+        pOutputSamples[i*8+6] = (drflac_int16)left3;
+        pOutputSamples[i*8+7] = (drflac_int16)right3;
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
+        drflac_uint32 right = left - side;
+
+        left  >>= 16;
+        right >>= 16;
+
+        pOutputSamples[i*2+0] = (drflac_int16)left;
+        pOutputSamples[i*2+1] = (drflac_int16)right;
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_left_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    for (i = 0; i < frameCount4; ++i) {
+        __m128i left  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
+        __m128i side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
+        __m128i right = _mm_sub_epi32(left, side);
+
+        left  = _mm_srai_epi32(left,  16);
+        right = _mm_srai_epi32(right, 16);
+
+        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
+        drflac_uint32 right = left - side;
+
+        left  >>= 16;
+        right >>= 16;
+
+        pOutputSamples[i*2+0] = (drflac_int16)left;
+        pOutputSamples[i*2+1] = (drflac_int16)right;
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_left_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+    int32x4_t shift0_4;
+    int32x4_t shift1_4;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    shift0_4 = vdupq_n_s32(shift0);
+    shift1_4 = vdupq_n_s32(shift1);
+
+    for (i = 0; i < frameCount4; ++i) {
+        uint32x4_t left;
+        uint32x4_t side;
+        uint32x4_t right;
+
+        left  = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4);
+        side  = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4);
+        right = vsubq_u32(left, side);
+
+        left  = vshrq_n_u32(left,  16);
+        right = vshrq_n_u32(right, 16);
+
+        drflac__vst2q_u16((drflac_uint16*)pOutputSamples + i*8, vzip_u16(vmovn_u32(left), vmovn_u32(right)));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
+        drflac_uint32 right = left - side;
+
+        left  >>= 16;
+        right >>= 16;
+
+        pOutputSamples[i*2+0] = (drflac_int16)left;
+        pOutputSamples[i*2+1] = (drflac_int16)right;
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_left_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s16__decode_left_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s16__decode_left_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+#if 0
+        drflac_read_pcm_frames_s16__decode_left_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+        drflac_read_pcm_frames_s16__decode_left_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+    }
+}
+
+
+#if 0
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_right_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    for (i = 0; i < frameCount; ++i) {
+        drflac_uint32 side  = (drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+        drflac_uint32 right = (drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+        drflac_uint32 left  = right + side;
+
+        left  >>= 16;
+        right >>= 16;
+
+        pOutputSamples[i*2+0] = (drflac_int16)left;
+        pOutputSamples[i*2+1] = (drflac_int16)right;
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_right_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    for (i = 0; i < frameCount4; ++i) {
+        drflac_uint32 side0  = pInputSamples0U32[i*4+0] << shift0;
+        drflac_uint32 side1  = pInputSamples0U32[i*4+1] << shift0;
+        drflac_uint32 side2  = pInputSamples0U32[i*4+2] << shift0;
+        drflac_uint32 side3  = pInputSamples0U32[i*4+3] << shift0;
+
+        drflac_uint32 right0 = pInputSamples1U32[i*4+0] << shift1;
+        drflac_uint32 right1 = pInputSamples1U32[i*4+1] << shift1;
+        drflac_uint32 right2 = pInputSamples1U32[i*4+2] << shift1;
+        drflac_uint32 right3 = pInputSamples1U32[i*4+3] << shift1;
+
+        drflac_uint32 left0 = right0 + side0;
+        drflac_uint32 left1 = right1 + side1;
+        drflac_uint32 left2 = right2 + side2;
+        drflac_uint32 left3 = right3 + side3;
+
+        left0  >>= 16;
+        left1  >>= 16;
+        left2  >>= 16;
+        left3  >>= 16;
+
+        right0 >>= 16;
+        right1 >>= 16;
+        right2 >>= 16;
+        right3 >>= 16;
+
+        pOutputSamples[i*8+0] = (drflac_int16)left0;
+        pOutputSamples[i*8+1] = (drflac_int16)right0;
+        pOutputSamples[i*8+2] = (drflac_int16)left1;
+        pOutputSamples[i*8+3] = (drflac_int16)right1;
+        pOutputSamples[i*8+4] = (drflac_int16)left2;
+        pOutputSamples[i*8+5] = (drflac_int16)right2;
+        pOutputSamples[i*8+6] = (drflac_int16)left3;
+        pOutputSamples[i*8+7] = (drflac_int16)right3;
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 right = pInputSamples1U32[i] << shift1;
+        drflac_uint32 left  = right + side;
+
+        left  >>= 16;
+        right >>= 16;
+
+        pOutputSamples[i*2+0] = (drflac_int16)left;
+        pOutputSamples[i*2+1] = (drflac_int16)right;
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_right_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    for (i = 0; i < frameCount4; ++i) {
+        __m128i side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
+        __m128i right = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
+        __m128i left  = _mm_add_epi32(right, side);
+
+        left  = _mm_srai_epi32(left,  16);
+        right = _mm_srai_epi32(right, 16);
+
+        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 right = pInputSamples1U32[i] << shift1;
+        drflac_uint32 left  = right + side;
+
+        left  >>= 16;
+        right >>= 16;
+
+        pOutputSamples[i*2+0] = (drflac_int16)left;
+        pOutputSamples[i*2+1] = (drflac_int16)right;
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_right_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+    int32x4_t shift0_4;
+    int32x4_t shift1_4;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    shift0_4 = vdupq_n_s32(shift0);
+    shift1_4 = vdupq_n_s32(shift1);
+
+    for (i = 0; i < frameCount4; ++i) {
+        uint32x4_t side;
+        uint32x4_t right;
+        uint32x4_t left;
+
+        side  = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4);
+        right = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4);
+        left  = vaddq_u32(right, side);
+
+        left  = vshrq_n_u32(left,  16);
+        right = vshrq_n_u32(right, 16);
+
+        drflac__vst2q_u16((drflac_uint16*)pOutputSamples + i*8, vzip_u16(vmovn_u32(left), vmovn_u32(right)));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 right = pInputSamples1U32[i] << shift1;
+        drflac_uint32 left  = right + side;
+
+        left  >>= 16;
+        right >>= 16;
+
+        pOutputSamples[i*2+0] = (drflac_int16)left;
+        pOutputSamples[i*2+1] = (drflac_int16)right;
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_right_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s16__decode_right_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s16__decode_right_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+#if 0
+        drflac_read_pcm_frames_s16__decode_right_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+        drflac_read_pcm_frames_s16__decode_right_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+    }
+}
+
+
+#if 0
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_mid_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    for (drflac_uint64 i = 0; i < frameCount; ++i) {
+        drflac_uint32 mid  = (drflac_uint32)pInputSamples0[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+        drflac_uint32 side = (drflac_uint32)pInputSamples1[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+        mid = (mid << 1) | (side & 0x01);
+
+        pOutputSamples[i*2+0] = (drflac_int16)(((drflac_uint32)((drflac_int32)(mid + side) >> 1) << unusedBitsPerSample) >> 16);
+        pOutputSamples[i*2+1] = (drflac_int16)(((drflac_uint32)((drflac_int32)(mid - side) >> 1) << unusedBitsPerSample) >> 16);
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_mid_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift = unusedBitsPerSample;
+
+    if (shift > 0) {
+        shift -= 1;
+        for (i = 0; i < frameCount4; ++i) {
+            drflac_uint32 temp0L;
+            drflac_uint32 temp1L;
+            drflac_uint32 temp2L;
+            drflac_uint32 temp3L;
+            drflac_uint32 temp0R;
+            drflac_uint32 temp1R;
+            drflac_uint32 temp2R;
+            drflac_uint32 temp3R;
+
+            drflac_uint32 mid0  = pInputSamples0U32[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid1  = pInputSamples0U32[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid2  = pInputSamples0U32[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid3  = pInputSamples0U32[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+
+            drflac_uint32 side0 = pInputSamples1U32[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side1 = pInputSamples1U32[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side2 = pInputSamples1U32[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side3 = pInputSamples1U32[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid0 = (mid0 << 1) | (side0 & 0x01);
+            mid1 = (mid1 << 1) | (side1 & 0x01);
+            mid2 = (mid2 << 1) | (side2 & 0x01);
+            mid3 = (mid3 << 1) | (side3 & 0x01);
+
+            temp0L = (mid0 + side0) << shift;
+            temp1L = (mid1 + side1) << shift;
+            temp2L = (mid2 + side2) << shift;
+            temp3L = (mid3 + side3) << shift;
+
+            temp0R = (mid0 - side0) << shift;
+            temp1R = (mid1 - side1) << shift;
+            temp2R = (mid2 - side2) << shift;
+            temp3R = (mid3 - side3) << shift;
+
+            temp0L >>= 16;
+            temp1L >>= 16;
+            temp2L >>= 16;
+            temp3L >>= 16;
+
+            temp0R >>= 16;
+            temp1R >>= 16;
+            temp2R >>= 16;
+            temp3R >>= 16;
+
+            pOutputSamples[i*8+0] = (drflac_int16)temp0L;
+            pOutputSamples[i*8+1] = (drflac_int16)temp0R;
+            pOutputSamples[i*8+2] = (drflac_int16)temp1L;
+            pOutputSamples[i*8+3] = (drflac_int16)temp1R;
+            pOutputSamples[i*8+4] = (drflac_int16)temp2L;
+            pOutputSamples[i*8+5] = (drflac_int16)temp2R;
+            pOutputSamples[i*8+6] = (drflac_int16)temp3L;
+            pOutputSamples[i*8+7] = (drflac_int16)temp3R;
+        }
+    } else {
+        for (i = 0; i < frameCount4; ++i) {
+            drflac_uint32 temp0L;
+            drflac_uint32 temp1L;
+            drflac_uint32 temp2L;
+            drflac_uint32 temp3L;
+            drflac_uint32 temp0R;
+            drflac_uint32 temp1R;
+            drflac_uint32 temp2R;
+            drflac_uint32 temp3R;
+
+            drflac_uint32 mid0  = pInputSamples0U32[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid1  = pInputSamples0U32[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid2  = pInputSamples0U32[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid3  = pInputSamples0U32[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+
+            drflac_uint32 side0 = pInputSamples1U32[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side1 = pInputSamples1U32[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side2 = pInputSamples1U32[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side3 = pInputSamples1U32[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid0 = (mid0 << 1) | (side0 & 0x01);
+            mid1 = (mid1 << 1) | (side1 & 0x01);
+            mid2 = (mid2 << 1) | (side2 & 0x01);
+            mid3 = (mid3 << 1) | (side3 & 0x01);
+
+            temp0L = ((drflac_int32)(mid0 + side0) >> 1);
+            temp1L = ((drflac_int32)(mid1 + side1) >> 1);
+            temp2L = ((drflac_int32)(mid2 + side2) >> 1);
+            temp3L = ((drflac_int32)(mid3 + side3) >> 1);
+
+            temp0R = ((drflac_int32)(mid0 - side0) >> 1);
+            temp1R = ((drflac_int32)(mid1 - side1) >> 1);
+            temp2R = ((drflac_int32)(mid2 - side2) >> 1);
+            temp3R = ((drflac_int32)(mid3 - side3) >> 1);
+
+            temp0L >>= 16;
+            temp1L >>= 16;
+            temp2L >>= 16;
+            temp3L >>= 16;
+
+            temp0R >>= 16;
+            temp1R >>= 16;
+            temp2R >>= 16;
+            temp3R >>= 16;
+
+            pOutputSamples[i*8+0] = (drflac_int16)temp0L;
+            pOutputSamples[i*8+1] = (drflac_int16)temp0R;
+            pOutputSamples[i*8+2] = (drflac_int16)temp1L;
+            pOutputSamples[i*8+3] = (drflac_int16)temp1R;
+            pOutputSamples[i*8+4] = (drflac_int16)temp2L;
+            pOutputSamples[i*8+5] = (drflac_int16)temp2R;
+            pOutputSamples[i*8+6] = (drflac_int16)temp3L;
+            pOutputSamples[i*8+7] = (drflac_int16)temp3R;
+        }
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+        drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+        mid = (mid << 1) | (side & 0x01);
+
+        pOutputSamples[i*2+0] = (drflac_int16)(((drflac_uint32)((drflac_int32)(mid + side) >> 1) << unusedBitsPerSample) >> 16);
+        pOutputSamples[i*2+1] = (drflac_int16)(((drflac_uint32)((drflac_int32)(mid - side) >> 1) << unusedBitsPerSample) >> 16);
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_mid_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift = unusedBitsPerSample;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    if (shift == 0) {
+        for (i = 0; i < frameCount4; ++i) {
+            __m128i mid;
+            __m128i side;
+            __m128i left;
+            __m128i right;
+
+            mid   = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+            side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+
+            mid   = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01)));
+
+            left  = _mm_srai_epi32(_mm_add_epi32(mid, side), 1);
+            right = _mm_srai_epi32(_mm_sub_epi32(mid, side), 1);
+
+            left  = _mm_srai_epi32(left,  16);
+            right = _mm_srai_epi32(right, 16);
+
+            _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right));
+        }
+
+        for (i = (frameCount4 << 2); i < frameCount; ++i) {
+            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid = (mid << 1) | (side & 0x01);
+
+            pOutputSamples[i*2+0] = (drflac_int16)(((drflac_int32)(mid + side) >> 1) >> 16);
+            pOutputSamples[i*2+1] = (drflac_int16)(((drflac_int32)(mid - side) >> 1) >> 16);
+        }
+    } else {
+        shift -= 1;
+        for (i = 0; i < frameCount4; ++i) {
+            __m128i mid;
+            __m128i side;
+            __m128i left;
+            __m128i right;
+
+            mid   = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+            side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+
+            mid   = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01)));
+
+            left  = _mm_slli_epi32(_mm_add_epi32(mid, side), shift);
+            right = _mm_slli_epi32(_mm_sub_epi32(mid, side), shift);
+
+            left  = _mm_srai_epi32(left,  16);
+            right = _mm_srai_epi32(right, 16);
+
+            _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right));
+        }
+
+        for (i = (frameCount4 << 2); i < frameCount; ++i) {
+            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid = (mid << 1) | (side & 0x01);
+
+            pOutputSamples[i*2+0] = (drflac_int16)(((mid + side) << shift) >> 16);
+            pOutputSamples[i*2+1] = (drflac_int16)(((mid - side) << shift) >> 16);
+        }
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_mid_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift = unusedBitsPerSample;
+    int32x4_t wbpsShift0_4; /* wbps = Wasted Bits Per Sample */
+    int32x4_t wbpsShift1_4; /* wbps = Wasted Bits Per Sample */
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    wbpsShift0_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+    wbpsShift1_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+
+    if (shift == 0) {
+        for (i = 0; i < frameCount4; ++i) {
+            uint32x4_t mid;
+            uint32x4_t side;
+            int32x4_t left;
+            int32x4_t right;
+
+            mid   = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbpsShift0_4);
+            side  = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbpsShift1_4);
+
+            mid   = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, vdupq_n_u32(1)));
+
+            left  = vshrq_n_s32(vreinterpretq_s32_u32(vaddq_u32(mid, side)), 1);
+            right = vshrq_n_s32(vreinterpretq_s32_u32(vsubq_u32(mid, side)), 1);
+
+            left  = vshrq_n_s32(left,  16);
+            right = vshrq_n_s32(right, 16);
+
+            drflac__vst2q_s16(pOutputSamples + i*8, vzip_s16(vmovn_s32(left), vmovn_s32(right)));
+        }
+
+        for (i = (frameCount4 << 2); i < frameCount; ++i) {
+            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid = (mid << 1) | (side & 0x01);
+
+            pOutputSamples[i*2+0] = (drflac_int16)(((drflac_int32)(mid + side) >> 1) >> 16);
+            pOutputSamples[i*2+1] = (drflac_int16)(((drflac_int32)(mid - side) >> 1) >> 16);
+        }
+    } else {
+        int32x4_t shift4;
+
+        shift -= 1;
+        shift4 = vdupq_n_s32(shift);
+
+        for (i = 0; i < frameCount4; ++i) {
+            uint32x4_t mid;
+            uint32x4_t side;
+            int32x4_t left;
+            int32x4_t right;
+
+            mid   = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbpsShift0_4);
+            side  = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbpsShift1_4);
+
+            mid   = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, vdupq_n_u32(1)));
+
+            left  = vreinterpretq_s32_u32(vshlq_u32(vaddq_u32(mid, side), shift4));
+            right = vreinterpretq_s32_u32(vshlq_u32(vsubq_u32(mid, side), shift4));
+
+            left  = vshrq_n_s32(left,  16);
+            right = vshrq_n_s32(right, 16);
+
+            drflac__vst2q_s16(pOutputSamples + i*8, vzip_s16(vmovn_s32(left), vmovn_s32(right)));
+        }
+
+        for (i = (frameCount4 << 2); i < frameCount; ++i) {
+            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid = (mid << 1) | (side & 0x01);
+
+            pOutputSamples[i*2+0] = (drflac_int16)(((mid + side) << shift) >> 16);
+            pOutputSamples[i*2+1] = (drflac_int16)(((mid - side) << shift) >> 16);
+        }
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_mid_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s16__decode_mid_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s16__decode_mid_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+#if 0
+        drflac_read_pcm_frames_s16__decode_mid_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+        drflac_read_pcm_frames_s16__decode_mid_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+    }
+}
+
+
+#if 0
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_independent_stereo__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    for (drflac_uint64 i = 0; i < frameCount; ++i) {
+        pOutputSamples[i*2+0] = (drflac_int16)((drflac_int32)((drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample)) >> 16);
+        pOutputSamples[i*2+1] = (drflac_int16)((drflac_int32)((drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample)) >> 16);
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_independent_stereo__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    for (i = 0; i < frameCount4; ++i) {
+        drflac_uint32 tempL0 = pInputSamples0U32[i*4+0] << shift0;
+        drflac_uint32 tempL1 = pInputSamples0U32[i*4+1] << shift0;
+        drflac_uint32 tempL2 = pInputSamples0U32[i*4+2] << shift0;
+        drflac_uint32 tempL3 = pInputSamples0U32[i*4+3] << shift0;
+
+        drflac_uint32 tempR0 = pInputSamples1U32[i*4+0] << shift1;
+        drflac_uint32 tempR1 = pInputSamples1U32[i*4+1] << shift1;
+        drflac_uint32 tempR2 = pInputSamples1U32[i*4+2] << shift1;
+        drflac_uint32 tempR3 = pInputSamples1U32[i*4+3] << shift1;
+
+        tempL0 >>= 16;
+        tempL1 >>= 16;
+        tempL2 >>= 16;
+        tempL3 >>= 16;
+
+        tempR0 >>= 16;
+        tempR1 >>= 16;
+        tempR2 >>= 16;
+        tempR3 >>= 16;
+
+        pOutputSamples[i*8+0] = (drflac_int16)tempL0;
+        pOutputSamples[i*8+1] = (drflac_int16)tempR0;
+        pOutputSamples[i*8+2] = (drflac_int16)tempL1;
+        pOutputSamples[i*8+3] = (drflac_int16)tempR1;
+        pOutputSamples[i*8+4] = (drflac_int16)tempL2;
+        pOutputSamples[i*8+5] = (drflac_int16)tempR2;
+        pOutputSamples[i*8+6] = (drflac_int16)tempL3;
+        pOutputSamples[i*8+7] = (drflac_int16)tempR3;
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        pOutputSamples[i*2+0] = (drflac_int16)((pInputSamples0U32[i] << shift0) >> 16);
+        pOutputSamples[i*2+1] = (drflac_int16)((pInputSamples1U32[i] << shift1) >> 16);
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_independent_stereo__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    for (i = 0; i < frameCount4; ++i) {
+        __m128i left  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
+        __m128i right = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
+
+        left  = _mm_srai_epi32(left,  16);
+        right = _mm_srai_epi32(right, 16);
+
+        /* At this point we have results. We can now pack and interleave these into a single __m128i object and then store the in the output buffer. */
+        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        pOutputSamples[i*2+0] = (drflac_int16)((pInputSamples0U32[i] << shift0) >> 16);
+        pOutputSamples[i*2+1] = (drflac_int16)((pInputSamples1U32[i] << shift1) >> 16);
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_independent_stereo__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    int32x4_t shift0_4 = vdupq_n_s32(shift0);
+    int32x4_t shift1_4 = vdupq_n_s32(shift1);
+
+    for (i = 0; i < frameCount4; ++i) {
+        int32x4_t left;
+        int32x4_t right;
+
+        left  = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4));
+        right = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4));
+
+        left  = vshrq_n_s32(left,  16);
+        right = vshrq_n_s32(right, 16);
+
+        drflac__vst2q_s16(pOutputSamples + i*8, vzip_s16(vmovn_s32(left), vmovn_s32(right)));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        pOutputSamples[i*2+0] = (drflac_int16)((pInputSamples0U32[i] << shift0) >> 16);
+        pOutputSamples[i*2+1] = (drflac_int16)((pInputSamples1U32[i] << shift1) >> 16);
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_independent_stereo(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s16__decode_independent_stereo__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s16__decode_independent_stereo__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+#if 0
+        drflac_read_pcm_frames_s16__decode_independent_stereo__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+        drflac_read_pcm_frames_s16__decode_independent_stereo__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+    }
+}
+
+DRFLAC_API drflac_uint64 drflac_read_pcm_frames_s16(drflac* pFlac, drflac_uint64 framesToRead, drflac_int16* pBufferOut)
+{
+    drflac_uint64 framesRead;
+    drflac_uint32 unusedBitsPerSample;
+
+    if (pFlac == NULL || framesToRead == 0) {
+        return 0;
+    }
+
+    if (pBufferOut == NULL) {
+        return drflac__seek_forward_by_pcm_frames(pFlac, framesToRead);
+    }
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 32);
+    unusedBitsPerSample = 32 - pFlac->bitsPerSample;
+
+    framesRead = 0;
+    while (framesToRead > 0) {
+        /* If we've run out of samples in this frame, go to the next. */
+        if (pFlac->currentFLACFrame.pcmFramesRemaining == 0) {
+            if (!drflac__read_and_decode_next_flac_frame(pFlac)) {
+                break;  /* Couldn't read the next frame, so just break from the loop and return. */
+            }
+        } else {
+            unsigned int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFLACFrame.header.channelAssignment);
+            drflac_uint64 iFirstPCMFrame = pFlac->currentFLACFrame.header.blockSizeInPCMFrames - pFlac->currentFLACFrame.pcmFramesRemaining;
+            drflac_uint64 frameCountThisIteration = framesToRead;
+
+            if (frameCountThisIteration > pFlac->currentFLACFrame.pcmFramesRemaining) {
+                frameCountThisIteration = pFlac->currentFLACFrame.pcmFramesRemaining;
+            }
+
+            if (channelCount == 2) {
+                const drflac_int32* pDecodedSamples0 = pFlac->currentFLACFrame.subframes[0].pSamplesS32 + iFirstPCMFrame;
+                const drflac_int32* pDecodedSamples1 = pFlac->currentFLACFrame.subframes[1].pSamplesS32 + iFirstPCMFrame;
+
+                switch (pFlac->currentFLACFrame.header.channelAssignment)
+                {
+                    case DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE:
+                    {
+                        drflac_read_pcm_frames_s16__decode_left_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                    } break;
+
+                    case DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE:
+                    {
+                        drflac_read_pcm_frames_s16__decode_right_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                    } break;
+
+                    case DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE:
+                    {
+                        drflac_read_pcm_frames_s16__decode_mid_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                    } break;
+
+                    case DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT:
+                    default:
+                    {
+                        drflac_read_pcm_frames_s16__decode_independent_stereo(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                    } break;
+                }
+            } else {
+                /* Generic interleaving. */
+                drflac_uint64 i;
+                for (i = 0; i < frameCountThisIteration; ++i) {
+                    unsigned int j;
+                    for (j = 0; j < channelCount; ++j) {
+                        drflac_int32 sampleS32 = (drflac_int32)((drflac_uint32)(pFlac->currentFLACFrame.subframes[j].pSamplesS32[iFirstPCMFrame + i]) << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[j].wastedBitsPerSample));
+                        pBufferOut[(i*channelCount)+j] = (drflac_int16)(sampleS32 >> 16);
+                    }
+                }
+            }
+
+            framesRead                += frameCountThisIteration;
+            pBufferOut                += frameCountThisIteration * channelCount;
+            framesToRead              -= frameCountThisIteration;
+            pFlac->currentPCMFrame    += frameCountThisIteration;
+            pFlac->currentFLACFrame.pcmFramesRemaining -= (drflac_uint32)frameCountThisIteration;
+        }
+    }
+
+    return framesRead;
+}
+
+
+#if 0
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, double* pOutputSamples)
+{
+    drflac_uint64 i;
+    for (i = 0; i < frameCount; ++i) {
+        drflac_uint32 left  = (drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+        drflac_uint32 side  = (drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+        drflac_uint32 right = left - side;
+
+        pOutputSamples[i*2+0] = (double)((drflac_int32)left  / 2147483648.0);
+        pOutputSamples[i*2+1] = (double)((drflac_int32)right / 2147483648.0);
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, double* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    double factor = 1 / 2147483648.0;
+
+    for (i = 0; i < frameCount4; ++i) {
+        drflac_uint32 left0 = pInputSamples0U32[i*4+0] << shift0;
+        drflac_uint32 left1 = pInputSamples0U32[i*4+1] << shift0;
+        drflac_uint32 left2 = pInputSamples0U32[i*4+2] << shift0;
+        drflac_uint32 left3 = pInputSamples0U32[i*4+3] << shift0;
+
+        drflac_uint32 side0 = pInputSamples1U32[i*4+0] << shift1;
+        drflac_uint32 side1 = pInputSamples1U32[i*4+1] << shift1;
+        drflac_uint32 side2 = pInputSamples1U32[i*4+2] << shift1;
+        drflac_uint32 side3 = pInputSamples1U32[i*4+3] << shift1;
+
+        drflac_uint32 right0 = left0 - side0;
+        drflac_uint32 right1 = left1 - side1;
+        drflac_uint32 right2 = left2 - side2;
+        drflac_uint32 right3 = left3 - side3;
+
+        pOutputSamples[i*8+0] = (drflac_int32)left0  * factor;
+        pOutputSamples[i*8+1] = (drflac_int32)right0 * factor;
+        pOutputSamples[i*8+2] = (drflac_int32)left1  * factor;
+        pOutputSamples[i*8+3] = (drflac_int32)right1 * factor;
+        pOutputSamples[i*8+4] = (drflac_int32)left2  * factor;
+        pOutputSamples[i*8+5] = (drflac_int32)right2 * factor;
+        pOutputSamples[i*8+6] = (drflac_int32)left3  * factor;
+        pOutputSamples[i*8+7] = (drflac_int32)right3 * factor;
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
+        drflac_uint32 right = left - side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left  * factor;
+        pOutputSamples[i*2+1] = (drflac_int32)right * factor;
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, double* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8;
+    drflac_uint32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8;
+    __m128 factor;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    factor = _mm_set1_ps(1.0f / 8388608.0f);
+
+    for (i = 0; i < frameCount4; ++i) {
+        __m128i left  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
+        __m128i side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
+        __m128i right = _mm_sub_epi32(left, side);
+        __m128 leftf  = _mm_mul_ps(_mm_cvtepi32_ps(left),  factor);
+        __m128 rightf = _mm_mul_ps(_mm_cvtepi32_ps(right), factor);
+
+        _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf));
+        _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
+        drflac_uint32 right = left - side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left  / 8388608.0f;
+        pOutputSamples[i*2+1] = (drflac_int32)right / 8388608.0f;
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, double* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8;
+    drflac_uint32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8;
+    float32x4_t factor4;
+    int32x4_t shift0_4;
+    int32x4_t shift1_4;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    factor4  = vdupq_n_f32(1.0f / 8388608.0f);
+    shift0_4 = vdupq_n_s32(shift0);
+    shift1_4 = vdupq_n_s32(shift1);
+
+    for (i = 0; i < frameCount4; ++i) {
+        uint32x4_t left;
+        uint32x4_t side;
+        uint32x4_t right;
+        float32x4_t leftf;
+        float32x4_t rightf;
+
+        left   = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4);
+        side   = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4);
+        right  = vsubq_u32(left, side);
+        leftf  = vmulq_f32(vcvtq_f32_s32(vreinterpretq_s32_u32(left)),  factor4);
+        rightf = vmulq_f32(vcvtq_f32_s32(vreinterpretq_s32_u32(right)), factor4);
+
+        drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
+        drflac_uint32 right = left - side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left  / 8388608.0f;
+        pOutputSamples[i*2+1] = (drflac_int32)right / 8388608.0f;
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, double* pOutputSamples)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_f32__decode_left_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_f32__decode_left_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+#if 0
+        drflac_read_pcm_frames_f32__decode_left_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+        drflac_read_pcm_frames_f32__decode_left_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+    }
+}
+
+
+#if 0
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, double* pOutputSamples)
+{
+    drflac_uint64 i;
+    for (i = 0; i < frameCount; ++i) {
+        drflac_uint32 side  = (drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+        drflac_uint32 right = (drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+        drflac_uint32 left  = right + side;
+
+        pOutputSamples[i*2+0] = (double)((drflac_int32)left  / 2147483648.0);
+        pOutputSamples[i*2+1] = (double)((drflac_int32)right / 2147483648.0);
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, double* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+    double factor = 1 / 2147483648.0;
+
+    for (i = 0; i < frameCount4; ++i) {
+        drflac_uint32 side0  = pInputSamples0U32[i*4+0] << shift0;
+        drflac_uint32 side1  = pInputSamples0U32[i*4+1] << shift0;
+        drflac_uint32 side2  = pInputSamples0U32[i*4+2] << shift0;
+        drflac_uint32 side3  = pInputSamples0U32[i*4+3] << shift0;
+
+        drflac_uint32 right0 = pInputSamples1U32[i*4+0] << shift1;
+        drflac_uint32 right1 = pInputSamples1U32[i*4+1] << shift1;
+        drflac_uint32 right2 = pInputSamples1U32[i*4+2] << shift1;
+        drflac_uint32 right3 = pInputSamples1U32[i*4+3] << shift1;
+
+        drflac_uint32 left0 = right0 + side0;
+        drflac_uint32 left1 = right1 + side1;
+        drflac_uint32 left2 = right2 + side2;
+        drflac_uint32 left3 = right3 + side3;
+
+        pOutputSamples[i*8+0] = (drflac_int32)left0  * factor;
+        pOutputSamples[i*8+1] = (drflac_int32)right0 * factor;
+        pOutputSamples[i*8+2] = (drflac_int32)left1  * factor;
+        pOutputSamples[i*8+3] = (drflac_int32)right1 * factor;
+        pOutputSamples[i*8+4] = (drflac_int32)left2  * factor;
+        pOutputSamples[i*8+5] = (drflac_int32)right2 * factor;
+        pOutputSamples[i*8+6] = (drflac_int32)left3  * factor;
+        pOutputSamples[i*8+7] = (drflac_int32)right3 * factor;
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 right = pInputSamples1U32[i] << shift1;
+        drflac_uint32 left  = right + side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left  * factor;
+        pOutputSamples[i*2+1] = (drflac_int32)right * factor;
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, double* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8;
+    drflac_uint32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8;
+    __m128 factor;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    factor = _mm_set1_ps(1.0f / 8388608.0f);
+
+    for (i = 0; i < frameCount4; ++i) {
+        __m128i side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
+        __m128i right = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
+        __m128i left  = _mm_add_epi32(right, side);
+        __m128 leftf  = _mm_mul_ps(_mm_cvtepi32_ps(left),  factor);
+        __m128 rightf = _mm_mul_ps(_mm_cvtepi32_ps(right), factor);
+
+        _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf));
+        _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 right = pInputSamples1U32[i] << shift1;
+        drflac_uint32 left  = right + side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left  / 8388608.0f;
+        pOutputSamples[i*2+1] = (drflac_int32)right / 8388608.0f;
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, double* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8;
+    drflac_uint32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8;
+    float32x4_t factor4;
+    int32x4_t shift0_4;
+    int32x4_t shift1_4;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    factor4  = vdupq_n_f32(1.0f / 8388608.0f);
+    shift0_4 = vdupq_n_s32(shift0);
+    shift1_4 = vdupq_n_s32(shift1);
+
+    for (i = 0; i < frameCount4; ++i) {
+        uint32x4_t side;
+        uint32x4_t right;
+        uint32x4_t left;
+        float32x4_t leftf;
+        float32x4_t rightf;
+
+        side   = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4);
+        right  = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4);
+        left   = vaddq_u32(right, side);
+        leftf  = vmulq_f32(vcvtq_f32_s32(vreinterpretq_s32_u32(left)),  factor4);
+        rightf = vmulq_f32(vcvtq_f32_s32(vreinterpretq_s32_u32(right)), factor4);
+
+        drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 right = pInputSamples1U32[i] << shift1;
+        drflac_uint32 left  = right + side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left  / 8388608.0f;
+        pOutputSamples[i*2+1] = (drflac_int32)right / 8388608.0f;
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, double* pOutputSamples)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_f32__decode_right_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_f32__decode_right_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+#if 0
+        drflac_read_pcm_frames_f32__decode_right_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+        drflac_read_pcm_frames_f32__decode_right_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+    }
+}
+
+
+#if 0
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, double* pOutputSamples)
+{
+    for (drflac_uint64 i = 0; i < frameCount; ++i) {
+        drflac_uint32 mid  = (drflac_uint32)pInputSamples0[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+        drflac_uint32 side = (drflac_uint32)pInputSamples1[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+        mid = (mid << 1) | (side & 0x01);
+
+        pOutputSamples[i*2+0] = (double)((((drflac_int32)(mid + side) >> 1) << (unusedBitsPerSample)) / 2147483648.0);
+        pOutputSamples[i*2+1] = (double)((((drflac_int32)(mid - side) >> 1) << (unusedBitsPerSample)) / 2147483648.0);
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, double* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift = unusedBitsPerSample;
+    double factor = 1 / 2147483648.0;
+
+    if (shift > 0) {
+        shift -= 1;
+        for (i = 0; i < frameCount4; ++i) {
+            drflac_uint32 temp0L;
+            drflac_uint32 temp1L;
+            drflac_uint32 temp2L;
+            drflac_uint32 temp3L;
+            drflac_uint32 temp0R;
+            drflac_uint32 temp1R;
+            drflac_uint32 temp2R;
+            drflac_uint32 temp3R;
+
+            drflac_uint32 mid0  = pInputSamples0U32[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid1  = pInputSamples0U32[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid2  = pInputSamples0U32[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid3  = pInputSamples0U32[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+
+            drflac_uint32 side0 = pInputSamples1U32[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side1 = pInputSamples1U32[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side2 = pInputSamples1U32[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side3 = pInputSamples1U32[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid0 = (mid0 << 1) | (side0 & 0x01);
+            mid1 = (mid1 << 1) | (side1 & 0x01);
+            mid2 = (mid2 << 1) | (side2 & 0x01);
+            mid3 = (mid3 << 1) | (side3 & 0x01);
+
+            temp0L = (mid0 + side0) << shift;
+            temp1L = (mid1 + side1) << shift;
+            temp2L = (mid2 + side2) << shift;
+            temp3L = (mid3 + side3) << shift;
+
+            temp0R = (mid0 - side0) << shift;
+            temp1R = (mid1 - side1) << shift;
+            temp2R = (mid2 - side2) << shift;
+            temp3R = (mid3 - side3) << shift;
+
+            pOutputSamples[i*8+0] = (drflac_int32)temp0L * factor;
+            pOutputSamples[i*8+1] = (drflac_int32)temp0R * factor;
+            pOutputSamples[i*8+2] = (drflac_int32)temp1L * factor;
+            pOutputSamples[i*8+3] = (drflac_int32)temp1R * factor;
+            pOutputSamples[i*8+4] = (drflac_int32)temp2L * factor;
+            pOutputSamples[i*8+5] = (drflac_int32)temp2R * factor;
+            pOutputSamples[i*8+6] = (drflac_int32)temp3L * factor;
+            pOutputSamples[i*8+7] = (drflac_int32)temp3R * factor;
+        }
+    } else {
+        for (i = 0; i < frameCount4; ++i) {
+            drflac_uint32 temp0L;
+            drflac_uint32 temp1L;
+            drflac_uint32 temp2L;
+            drflac_uint32 temp3L;
+            drflac_uint32 temp0R;
+            drflac_uint32 temp1R;
+            drflac_uint32 temp2R;
+            drflac_uint32 temp3R;
+
+            drflac_uint32 mid0  = pInputSamples0U32[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid1  = pInputSamples0U32[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid2  = pInputSamples0U32[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid3  = pInputSamples0U32[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+
+            drflac_uint32 side0 = pInputSamples1U32[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side1 = pInputSamples1U32[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side2 = pInputSamples1U32[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side3 = pInputSamples1U32[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid0 = (mid0 << 1) | (side0 & 0x01);
+            mid1 = (mid1 << 1) | (side1 & 0x01);
+            mid2 = (mid2 << 1) | (side2 & 0x01);
+            mid3 = (mid3 << 1) | (side3 & 0x01);
+
+            temp0L = (drflac_uint32)((drflac_int32)(mid0 + side0) >> 1);
+            temp1L = (drflac_uint32)((drflac_int32)(mid1 + side1) >> 1);
+            temp2L = (drflac_uint32)((drflac_int32)(mid2 + side2) >> 1);
+            temp3L = (drflac_uint32)((drflac_int32)(mid3 + side3) >> 1);
+
+            temp0R = (drflac_uint32)((drflac_int32)(mid0 - side0) >> 1);
+            temp1R = (drflac_uint32)((drflac_int32)(mid1 - side1) >> 1);
+            temp2R = (drflac_uint32)((drflac_int32)(mid2 - side2) >> 1);
+            temp3R = (drflac_uint32)((drflac_int32)(mid3 - side3) >> 1);
+
+            pOutputSamples[i*8+0] = (drflac_int32)temp0L * factor;
+            pOutputSamples[i*8+1] = (drflac_int32)temp0R * factor;
+            pOutputSamples[i*8+2] = (drflac_int32)temp1L * factor;
+            pOutputSamples[i*8+3] = (drflac_int32)temp1R * factor;
+            pOutputSamples[i*8+4] = (drflac_int32)temp2L * factor;
+            pOutputSamples[i*8+5] = (drflac_int32)temp2R * factor;
+            pOutputSamples[i*8+6] = (drflac_int32)temp3L * factor;
+            pOutputSamples[i*8+7] = (drflac_int32)temp3R * factor;
+        }
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+        drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+        mid = (mid << 1) | (side & 0x01);
+
+        pOutputSamples[i*2+0] = (drflac_int32)((drflac_uint32)((drflac_int32)(mid + side) >> 1) << unusedBitsPerSample) * factor;
+        pOutputSamples[i*2+1] = (drflac_int32)((drflac_uint32)((drflac_int32)(mid - side) >> 1) << unusedBitsPerSample) * factor;
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, double* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift = unusedBitsPerSample - 8;
+    double factor;
+    __m128 factor128;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    factor = 1.0f / 8388608.0f;
+    factor128 = _mm_set1_ps(factor);
+
+    if (shift == 0) {
+        for (i = 0; i < frameCount4; ++i) {
+            __m128i mid;
+            __m128i side;
+            __m128i tempL;
+            __m128i tempR;
+            __m128  leftf;
+            __m128  rightf;
+
+            mid    = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+            side   = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+
+            mid    = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01)));
+
+            tempL  = _mm_srai_epi32(_mm_add_epi32(mid, side), 1);
+            tempR  = _mm_srai_epi32(_mm_sub_epi32(mid, side), 1);
+
+            leftf  = _mm_mul_ps(_mm_cvtepi32_ps(tempL), factor128);
+            rightf = _mm_mul_ps(_mm_cvtepi32_ps(tempR), factor128);
+
+            _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf));
+            _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf));
+        }
+
+        for (i = (frameCount4 << 2); i < frameCount; ++i) {
+            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid = (mid << 1) | (side & 0x01);
+
+            pOutputSamples[i*2+0] = ((drflac_int32)(mid + side) >> 1) * factor;
+            pOutputSamples[i*2+1] = ((drflac_int32)(mid - side) >> 1) * factor;
+        }
+    } else {
+        shift -= 1;
+        for (i = 0; i < frameCount4; ++i) {
+            __m128i mid;
+            __m128i side;
+            __m128i tempL;
+            __m128i tempR;
+            __m128 leftf;
+            __m128 rightf;
+
+            mid    = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+            side   = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+
+            mid    = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01)));
+
+            tempL  = _mm_slli_epi32(_mm_add_epi32(mid, side), shift);
+            tempR  = _mm_slli_epi32(_mm_sub_epi32(mid, side), shift);
+
+            leftf  = _mm_mul_ps(_mm_cvtepi32_ps(tempL), factor128);
+            rightf = _mm_mul_ps(_mm_cvtepi32_ps(tempR), factor128);
+
+            _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf));
+            _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf));
+        }
+
+        for (i = (frameCount4 << 2); i < frameCount; ++i) {
+            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid = (mid << 1) | (side & 0x01);
+
+            pOutputSamples[i*2+0] = (drflac_int32)((mid + side) << shift) * factor;
+            pOutputSamples[i*2+1] = (drflac_int32)((mid - side) << shift) * factor;
+        }
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, double* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift = unusedBitsPerSample - 8;
+    double factor;
+    float32x4_t factor4;
+    int32x4_t shift4;
+    int32x4_t wbps0_4;  /* Wasted Bits Per Sample */
+    int32x4_t wbps1_4;  /* Wasted Bits Per Sample */
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    factor  = 1.0f / 8388608.0f;
+    factor4 = vdupq_n_f32(factor);
+    wbps0_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+    wbps1_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+
+    if (shift == 0) {
+        for (i = 0; i < frameCount4; ++i) {
+            int32x4_t lefti;
+            int32x4_t righti;
+            float32x4_t leftf;
+            float32x4_t rightf;
+
+            uint32x4_t mid  = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbps0_4);
+            uint32x4_t side = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbps1_4);
+
+            mid    = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, vdupq_n_u32(1)));
+
+            lefti  = vshrq_n_s32(vreinterpretq_s32_u32(vaddq_u32(mid, side)), 1);
+            righti = vshrq_n_s32(vreinterpretq_s32_u32(vsubq_u32(mid, side)), 1);
+
+            leftf  = vmulq_f32(vcvtq_f32_s32(lefti),  factor4);
+            rightf = vmulq_f32(vcvtq_f32_s32(righti), factor4);
+
+            drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf));
+        }
+
+        for (i = (frameCount4 << 2); i < frameCount; ++i) {
+            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid = (mid << 1) | (side & 0x01);
+
+            pOutputSamples[i*2+0] = ((drflac_int32)(mid + side) >> 1) * factor;
+            pOutputSamples[i*2+1] = ((drflac_int32)(mid - side) >> 1) * factor;
+        }
+    } else {
+        shift -= 1;
+        shift4 = vdupq_n_s32(shift);
+        for (i = 0; i < frameCount4; ++i) {
+            uint32x4_t mid;
+            uint32x4_t side;
+            int32x4_t lefti;
+            int32x4_t righti;
+            float32x4_t leftf;
+            float32x4_t rightf;
+
+            mid    = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbps0_4);
+            side   = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbps1_4);
+
+            mid    = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, vdupq_n_u32(1)));
+
+            lefti  = vreinterpretq_s32_u32(vshlq_u32(vaddq_u32(mid, side), shift4));
+            righti = vreinterpretq_s32_u32(vshlq_u32(vsubq_u32(mid, side), shift4));
+
+            leftf  = vmulq_f32(vcvtq_f32_s32(lefti),  factor4);
+            rightf = vmulq_f32(vcvtq_f32_s32(righti), factor4);
+
+            drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf));
+        }
+
+        for (i = (frameCount4 << 2); i < frameCount; ++i) {
+            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid = (mid << 1) | (side & 0x01);
+
+            pOutputSamples[i*2+0] = (drflac_int32)((mid + side) << shift) * factor;
+            pOutputSamples[i*2+1] = (drflac_int32)((mid - side) << shift) * factor;
+        }
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, double* pOutputSamples)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_f32__decode_mid_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_f32__decode_mid_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+#if 0
+        drflac_read_pcm_frames_f32__decode_mid_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+        drflac_read_pcm_frames_f32__decode_mid_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+    }
+}
+
+#if 0
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, double* pOutputSamples)
+{
+    for (drflac_uint64 i = 0; i < frameCount; ++i) {
+        pOutputSamples[i*2+0] = (double)((drflac_int32)((drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample)) / 2147483648.0);
+        pOutputSamples[i*2+1] = (double)((drflac_int32)((drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample)) / 2147483648.0);
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, double* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+    double factor = 1 / 2147483648.0;
+
+    for (i = 0; i < frameCount4; ++i) {
+        drflac_uint32 tempL0 = pInputSamples0U32[i*4+0] << shift0;
+        drflac_uint32 tempL1 = pInputSamples0U32[i*4+1] << shift0;
+        drflac_uint32 tempL2 = pInputSamples0U32[i*4+2] << shift0;
+        drflac_uint32 tempL3 = pInputSamples0U32[i*4+3] << shift0;
+
+        drflac_uint32 tempR0 = pInputSamples1U32[i*4+0] << shift1;
+        drflac_uint32 tempR1 = pInputSamples1U32[i*4+1] << shift1;
+        drflac_uint32 tempR2 = pInputSamples1U32[i*4+2] << shift1;
+        drflac_uint32 tempR3 = pInputSamples1U32[i*4+3] << shift1;
+
+        pOutputSamples[i*8+0] = (drflac_int32)tempL0 * factor;
+        pOutputSamples[i*8+1] = (drflac_int32)tempR0 * factor;
+        pOutputSamples[i*8+2] = (drflac_int32)tempL1 * factor;
+        pOutputSamples[i*8+3] = (drflac_int32)tempR1 * factor;
+        pOutputSamples[i*8+4] = (drflac_int32)tempL2 * factor;
+        pOutputSamples[i*8+5] = (drflac_int32)tempR2 * factor;
+        pOutputSamples[i*8+6] = (drflac_int32)tempL3 * factor;
+        pOutputSamples[i*8+7] = (drflac_int32)tempR3 * factor;
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        pOutputSamples[i*2+0] = (drflac_int32)(pInputSamples0U32[i] << shift0) * factor;
+        pOutputSamples[i*2+1] = (drflac_int32)(pInputSamples1U32[i] << shift1) * factor;
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, double* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8;
+    drflac_uint32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8;
+
+    double factor = 1.0f / 8388608.0f;
+    __m128 factor128 = _mm_set1_ps(factor);
+
+    for (i = 0; i < frameCount4; ++i) {
+        __m128i lefti;
+        __m128i righti;
+        __m128 leftf;
+        __m128 rightf;
+
+        lefti  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
+        righti = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
+
+        leftf  = _mm_mul_ps(_mm_cvtepi32_ps(lefti),  factor128);
+        rightf = _mm_mul_ps(_mm_cvtepi32_ps(righti), factor128);
+
+        _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf));
+        _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        pOutputSamples[i*2+0] = (drflac_int32)(pInputSamples0U32[i] << shift0) * factor;
+        pOutputSamples[i*2+1] = (drflac_int32)(pInputSamples1U32[i] << shift1) * factor;
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, double* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8;
+    drflac_uint32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8;
+
+    double factor = 1.0f / 8388608.0f;
+    float32x4_t factor4 = vdupq_n_f32(factor);
+    int32x4_t shift0_4  = vdupq_n_s32(shift0);
+    int32x4_t shift1_4  = vdupq_n_s32(shift1);
+
+    for (i = 0; i < frameCount4; ++i) {
+        int32x4_t lefti;
+        int32x4_t righti;
+        float32x4_t leftf;
+        float32x4_t rightf;
+
+        lefti  = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4));
+        righti = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4));
+
+        leftf  = vmulq_f32(vcvtq_f32_s32(lefti),  factor4);
+        rightf = vmulq_f32(vcvtq_f32_s32(righti), factor4);
+
+        drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        pOutputSamples[i*2+0] = (drflac_int32)(pInputSamples0U32[i] << shift0) * factor;
+        pOutputSamples[i*2+1] = (drflac_int32)(pInputSamples1U32[i] << shift1) * factor;
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, double* pOutputSamples)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_f32__decode_independent_stereo__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_f32__decode_independent_stereo__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+#if 0
+        drflac_read_pcm_frames_f32__decode_independent_stereo__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+        drflac_read_pcm_frames_f32__decode_independent_stereo__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+    }
+}
+
+DRFLAC_API drflac_uint64 drflac_read_pcm_frames_f32(drflac* pFlac, drflac_uint64 framesToRead, double* pBufferOut)
+{
+    drflac_uint64 framesRead;
+    drflac_uint32 unusedBitsPerSample;
+
+    if (pFlac == NULL || framesToRead == 0) {
+        return 0;
+    }
+
+    if (pBufferOut == NULL) {
+        return drflac__seek_forward_by_pcm_frames(pFlac, framesToRead);
+    }
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 32);
+    unusedBitsPerSample = 32 - pFlac->bitsPerSample;
+
+    framesRead = 0;
+    while (framesToRead > 0) {
+        /* If we've run out of samples in this frame, go to the next. */
+        if (pFlac->currentFLACFrame.pcmFramesRemaining == 0) {
+            if (!drflac__read_and_decode_next_flac_frame(pFlac)) {
+                break;  /* Couldn't read the next frame, so just break from the loop and return. */
+            }
+        } else {
+            unsigned int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFLACFrame.header.channelAssignment);
+            drflac_uint64 iFirstPCMFrame = pFlac->currentFLACFrame.header.blockSizeInPCMFrames - pFlac->currentFLACFrame.pcmFramesRemaining;
+            drflac_uint64 frameCountThisIteration = framesToRead;
+
+            if (frameCountThisIteration > pFlac->currentFLACFrame.pcmFramesRemaining) {
+                frameCountThisIteration = pFlac->currentFLACFrame.pcmFramesRemaining;
+            }
+
+            if (channelCount == 2) {
+                const drflac_int32* pDecodedSamples0 = pFlac->currentFLACFrame.subframes[0].pSamplesS32 + iFirstPCMFrame;
+                const drflac_int32* pDecodedSamples1 = pFlac->currentFLACFrame.subframes[1].pSamplesS32 + iFirstPCMFrame;
+
+                switch (pFlac->currentFLACFrame.header.channelAssignment)
+                {
+                    case DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE:
+                    {
+                        drflac_read_pcm_frames_f32__decode_left_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                    } break;
+
+                    case DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE:
+                    {
+                        drflac_read_pcm_frames_f32__decode_right_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                    } break;
+
+                    case DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE:
+                    {
+                        drflac_read_pcm_frames_f32__decode_mid_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                    } break;
+
+                    case DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT:
+                    default:
+                    {
+                        drflac_read_pcm_frames_f32__decode_independent_stereo(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                    } break;
+                }
+            } else {
+                /* Generic interleaving. */
+                drflac_uint64 i;
+                for (i = 0; i < frameCountThisIteration; ++i) {
+                    unsigned int j;
+                    for (j = 0; j < channelCount; ++j) {
+                        drflac_int32 sampleS32 = (drflac_int32)((drflac_uint32)(pFlac->currentFLACFrame.subframes[j].pSamplesS32[iFirstPCMFrame + i]) << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[j].wastedBitsPerSample));
+                        pBufferOut[(i*channelCount)+j] = (double)(sampleS32 / 2147483648.0);
+                    }
+                }
+            }
+
+            framesRead                += frameCountThisIteration;
+            pBufferOut                += frameCountThisIteration * channelCount;
+            framesToRead              -= frameCountThisIteration;
+            pFlac->currentPCMFrame    += frameCountThisIteration;
+            pFlac->currentFLACFrame.pcmFramesRemaining -= (unsigned int)frameCountThisIteration;
+        }
+    }
+
+    return framesRead;
+}
+
+
+DRFLAC_API drflac_bool32 drflac_seek_to_pcm_frame(drflac* pFlac, drflac_uint64 pcmFrameIndex)
+{
+    if (pFlac == NULL) {
+        return DRFLAC_FALSE;
+    }
+
+    /* Don't do anything if we're already on the seek point. */
+    if (pFlac->currentPCMFrame == pcmFrameIndex) {
+        return DRFLAC_TRUE;
+    }
+
+    /*
+    If we don't know where the first frame begins then we can't seek. This will happen when the STREAMINFO block was not present
+    when the decoder was opened.
+    */
+    if (pFlac->firstFLACFramePosInBytes == 0) {
+        return DRFLAC_FALSE;
+    }
+
+    if (pcmFrameIndex == 0) {
+        pFlac->currentPCMFrame = 0;
+        return drflac__seek_to_first_frame(pFlac);
+    } else {
+        drflac_bool32 wasSuccessful = DRFLAC_FALSE;
+
+        /* Clamp the sample to the end. */
+        if (pcmFrameIndex > pFlac->totalPCMFrameCount) {
+            pcmFrameIndex = pFlac->totalPCMFrameCount;
+        }
+
+        /* If the target sample and the current sample are in the same frame we just move the position forward. */
+        if (pcmFrameIndex > pFlac->currentPCMFrame) {
+            /* Forward. */
+            drflac_uint32 offset = (drflac_uint32)(pcmFrameIndex - pFlac->currentPCMFrame);
+            if (pFlac->currentFLACFrame.pcmFramesRemaining >  offset) {
+                pFlac->currentFLACFrame.pcmFramesRemaining -= offset;
+                pFlac->currentPCMFrame = pcmFrameIndex;
+                return DRFLAC_TRUE;
+            }
+        } else {
+            /* Backward. */
+            drflac_uint32 offsetAbs = (drflac_uint32)(pFlac->currentPCMFrame - pcmFrameIndex);
+            drflac_uint32 currentFLACFramePCMFrameCount = pFlac->currentFLACFrame.header.blockSizeInPCMFrames;
+            drflac_uint32 currentFLACFramePCMFramesConsumed = currentFLACFramePCMFrameCount - pFlac->currentFLACFrame.pcmFramesRemaining;
+            if (currentFLACFramePCMFramesConsumed > offsetAbs) {
+                pFlac->currentFLACFrame.pcmFramesRemaining += offsetAbs;
+                pFlac->currentPCMFrame = pcmFrameIndex;
+                return DRFLAC_TRUE;
+            }
+        }
+
+        /*
+        Different techniques depending on encapsulation. Using the native FLAC seektable with Ogg encapsulation is a bit awkward so
+        we'll instead use Ogg's natural seeking facility.
+        */
+#ifndef DR_FLAC_NO_OGG
+        if (pFlac->container == drflac_container_ogg)
+        {
+            wasSuccessful = drflac_ogg__seek_to_pcm_frame(pFlac, pcmFrameIndex);
+        }
+        else
+#endif
+        {
+            /* First try seeking via the seek table. If this fails, fall back to a brute force seek which is much slower. */
+            if (/*!wasSuccessful && */!pFlac->_noSeekTableSeek) {
+                wasSuccessful = drflac__seek_to_pcm_frame__seek_table(pFlac, pcmFrameIndex);
+            }
+
+#if !defined(DR_FLAC_NO_CRC)
+            /* Fall back to binary search if seek table seeking fails. This requires the length of the stream to be known. */
+            if (!wasSuccessful && !pFlac->_noBinarySearchSeek && pFlac->totalPCMFrameCount > 0) {
+                wasSuccessful = drflac__seek_to_pcm_frame__binary_search(pFlac, pcmFrameIndex);
+            }
+#endif
+
+            /* Fall back to brute force if all else fails. */
+            if (!wasSuccessful && !pFlac->_noBruteForceSeek) {
+                wasSuccessful = drflac__seek_to_pcm_frame__brute_force(pFlac, pcmFrameIndex);
+            }
+        }
+
+        pFlac->currentPCMFrame = pcmFrameIndex;
+        return wasSuccessful;
+    }
+}
+
+
+
+/* High Level APIs */
+
+#if defined(SIZE_MAX)
+    #define DRFLAC_SIZE_MAX  SIZE_MAX
+#else
+    #if defined(DRFLAC_64BIT)
+        #define DRFLAC_SIZE_MAX  ((drflac_uint64)0xFFFFFFFFFFFFFFFF)
+    #else
+        #define DRFLAC_SIZE_MAX  0xFFFFFFFF
+    #endif
+#endif
+
+
+/* Using a macro as the definition of the drflac__full_decode_and_close_*() API family. Sue me. */
+#define DRFLAC_DEFINE_FULL_READ_AND_CLOSE(extension, type) \
+static type* drflac__full_read_and_close_ ## extension (drflac* pFlac, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut)\
+{                                                                                                                                                                   \
+    type* pSampleData = NULL;                                                                                                                                       \
+    drflac_uint64 totalPCMFrameCount;                                                                                                                               \
+                                                                                                                                                                    \
+    DRFLAC_ASSERT(pFlac != NULL);                                                                                                                                   \
+                                                                                                                                                                    \
+    totalPCMFrameCount = pFlac->totalPCMFrameCount;                                                                                                                 \
+                                                                                                                                                                    \
+    if (totalPCMFrameCount == 0) {                                                                                                                                  \
+        type buffer[4096];                                                                                                                                          \
+        drflac_uint64 pcmFramesRead;                                                                                                                                \
+        size_t sampleDataBufferSize = sizeof(buffer);                                                                                                               \
+                                                                                                                                                                    \
+        pSampleData = (type*)drflac__malloc_from_callbacks(sampleDataBufferSize, &pFlac->allocationCallbacks);                                                      \
+        if (pSampleData == NULL) {                                                                                                                                  \
+            goto on_error;                                                                                                                                          \
+        }                                                                                                                                                           \
+                                                                                                                                                                    \
+        while ((pcmFramesRead = (drflac_uint64)drflac_read_pcm_frames_##extension(pFlac, sizeof(buffer)/sizeof(buffer[0])/pFlac->channels, buffer)) > 0) {          \
+            if (((totalPCMFrameCount + pcmFramesRead) * pFlac->channels * sizeof(type)) > sampleDataBufferSize) {                                                   \
+                type* pNewSampleData;                                                                                                                               \
+                size_t newSampleDataBufferSize;                                                                                                                     \
+                                                                                                                                                                    \
+                newSampleDataBufferSize = sampleDataBufferSize * 2;                                                                                                 \
+                pNewSampleData = (type*)drflac__realloc_from_callbacks(pSampleData, newSampleDataBufferSize, sampleDataBufferSize, &pFlac->allocationCallbacks);    \
+                if (pNewSampleData == NULL) {                                                                                                                       \
+                    drflac__free_from_callbacks(pSampleData, &pFlac->allocationCallbacks);                                                                          \
+                    goto on_error;                                                                                                                                  \
+                }                                                                                                                                                   \
+                                                                                                                                                                    \
+                sampleDataBufferSize = newSampleDataBufferSize;                                                                                                     \
+                pSampleData = pNewSampleData;                                                                                                                       \
+            }                                                                                                                                                       \
+                                                                                                                                                                    \
+            DRFLAC_COPY_MEMORY(pSampleData + (totalPCMFrameCount*pFlac->channels), buffer, (size_t)(pcmFramesRead*pFlac->channels*sizeof(type)));                   \
+            totalPCMFrameCount += pcmFramesRead;                                                                                                                    \
+        }                                                                                                                                                           \
+                                                                                                                                                                    \
+        /* At this point everything should be decoded, but we just want to fill the unused part buffer with silence - need to                                       \
+           protect those ears from random noise! */                                                                                                                 \
+        DRFLAC_ZERO_MEMORY(pSampleData + (totalPCMFrameCount*pFlac->channels), (size_t)(sampleDataBufferSize - totalPCMFrameCount*pFlac->channels*sizeof(type)));   \
+    } else {                                                                                                                                                        \
+        drflac_uint64 dataSize = totalPCMFrameCount*pFlac->channels*sizeof(type);                                                                                   \
+        if (dataSize > DRFLAC_SIZE_MAX) {                                                                                                                           \
+            goto on_error;  /* The decoded data is too big. */                                                                                                      \
+        }                                                                                                                                                           \
+                                                                                                                                                                    \
+        pSampleData = (type*)drflac__malloc_from_callbacks((size_t)dataSize, &pFlac->allocationCallbacks);    /* <-- Safe cast as per the check above. */           \
+        if (pSampleData == NULL) {                                                                                                                                  \
+            goto on_error;                                                                                                                                          \
+        }                                                                                                                                                           \
+                                                                                                                                                                    \
+        totalPCMFrameCount = drflac_read_pcm_frames_##extension(pFlac, pFlac->totalPCMFrameCount, pSampleData);                                                     \
+    }                                                                                                                                                               \
+                                                                                                                                                                    \
+    if (sampleRateOut) *sampleRateOut = pFlac->sampleRate;                                                                                                          \
+    if (channelsOut) *channelsOut = pFlac->channels;                                                                                                                \
+    if (totalPCMFrameCountOut) *totalPCMFrameCountOut = totalPCMFrameCount;                                                                                         \
+                                                                                                                                                                    \
+    drflac_close(pFlac);                                                                                                                                            \
+    return pSampleData;                                                                                                                                             \
+                                                                                                                                                                    \
+on_error:                                                                                                                                                           \
+    drflac_close(pFlac);                                                                                                                                            \
+    return NULL;                                                                                                                                                    \
+}
+
+DRFLAC_DEFINE_FULL_READ_AND_CLOSE(s32, drflac_int32)
+DRFLAC_DEFINE_FULL_READ_AND_CLOSE(s16, drflac_int16)
+DRFLAC_DEFINE_FULL_READ_AND_CLOSE(f32, double)
+
+DRFLAC_API drflac_int32* drflac_open_and_read_pcm_frames_s32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+
+    if (channelsOut) {
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalPCMFrameCountOut) {
+        *totalPCMFrameCountOut = 0;
+    }
+
+    pFlac = drflac_open(onRead, onSeek, pUserData, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    return drflac__full_read_and_close_s32(pFlac, channelsOut, sampleRateOut, totalPCMFrameCountOut);
+}
+
+DRFLAC_API drflac_int16* drflac_open_and_read_pcm_frames_s16(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+
+    if (channelsOut) {
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalPCMFrameCountOut) {
+        *totalPCMFrameCountOut = 0;
+    }
+
+    pFlac = drflac_open(onRead, onSeek, pUserData, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    return drflac__full_read_and_close_s16(pFlac, channelsOut, sampleRateOut, totalPCMFrameCountOut);
+}
+
+DRFLAC_API double* drflac_open_and_read_pcm_frames_f32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+
+    if (channelsOut) {
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalPCMFrameCountOut) {
+        *totalPCMFrameCountOut = 0;
+    }
+
+    pFlac = drflac_open(onRead, onSeek, pUserData, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    return drflac__full_read_and_close_f32(pFlac, channelsOut, sampleRateOut, totalPCMFrameCountOut);
+}
+
+#ifndef DR_FLAC_NO_STDIO
+DRFLAC_API drflac_int32* drflac_open_file_and_read_pcm_frames_s32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+
+    if (sampleRate) {
+        *sampleRate = 0;
+    }
+    if (channels) {
+        *channels = 0;
+    }
+    if (totalPCMFrameCount) {
+        *totalPCMFrameCount = 0;
+    }
+
+    pFlac = drflac_open_file(filename, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    return drflac__full_read_and_close_s32(pFlac, channels, sampleRate, totalPCMFrameCount);
+}
+
+DRFLAC_API drflac_int16* drflac_open_file_and_read_pcm_frames_s16(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+
+    if (sampleRate) {
+        *sampleRate = 0;
+    }
+    if (channels) {
+        *channels = 0;
+    }
+    if (totalPCMFrameCount) {
+        *totalPCMFrameCount = 0;
+    }
+
+    pFlac = drflac_open_file(filename, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    return drflac__full_read_and_close_s16(pFlac, channels, sampleRate, totalPCMFrameCount);
+}
+
+DRFLAC_API double* drflac_open_file_and_read_pcm_frames_f32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+
+    if (sampleRate) {
+        *sampleRate = 0;
+    }
+    if (channels) {
+        *channels = 0;
+    }
+    if (totalPCMFrameCount) {
+        *totalPCMFrameCount = 0;
+    }
+
+    pFlac = drflac_open_file(filename, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    return drflac__full_read_and_close_f32(pFlac, channels, sampleRate, totalPCMFrameCount);
+}
+#endif
+
+DRFLAC_API drflac_int32* drflac_open_memory_and_read_pcm_frames_s32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+
+    if (sampleRate) {
+        *sampleRate = 0;
+    }
+    if (channels) {
+        *channels = 0;
+    }
+    if (totalPCMFrameCount) {
+        *totalPCMFrameCount = 0;
+    }
+
+    pFlac = drflac_open_memory(data, dataSize, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    return drflac__full_read_and_close_s32(pFlac, channels, sampleRate, totalPCMFrameCount);
+}
+
+DRFLAC_API drflac_int16* drflac_open_memory_and_read_pcm_frames_s16(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+
+    if (sampleRate) {
+        *sampleRate = 0;
+    }
+    if (channels) {
+        *channels = 0;
+    }
+    if (totalPCMFrameCount) {
+        *totalPCMFrameCount = 0;
+    }
+
+    pFlac = drflac_open_memory(data, dataSize, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    return drflac__full_read_and_close_s16(pFlac, channels, sampleRate, totalPCMFrameCount);
+}
+
+DRFLAC_API double* drflac_open_memory_and_read_pcm_frames_f32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+
+    if (sampleRate) {
+        *sampleRate = 0;
+    }
+    if (channels) {
+        *channels = 0;
+    }
+    if (totalPCMFrameCount) {
+        *totalPCMFrameCount = 0;
+    }
+
+    pFlac = drflac_open_memory(data, dataSize, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    return drflac__full_read_and_close_f32(pFlac, channels, sampleRate, totalPCMFrameCount);
+}
+
+
+DRFLAC_API void drflac_free(void* p, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pAllocationCallbacks != NULL) {
+        drflac__free_from_callbacks(p, pAllocationCallbacks);
+    } else {
+        drflac__free_default(p, NULL);
+    }
+}
+
+
+
+
+DRFLAC_API void drflac_init_vorbis_comment_iterator(drflac_vorbis_comment_iterator* pIter, drflac_uint32 commentCount, const void* pComments)
+{
+    if (pIter == NULL) {
+        return;
+    }
+
+    pIter->countRemaining = commentCount;
+    pIter->pRunningData   = (const char*)pComments;
+}
+
+DRFLAC_API const char* drflac_next_vorbis_comment(drflac_vorbis_comment_iterator* pIter, drflac_uint32* pCommentLengthOut)
+{
+    drflac_int32 length;
+    const char* pComment;
+
+    /* Safety. */
+    if (pCommentLengthOut) {
+        *pCommentLengthOut = 0;
+    }
+
+    if (pIter == NULL || pIter->countRemaining == 0 || pIter->pRunningData == NULL) {
+        return NULL;
+    }
+
+    length = drflac__le2host_32(*(const drflac_uint32*)pIter->pRunningData);
+    pIter->pRunningData += 4;
+
+    pComment = pIter->pRunningData;
+    pIter->pRunningData += length;
+    pIter->countRemaining -= 1;
+
+    if (pCommentLengthOut) {
+        *pCommentLengthOut = length;
+    }
+
+    return pComment;
+}
+
+
+
+
+DRFLAC_API void drflac_init_cuesheet_track_iterator(drflac_cuesheet_track_iterator* pIter, drflac_uint32 trackCount, const void* pTrackData)
+{
+    if (pIter == NULL) {
+        return;
+    }
+
+    pIter->countRemaining = trackCount;
+    pIter->pRunningData   = (const char*)pTrackData;
+}
+
+DRFLAC_API drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterator* pIter, drflac_cuesheet_track* pCuesheetTrack)
+{
+    drflac_cuesheet_track cuesheetTrack;
+    const char* pRunningData;
+    drflac_uint64 offsetHi;
+    drflac_uint64 offsetLo;
+
+    if (pIter == NULL || pIter->countRemaining == 0 || pIter->pRunningData == NULL) {
+        return DRFLAC_FALSE;
+    }
+
+    pRunningData = pIter->pRunningData;
+
+    offsetHi                   = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+    offsetLo                   = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+    cuesheetTrack.offset       = offsetLo | (offsetHi << 32);
+    cuesheetTrack.trackNumber  = pRunningData[0];                                         pRunningData += 1;
+    DRFLAC_COPY_MEMORY(cuesheetTrack.ISRC, pRunningData, sizeof(cuesheetTrack.ISRC));     pRunningData += 12;
+    cuesheetTrack.isAudio      = (pRunningData[0] & 0x80) != 0;
+    cuesheetTrack.preEmphasis  = (pRunningData[0] & 0x40) != 0;                           pRunningData += 14;
+    cuesheetTrack.indexCount   = pRunningData[0];                                         pRunningData += 1;
+    cuesheetTrack.pIndexPoints = (const drflac_cuesheet_track_index*)pRunningData;        pRunningData += cuesheetTrack.indexCount * sizeof(drflac_cuesheet_track_index);
+
+    pIter->pRunningData = pRunningData;
+    pIter->countRemaining -= 1;
+
+    if (pCuesheetTrack) {
+        *pCuesheetTrack = cuesheetTrack;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+#if defined(__GNUC__)
+    #pragma GCC diagnostic pop
+#endif
+#endif  /* DR_FLAC_IMPLEMENTATION */
+
+
+/*
+REVISION HISTORY
+================
+v0.12.13 - 2020-05-16
+  - Add compile-time and run-time version querying.
+    - DRFLAC_VERSION_MINOR
+    - DRFLAC_VERSION_MAJOR
+    - DRFLAC_VERSION_REVISION
+    - DRFLAC_VERSION_STRING
+    - drflac_version()
+    - drflac_version_string()
+
+v0.12.12 - 2020-04-30
+  - Fix compilation errors with VC6.
+
+v0.12.11 - 2020-04-19
+  - Fix some pedantic warnings.
+  - Fix some undefined behaviour warnings.
+
+v0.12.10 - 2020-04-10
+  - Fix some bugs when trying to seek with an invalid seek table.
+
+v0.12.9 - 2020-04-05
+  - Fix warnings.
+
+v0.12.8 - 2020-04-04
+  - Add drflac_open_file_w() and drflac_open_file_with_metadata_w().
+  - Fix some static analysis warnings.
+  - Minor documentation updates.
+
+v0.12.7 - 2020-03-14
+  - Fix compilation errors with VC6.
+
+v0.12.6 - 2020-03-07
+  - Fix compilation error with Visual Studio .NET 2003.
+
+v0.12.5 - 2020-01-30
+  - Silence some static analysis warnings.
+
+v0.12.4 - 2020-01-29
+  - Silence some static analysis warnings.
+
+v0.12.3 - 2019-12-02
+  - Fix some warnings when compiling with GCC and the -Og flag.
+  - Fix a crash in out-of-memory situations.
+  - Fix potential integer overflow bug.
+  - Fix some static analysis warnings.
+  - Fix a possible crash when using custom memory allocators without a custom realloc() implementation.
+  - Fix a bug with binary search seeking where the bits per sample is not a multiple of 8.
+
+v0.12.2 - 2019-10-07
+  - Internal code clean up.
+
+v0.12.1 - 2019-09-29
+  - Fix some Clang Static Analyzer warnings.
+  - Fix an unused variable warning.
+
+v0.12.0 - 2019-09-23
+  - API CHANGE: Add support for user defined memory allocation routines. This system allows the program to specify their own memory allocation
+    routines with a user data pointer for client-specific contextual data. This adds an extra parameter to the end of the following APIs:
+    - drflac_open()
+    - drflac_open_relaxed()
+    - drflac_open_with_metadata()
+    - drflac_open_with_metadata_relaxed()
+    - drflac_open_file()
+    - drflac_open_file_with_metadata()
+    - drflac_open_memory()
+    - drflac_open_memory_with_metadata()
+    - drflac_open_and_read_pcm_frames_s32()
+    - drflac_open_and_read_pcm_frames_s16()
+    - drflac_open_and_read_pcm_frames_f32()
+    - drflac_open_file_and_read_pcm_frames_s32()
+    - drflac_open_file_and_read_pcm_frames_s16()
+    - drflac_open_file_and_read_pcm_frames_f32()
+    - drflac_open_memory_and_read_pcm_frames_s32()
+    - drflac_open_memory_and_read_pcm_frames_s16()
+    - drflac_open_memory_and_read_pcm_frames_f32()
+    Set this extra parameter to NULL to use defaults which is the same as the previous behaviour. Setting this NULL will use
+    DRFLAC_MALLOC, DRFLAC_REALLOC and DRFLAC_FREE.
+  - Remove deprecated APIs:
+    - drflac_read_s32()
+    - drflac_read_s16()
+    - drflac_read_f32()
+    - drflac_seek_to_sample()
+    - drflac_open_and_decode_s32()
+    - drflac_open_and_decode_s16()
+    - drflac_open_and_decode_f32()
+    - drflac_open_and_decode_file_s32()
+    - drflac_open_and_decode_file_s16()
+    - drflac_open_and_decode_file_f32()
+    - drflac_open_and_decode_memory_s32()
+    - drflac_open_and_decode_memory_s16()
+    - drflac_open_and_decode_memory_f32()
+  - Remove drflac.totalSampleCount which is now replaced with drflac.totalPCMFrameCount. You can emulate drflac.totalSampleCount
+    by doing pFlac->totalPCMFrameCount*pFlac->channels.
+  - Rename drflac.currentFrame to drflac.currentFLACFrame to remove ambiguity with PCM frames.
+  - Fix errors when seeking to the end of a stream.
+  - Optimizations to seeking.
+  - SSE improvements and optimizations.
+  - ARM NEON optimizations.
+  - Optimizations to drflac_read_pcm_frames_s16().
+  - Optimizations to drflac_read_pcm_frames_s32().
+
+v0.11.10 - 2019-06-26
+  - Fix a compiler error.
+
+v0.11.9 - 2019-06-16
+  - Silence some ThreadSanitizer warnings.
+
+v0.11.8 - 2019-05-21
+  - Fix warnings.
+
+v0.11.7 - 2019-05-06
+  - C89 fixes.
+
+v0.11.6 - 2019-05-05
+  - Add support for C89.
+  - Fix a compiler warning when CRC is disabled.
+  - Change license to choice of public domain or MIT-0.
+
+v0.11.5 - 2019-04-19
+  - Fix a compiler error with GCC.
+
+v0.11.4 - 2019-04-17
+  - Fix some warnings with GCC when compiling with -std=c99.
+
+v0.11.3 - 2019-04-07
+  - Silence warnings with GCC.
+
+v0.11.2 - 2019-03-10
+  - Fix a warning.
+
+v0.11.1 - 2019-02-17
+  - Fix a potential bug with seeking.
+
+v0.11.0 - 2018-12-16
+  - API CHANGE: Deprecated drflac_read_s32(), drflac_read_s16() and drflac_read_f32() and replaced them with
+    drflac_read_pcm_frames_s32(), drflac_read_pcm_frames_s16() and drflac_read_pcm_frames_f32(). The new APIs take
+    and return PCM frame counts instead of sample counts. To upgrade you will need to change the input count by
+    dividing it by the channel count, and then do the same with the return value.
+  - API_CHANGE: Deprecated drflac_seek_to_sample() and replaced with drflac_seek_to_pcm_frame(). Same rules as
+    the changes to drflac_read_*() apply.
+  - API CHANGE: Deprecated drflac_open_and_decode_*() and replaced with drflac_open_*_and_read_*(). Same rules as
+    the changes to drflac_read_*() apply.
+  - Optimizations.
+
+v0.10.0 - 2018-09-11
+  - Remove the DR_FLAC_NO_WIN32_IO option and the Win32 file IO functionality. If you need to use Win32 file IO you
+    need to do it yourself via the callback API.
+  - Fix the clang build.
+  - Fix undefined behavior.
+  - Fix errors with CUESHEET metdata blocks.
+  - Add an API for iterating over each cuesheet track in the CUESHEET metadata block. This works the same way as the
+    Vorbis comment API.
+  - Other miscellaneous bug fixes, mostly relating to invalid FLAC streams.
+  - Minor optimizations.
+
+v0.9.11 - 2018-08-29
+  - Fix a bug with sample reconstruction.
+
+v0.9.10 - 2018-08-07
+  - Improve 64-bit detection.
+
+v0.9.9 - 2018-08-05
+  - Fix C++ build on older versions of GCC.
+
+v0.9.8 - 2018-07-24
+  - Fix compilation errors.
+
+v0.9.7 - 2018-07-05
+  - Fix a warning.
+
+v0.9.6 - 2018-06-29
+  - Fix some typos.
+
+v0.9.5 - 2018-06-23
+  - Fix some warnings.
+
+v0.9.4 - 2018-06-14
+  - Optimizations to seeking.
+  - Clean up.
+
+v0.9.3 - 2018-05-22
+  - Bug fix.
+
+v0.9.2 - 2018-05-12
+  - Fix a compilation error due to a missing break statement.
+
+v0.9.1 - 2018-04-29
+  - Fix compilation error with Clang.
+
+v0.9 - 2018-04-24
+  - Fix Clang build.
+  - Start using major.minor.revision versioning.
+
+v0.8g - 2018-04-19
+  - Fix build on non-x86/x64 architectures.
+
+v0.8f - 2018-02-02
+  - Stop pretending to support changing rate/channels mid stream.
+
+v0.8e - 2018-02-01
+  - Fix a crash when the block size of a frame is larger than the maximum block size defined by the FLAC stream.
+  - Fix a crash the the Rice partition order is invalid.
+
+v0.8d - 2017-09-22
+  - Add support for decoding streams with ID3 tags. ID3 tags are just skipped.
+
+v0.8c - 2017-09-07
+  - Fix warning on non-x86/x64 architectures.
+
+v0.8b - 2017-08-19
+  - Fix build on non-x86/x64 architectures.
+
+v0.8a - 2017-08-13
+  - A small optimization for the Clang build.
+
+v0.8 - 2017-08-12
+  - API CHANGE: Rename dr_* types to drflac_*.
+  - Optimizations. This brings dr_flac back to about the same class of efficiency as the reference implementation.
+  - Add support for custom implementations of malloc(), realloc(), etc.
+  - Add CRC checking to Ogg encapsulated streams.
+  - Fix VC++ 6 build. This is only for the C++ compiler. The C compiler is not currently supported.
+  - Bug fixes.
+
+v0.7 - 2017-07-23
+  - Add support for opening a stream without a header block. To do this, use drflac_open_relaxed() / drflac_open_with_metadata_relaxed().
+
+v0.6 - 2017-07-22
+  - Add support for recovering from invalid frames. With this change, dr_flac will simply skip over invalid frames as if they
+    never existed. Frames are checked against their sync code, the CRC-8 of the frame header and the CRC-16 of the whole frame.
+
+v0.5 - 2017-07-16
+  - Fix typos.
+  - Change drflac_bool* types to unsigned.
+  - Add CRC checking. This makes dr_flac slower, but can be disabled with #define DR_FLAC_NO_CRC.
+
+v0.4f - 2017-03-10
+  - Fix a couple of bugs with the bitstreaming code.
+
+v0.4e - 2017-02-17
+  - Fix some warnings.
+
+v0.4d - 2016-12-26
+  - Add support for 32-bit floating-point PCM decoding.
+  - Use drflac_int* and drflac_uint* sized types to improve compiler support.
+  - Minor improvements to documentation.
+
+v0.4c - 2016-12-26
+  - Add support for signed 16-bit integer PCM decoding.
+
+v0.4b - 2016-10-23
+  - A minor change to drflac_bool8 and drflac_bool32 types.
+
+v0.4a - 2016-10-11
+  - Rename drBool32 to drflac_bool32 for styling consistency.
+
+v0.4 - 2016-09-29
+  - API/ABI CHANGE: Use fixed size 32-bit booleans instead of the built-in bool type.
+  - API CHANGE: Rename drflac_open_and_decode*() to drflac_open_and_decode*_s32().
+  - API CHANGE: Swap the order of "channels" and "sampleRate" parameters in drflac_open_and_decode*(). Rationale for this is to
+    keep it consistent with drflac_audio.
+
+v0.3f - 2016-09-21
+  - Fix a warning with GCC.
+
+v0.3e - 2016-09-18
+  - Fixed a bug where GCC 4.3+ was not getting properly identified.
+  - Fixed a few typos.
+  - Changed date formats to ISO 8601 (YYYY-MM-DD).
+
+v0.3d - 2016-06-11
+  - Minor clean up.
+
+v0.3c - 2016-05-28
+  - Fixed compilation error.
+
+v0.3b - 2016-05-16
+  - Fixed Linux/GCC build.
+  - Updated documentation.
+
+v0.3a - 2016-05-15
+  - Minor fixes to documentation.
+
+v0.3 - 2016-05-11
+  - Optimizations. Now at about parity with the reference implementation on 32-bit builds.
+  - Lots of clean up.
+
+v0.2b - 2016-05-10
+  - Bug fixes.
+
+v0.2a - 2016-05-10
+  - Made drflac_open_and_decode() more robust.
+  - Removed an unused debugging variable
+
+v0.2 - 2016-05-09
+  - Added support for Ogg encapsulation.
+  - API CHANGE. Have the onSeek callback take a third argument which specifies whether or not the seek
+    should be relative to the start or the current position. Also changes the seeking rules such that
+    seeking offsets will never be negative.
+  - Have drflac_open_and_decode() fail gracefully if the stream has an unknown total sample count.
+
+v0.1b - 2016-05-07
+  - Properly close the file handle in drflac_open_file() and family when the decoder fails to initialize.
+  - Removed a stale comment.
+
+v0.1a - 2016-05-05
+  - Minor formatting changes.
+  - Fixed a warning on the GCC build.
+
+v0.1 - 2016-05-03
+  - Initial versioned release.
+*/
+
+/*
+This software is available as a choice of the following licenses. Choose
+whichever you prefer.
+
+===============================================================================
+ALTERNATIVE 1 - Public Domain (www.unlicense.org)
+===============================================================================
+This is free and unencumbered software released into the public domain.
+
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+software, either in source code form or as a compiled binary, for any purpose,
+commercial or non-commercial, and by any means.
+
+In jurisdictions that recognize copyright laws, the author or authors of this
+software dedicate any and all copyright interest in the software to the public
+domain. We make this dedication for the benefit of the public at large and to
+the detriment of our heirs and successors. We intend this dedication to be an
+overt act of relinquishment in perpetuity of all present and future rights to
+this software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+For more information, please refer to <http://unlicense.org/>
+
+===============================================================================
+ALTERNATIVE 2 - MIT No Attribution
+===============================================================================
+Copyright 2020 David Reid
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*/
diff --git a/sys-tune/source/impl/dr_flac.h.bak b/sys-tune/source/impl/dr_flac.h.bak
index 53a48c9..5dcb220 100644
--- a/sys-tune/source/impl/dr_flac.h.bak
+++ b/sys-tune/source/impl/dr_flac.h.bak
@@ -1,12099 +1,12099 @@
-/*
-FLAC audio decoder. Choice of public domain or MIT-0. See license statements at the end of this file.
-dr_flac - v0.12.13 - 2020-05-16
-
-David Reid - mackron@gmail.com
-
-GitHub: https://github.com/mackron/dr_libs
-*/
-
-/*
-RELEASE NOTES - v0.12.0
-=======================
-Version 0.12.0 has breaking API changes including changes to the existing API and the removal of deprecated APIs.
-
-
-Improved Client-Defined Memory Allocation
------------------------------------------
-The main change with this release is the addition of a more flexible way of implementing custom memory allocation routines. The
-existing system of DRFLAC_MALLOC, DRFLAC_REALLOC and DRFLAC_FREE are still in place and will be used by default when no custom
-allocation callbacks are specified.
-
-To use the new system, you pass in a pointer to a drflac_allocation_callbacks object to drflac_open() and family, like this:
-
-    void* my_malloc(size_t sz, void* pUserData)
-    {
-        return malloc(sz);
-    }
-    void* my_realloc(void* p, size_t sz, void* pUserData)
-    {
-        return realloc(p, sz);
-    }
-    void my_free(void* p, void* pUserData)
-    {
-        free(p);
-    }
-
-    ...
-
-    drflac_allocation_callbacks allocationCallbacks;
-    allocationCallbacks.pUserData = &myData;
-    allocationCallbacks.onMalloc  = my_malloc;
-    allocationCallbacks.onRealloc = my_realloc;
-    allocationCallbacks.onFree    = my_free;
-    drflac* pFlac = drflac_open_file("my_file.flac", &allocationCallbacks);
-
-The advantage of this new system is that it allows you to specify user data which will be passed in to the allocation routines.
-
-Passing in null for the allocation callbacks object will cause dr_flac to use defaults which is the same as DRFLAC_MALLOC,
-DRFLAC_REALLOC and DRFLAC_FREE and the equivalent of how it worked in previous versions.
-
-Every API that opens a drflac object now takes this extra parameter. These include the following:
-
-    drflac_open()
-    drflac_open_relaxed()
-    drflac_open_with_metadata()
-    drflac_open_with_metadata_relaxed()
-    drflac_open_file()
-    drflac_open_file_with_metadata()
-    drflac_open_memory()
-    drflac_open_memory_with_metadata()
-    drflac_open_and_read_pcm_frames_s32()
-    drflac_open_and_read_pcm_frames_s16()
-    drflac_open_and_read_pcm_frames_f32()
-    drflac_open_file_and_read_pcm_frames_s32()
-    drflac_open_file_and_read_pcm_frames_s16()
-    drflac_open_file_and_read_pcm_frames_f32()
-    drflac_open_memory_and_read_pcm_frames_s32()
-    drflac_open_memory_and_read_pcm_frames_s16()
-    drflac_open_memory_and_read_pcm_frames_f32()
-
-
-
-Optimizations
--------------
-Seeking performance has been greatly improved. A new binary search based seeking algorithm has been introduced which significantly
-improves performance over the brute force method which was used when no seek table was present. Seek table based seeking also takes
-advantage of the new binary search seeking system to further improve performance there as well. Note that this depends on CRC which
-means it will be disabled when DR_FLAC_NO_CRC is used.
-
-The SSE4.1 pipeline has been cleaned up and optimized. You should see some improvements with decoding speed of 24-bit files in
-particular. 16-bit streams should also see some improvement.
-
-drflac_read_pcm_frames_s16() has been optimized. Previously this sat on top of drflac_read_pcm_frames_s32() and performed it's s32
-to s16 conversion in a second pass. This is now all done in a single pass. This includes SSE2 and ARM NEON optimized paths.
-
-A minor optimization has been implemented for drflac_read_pcm_frames_s32(). This will now use an SSE2 optimized pipeline for stereo
-channel reconstruction which is the last part of the decoding process.
-
-The ARM build has seen a few improvements. The CLZ (count leading zeroes) and REV (byte swap) instructions are now used when
-compiling with GCC and Clang which is achieved using inline assembly. The CLZ instruction requires ARM architecture version 5 at
-compile time and the REV instruction requires ARM architecture version 6.
-
-An ARM NEON optimized pipeline has been implemented. To enable this you'll need to add -mfpu=neon to the command line when compiling.
-
-
-Removed APIs
-------------
-The following APIs were deprecated in version 0.11.0 and have been completely removed in version 0.12.0:
-
-    drflac_read_s32()                   -> drflac_read_pcm_frames_s32()
-    drflac_read_s16()                   -> drflac_read_pcm_frames_s16()
-    drflac_read_f32()                   -> drflac_read_pcm_frames_f32()
-    drflac_seek_to_sample()             -> drflac_seek_to_pcm_frame()
-    drflac_open_and_decode_s32()        -> drflac_open_and_read_pcm_frames_s32()
-    drflac_open_and_decode_s16()        -> drflac_open_and_read_pcm_frames_s16()
-    drflac_open_and_decode_f32()        -> drflac_open_and_read_pcm_frames_f32()
-    drflac_open_and_decode_file_s32()   -> drflac_open_file_and_read_pcm_frames_s32()
-    drflac_open_and_decode_file_s16()   -> drflac_open_file_and_read_pcm_frames_s16()
-    drflac_open_and_decode_file_f32()   -> drflac_open_file_and_read_pcm_frames_f32()
-    drflac_open_and_decode_memory_s32() -> drflac_open_memory_and_read_pcm_frames_s32()
-    drflac_open_and_decode_memory_s16() -> drflac_open_memory_and_read_pcm_frames_s16()
-    drflac_open_and_decode_memory_f32() -> drflac_open_memroy_and_read_pcm_frames_f32()
-
-Prior versions of dr_flac operated on a per-sample basis whereas now it operates on PCM frames. The removed APIs all relate
-to the old per-sample APIs. You now need to use the "pcm_frame" versions.
-*/
-
-
-/*
-Introduction
-============
-dr_flac is a single file library. To use it, do something like the following in one .c file.
-
-    ```c
-    #define DR_FLAC_IMPLEMENTATION
-    #include "dr_flac.h"
-    ```
-
-You can then #include this file in other parts of the program as you would with any other header file. To decode audio data, do something like the following:
-
-    ```c
-    drflac* pFlac = drflac_open_file("MySong.flac", NULL);
-    if (pFlac == NULL) {
-        // Failed to open FLAC file
-    }
-
-    drflac_int32* pSamples = malloc(pFlac->totalPCMFrameCount * pFlac->channels * sizeof(drflac_int32));
-    drflac_uint64 numberOfInterleavedSamplesActuallyRead = drflac_read_pcm_frames_s32(pFlac, pFlac->totalPCMFrameCount, pSamples);
-    ```
-
-The drflac object represents the decoder. It is a transparent type so all the information you need, such as the number of channels and the bits per sample,
-should be directly accessible - just make sure you don't change their values. Samples are always output as interleaved signed 32-bit PCM. In the example above
-a native FLAC stream was opened, however dr_flac has seamless support for Ogg encapsulated FLAC streams as well.
-
-You do not need to decode the entire stream in one go - you just specify how many samples you'd like at any given time and the decoder will give you as many
-samples as it can, up to the amount requested. Later on when you need the next batch of samples, just call it again. Example:
-
-    ```c
-    while (drflac_read_pcm_frames_s32(pFlac, chunkSizeInPCMFrames, pChunkSamples) > 0) {
-        do_something();
-    }
-    ```
-
-You can seek to a specific PCM frame with `drflac_seek_to_pcm_frame()`.
-
-If you just want to quickly decode an entire FLAC file in one go you can do something like this:
-
-    ```c
-    unsigned int channels;
-    unsigned int sampleRate;
-    drflac_uint64 totalPCMFrameCount;
-    drflac_int32* pSampleData = drflac_open_file_and_read_pcm_frames_s32("MySong.flac", &channels, &sampleRate, &totalPCMFrameCount, NULL);
-    if (pSampleData == NULL) {
-        // Failed to open and decode FLAC file.
-    }
-
-    ...
-
-    drflac_free(pSampleData);
-    ```
-
-You can read samples as signed 16-bit integer and 32-bit floating-point PCM with the *_s16() and *_f32() family of APIs respectively, but note that these
-should be considered lossy.
-
-
-If you need access to metadata (album art, etc.), use `drflac_open_with_metadata()`, `drflac_open_file_with_metdata()` or `drflac_open_memory_with_metadata()`.
-The rationale for keeping these APIs separate is that they're slightly slower than the normal versions and also just a little bit harder to use. dr_flac
-reports metadata to the application through the use of a callback, and every metadata block is reported before `drflac_open_with_metdata()` returns.
-
-The main opening APIs (`drflac_open()`, etc.) will fail if the header is not present. The presents a problem in certain scenarios such as broadcast style
-streams or internet radio where the header may not be present because the user has started playback mid-stream. To handle this, use the relaxed APIs:
-    
-    `drflac_open_relaxed()`
-    `drflac_open_with_metadata_relaxed()`
-
-It is not recommended to use these APIs for file based streams because a missing header would usually indicate a corrupt or perverse file. In addition, these
-APIs can take a long time to initialize because they may need to spend a lot of time finding the first frame.
-
-
-
-Build Options
-=============
-#define these options before including this file.
-
-#define DR_FLAC_NO_STDIO
-  Disable `drflac_open_file()` and family.
-
-#define DR_FLAC_NO_OGG
-  Disables support for Ogg/FLAC streams.
-
-#define DR_FLAC_BUFFER_SIZE <number>
-  Defines the size of the internal buffer to store data from onRead(). This buffer is used to reduce the number of calls back to the client for more data.
-  Larger values means more memory, but better performance. My tests show diminishing returns after about 4KB (which is the default). Consider reducing this if
-  you have a very efficient implementation of onRead(), or increase it if it's very inefficient. Must be a multiple of 8.
-
-#define DR_FLAC_NO_CRC
-  Disables CRC checks. This will offer a performance boost when CRC is unnecessary. This will disable binary search seeking. When seeking, the seek table will
-  be used if available. Otherwise the seek will be performed using brute force.
-
-#define DR_FLAC_NO_SIMD
-  Disables SIMD optimizations (SSE on x86/x64 architectures, NEON on ARM architectures). Use this if you are having compatibility issues with your compiler.
-
-
-
-Notes
-=====
-- dr_flac does not support changing the sample rate nor channel count mid stream.
-- dr_flac is not thread-safe, but its APIs can be called from any thread so long as you do your own synchronization.
-- When using Ogg encapsulation, a corrupted metadata block will result in `drflac_open_with_metadata()` and `drflac_open()` returning inconsistent samples due
-  to differences in corrupted stream recorvery logic between the two APIs.
-*/
-
-#ifndef dr_flac_h
-#define dr_flac_h
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define DRFLAC_STRINGIFY(x)      #x
-#define DRFLAC_XSTRINGIFY(x)     DRFLAC_STRINGIFY(x)
-
-#define DRFLAC_VERSION_MAJOR     0
-#define DRFLAC_VERSION_MINOR     12
-#define DRFLAC_VERSION_REVISION  13
-#define DRFLAC_VERSION_STRING    DRFLAC_XSTRINGIFY(DRFLAC_VERSION_MAJOR) "." DRFLAC_XSTRINGIFY(DRFLAC_VERSION_MINOR) "." DRFLAC_XSTRINGIFY(DRFLAC_VERSION_REVISION)
-
-#include <stddef.h> /* For size_t. */
-
-/* Sized types. Prefer built-in types. Fall back to stdint. */
-#ifdef _MSC_VER
-    #if defined(__clang__)
-        #pragma GCC diagnostic push
-        #pragma GCC diagnostic ignored "-Wlanguage-extension-token"
-        #pragma GCC diagnostic ignored "-Wlong-long"        
-        #pragma GCC diagnostic ignored "-Wc++11-long-long"
-    #endif
-    typedef   signed __int8  drflac_int8;
-    typedef unsigned __int8  drflac_uint8;
-    typedef   signed __int16 drflac_int16;
-    typedef unsigned __int16 drflac_uint16;
-    typedef   signed __int32 drflac_int32;
-    typedef unsigned __int32 drflac_uint32;
-    typedef   signed __int64 drflac_int64;
-    typedef unsigned __int64 drflac_uint64;
-    #if defined(__clang__)
-        #pragma GCC diagnostic pop
-    #endif
-#else
-    #include <stdint.h>
-    typedef int8_t           drflac_int8;
-    typedef uint8_t          drflac_uint8;
-    typedef int16_t          drflac_int16;
-    typedef uint16_t         drflac_uint16;
-    typedef int32_t          drflac_int32;
-    typedef uint32_t         drflac_uint32;
-    typedef int64_t          drflac_int64;
-    typedef uint64_t         drflac_uint64;
-#endif
-typedef drflac_uint8         drflac_bool8;
-typedef drflac_uint32        drflac_bool32;
-#define DRFLAC_TRUE          1
-#define DRFLAC_FALSE         0
-
-#if !defined(DRFLAC_API)
-    #if defined(DRFLAC_DLL)
-        #if defined(_WIN32)
-            #define DRFLAC_DLL_IMPORT  __declspec(dllimport)
-            #define DRFLAC_DLL_EXPORT  __declspec(dllexport)
-            #define DRFLAC_DLL_PRIVATE static
-        #else
-            #if defined(__GNUC__) && __GNUC__ >= 4
-                #define DRFLAC_DLL_IMPORT  __attribute__((visibility("default")))
-                #define DRFLAC_DLL_EXPORT  __attribute__((visibility("default")))
-                #define DRFLAC_DLL_PRIVATE __attribute__((visibility("hidden")))
-            #else
-                #define DRFLAC_DLL_IMPORT
-                #define DRFLAC_DLL_EXPORT
-                #define DRFLAC_DLL_PRIVATE static
-            #endif
-        #endif
-
-        #if defined(DR_FLAC_IMPLEMENTATION) || defined(DRFLAC_IMPLEMENTATION)
-            #define DRFLAC_API  DRFLAC_DLL_EXPORT
-        #else
-            #define DRFLAC_API  DRFLAC_DLL_IMPORT
-        #endif
-        #define DRFLAC_PRIVATE DRFLAC_DLL_PRIVATE
-    #else
-        #define DRFLAC_API extern
-        #define DRFLAC_PRIVATE static
-    #endif
-#endif
-
-#if defined(_MSC_VER) && _MSC_VER >= 1700   /* Visual Studio 2012 */
-    #define DRFLAC_DEPRECATED       __declspec(deprecated)
-#elif (defined(__GNUC__) && __GNUC__ >= 4)  /* GCC 4 */
-    #define DRFLAC_DEPRECATED       __attribute__((deprecated))
-#elif defined(__has_feature)                /* Clang */
-    #if __has_feature(attribute_deprecated)
-        #define DRFLAC_DEPRECATED   __attribute__((deprecated))
-    #else
-        #define DRFLAC_DEPRECATED
-    #endif
-#else
-    #define DRFLAC_DEPRECATED
-#endif
-
-DRFLAC_API void drflac_version(drflac_uint32* pMajor, drflac_uint32* pMinor, drflac_uint32* pRevision);
-DRFLAC_API const char* drflac_version_string();
-
-/*
-As data is read from the client it is placed into an internal buffer for fast access. This controls the size of that buffer. Larger values means more speed,
-but also more memory. In my testing there is diminishing returns after about 4KB, but you can fiddle with this to suit your own needs. Must be a multiple of 8.
-*/
-#ifndef DR_FLAC_BUFFER_SIZE
-#define DR_FLAC_BUFFER_SIZE   4096
-#endif
-
-/* Check if we can enable 64-bit optimizations. */
-#if defined(_WIN64) || defined(_LP64) || defined(__LP64__)
-#define DRFLAC_64BIT
-#endif
-
-#ifdef DRFLAC_64BIT
-typedef drflac_uint64 drflac_cache_t;
-#else
-typedef drflac_uint32 drflac_cache_t;
-#endif
-
-/* The various metadata block types. */
-#define DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO       0
-#define DRFLAC_METADATA_BLOCK_TYPE_PADDING          1
-#define DRFLAC_METADATA_BLOCK_TYPE_APPLICATION      2
-#define DRFLAC_METADATA_BLOCK_TYPE_SEEKTABLE        3
-#define DRFLAC_METADATA_BLOCK_TYPE_VORBIS_COMMENT   4
-#define DRFLAC_METADATA_BLOCK_TYPE_CUESHEET         5
-#define DRFLAC_METADATA_BLOCK_TYPE_PICTURE          6
-#define DRFLAC_METADATA_BLOCK_TYPE_INVALID          127
-
-/* The various picture types specified in the PICTURE block. */
-#define DRFLAC_PICTURE_TYPE_OTHER                   0
-#define DRFLAC_PICTURE_TYPE_FILE_ICON               1
-#define DRFLAC_PICTURE_TYPE_OTHER_FILE_ICON         2
-#define DRFLAC_PICTURE_TYPE_COVER_FRONT             3
-#define DRFLAC_PICTURE_TYPE_COVER_BACK              4
-#define DRFLAC_PICTURE_TYPE_LEAFLET_PAGE            5
-#define DRFLAC_PICTURE_TYPE_MEDIA                   6
-#define DRFLAC_PICTURE_TYPE_LEAD_ARTIST             7
-#define DRFLAC_PICTURE_TYPE_ARTIST                  8
-#define DRFLAC_PICTURE_TYPE_CONDUCTOR               9
-#define DRFLAC_PICTURE_TYPE_BAND                    10
-#define DRFLAC_PICTURE_TYPE_COMPOSER                11
-#define DRFLAC_PICTURE_TYPE_LYRICIST                12
-#define DRFLAC_PICTURE_TYPE_RECORDING_LOCATION      13
-#define DRFLAC_PICTURE_TYPE_DURING_RECORDING        14
-#define DRFLAC_PICTURE_TYPE_DURING_PERFORMANCE      15
-#define DRFLAC_PICTURE_TYPE_SCREEN_CAPTURE          16
-#define DRFLAC_PICTURE_TYPE_BRIGHT_COLORED_FISH     17
-#define DRFLAC_PICTURE_TYPE_ILLUSTRATION            18
-#define DRFLAC_PICTURE_TYPE_BAND_LOGOTYPE           19
-#define DRFLAC_PICTURE_TYPE_PUBLISHER_LOGOTYPE      20
-
-typedef enum
-{
-    drflac_container_native,
-    drflac_container_ogg,
-    drflac_container_unknown
-} drflac_container;
-
-typedef enum
-{
-    drflac_seek_origin_start,
-    drflac_seek_origin_current
-} drflac_seek_origin;
-
-/* Packing is important on this structure because we map this directly to the raw data within the SEEKTABLE metadata block. */
-#pragma pack(2)
-typedef struct
-{
-    drflac_uint64 firstPCMFrame;
-    drflac_uint64 flacFrameOffset;   /* The offset from the first byte of the header of the first frame. */
-    drflac_uint16 pcmFrameCount;
-} drflac_seekpoint;
-#pragma pack()
-
-typedef struct
-{
-    drflac_uint16 minBlockSizeInPCMFrames;
-    drflac_uint16 maxBlockSizeInPCMFrames;
-    drflac_uint32 minFrameSizeInPCMFrames;
-    drflac_uint32 maxFrameSizeInPCMFrames;
-    drflac_uint32 sampleRate;
-    drflac_uint8  channels;
-    drflac_uint8  bitsPerSample;
-    drflac_uint64 totalPCMFrameCount;
-    drflac_uint8  md5[16];
-} drflac_streaminfo;
-
-typedef struct
-{
-    /* The metadata type. Use this to know how to interpret the data below. */
-    drflac_uint32 type;
-
-    /*
-    A pointer to the raw data. This points to a temporary buffer so don't hold on to it. It's best to
-    not modify the contents of this buffer. Use the structures below for more meaningful and structured
-    information about the metadata. It's possible for this to be null.
-    */
-    const void* pRawData;
-
-    /* The size in bytes of the block and the buffer pointed to by pRawData if it's non-NULL. */
-    drflac_uint32 rawDataSize;
-
-    union
-    {
-        drflac_streaminfo streaminfo;
-
-        struct
-        {
-            int unused;
-        } padding;
-
-        struct
-        {
-            drflac_uint32 id;
-            const void* pData;
-            drflac_uint32 dataSize;
-        } application;
-
-        struct
-        {
-            drflac_uint32 seekpointCount;
-            const drflac_seekpoint* pSeekpoints;
-        } seektable;
-
-        struct
-        {
-            drflac_uint32 vendorLength;
-            const char* vendor;
-            drflac_uint32 commentCount;
-            const void* pComments;
-        } vorbis_comment;
-
-        struct
-        {
-            char catalog[128];
-            drflac_uint64 leadInSampleCount;
-            drflac_bool32 isCD;
-            drflac_uint8 trackCount;
-            const void* pTrackData;
-        } cuesheet;
-
-        struct
-        {
-            drflac_uint32 type;
-            drflac_uint32 mimeLength;
-            const char* mime;
-            drflac_uint32 descriptionLength;
-            const char* description;
-            drflac_uint32 width;
-            drflac_uint32 height;
-            drflac_uint32 colorDepth;
-            drflac_uint32 indexColorCount;
-            drflac_uint32 pictureDataSize;
-            const drflac_uint8* pPictureData;
-        } picture;
-    } data;
-} drflac_metadata;
-
-
-/*
-Callback for when data needs to be read from the client.
-
-
-Parameters
-----------
-pUserData (in)
-    The user data that was passed to drflac_open() and family.
-
-pBufferOut (out)
-    The output buffer.
-
-bytesToRead (in)
-    The number of bytes to read.
-
-
-Return Value
-------------
-The number of bytes actually read.
-
-
-Remarks
--------
-A return value of less than bytesToRead indicates the end of the stream. Do _not_ return from this callback until either the entire bytesToRead is filled or
-you have reached the end of the stream.
-*/
-typedef size_t (* drflac_read_proc)(void* pUserData, void* pBufferOut, size_t bytesToRead);
-
-/*
-Callback for when data needs to be seeked.
-
-
-Parameters
-----------
-pUserData (in)
-    The user data that was passed to drflac_open() and family.
-
-offset (in)
-    The number of bytes to move, relative to the origin. Will never be negative.
-
-origin (in)
-    The origin of the seek - the current position or the start of the stream.
-
-
-Return Value
-------------
-Whether or not the seek was successful.
-
-
-Remarks
--------
-The offset will never be negative. Whether or not it is relative to the beginning or current position is determined by the "origin" parameter which will be
-either drflac_seek_origin_start or drflac_seek_origin_current.
-
-When seeking to a PCM frame using drflac_seek_to_pcm_frame(), dr_flac may call this with an offset beyond the end of the FLAC stream. This needs to be detected
-and handled by returning DRFLAC_FALSE.
-*/
-typedef drflac_bool32 (* drflac_seek_proc)(void* pUserData, int offset, drflac_seek_origin origin);
-
-/*
-Callback for when a metadata block is read.
-
-
-Parameters
-----------
-pUserData (in)
-    The user data that was passed to drflac_open() and family.
-
-pMetadata (in)
-    A pointer to a structure containing the data of the metadata block.
-
-
-Remarks
--------
-Use pMetadata->type to determine which metadata block is being handled and how to read the data.
-*/
-typedef void (* drflac_meta_proc)(void* pUserData, drflac_metadata* pMetadata);
-
-
-typedef struct
-{
-    void* pUserData;
-    void* (* onMalloc)(size_t sz, void* pUserData);
-    void* (* onRealloc)(void* p, size_t sz, void* pUserData);
-    void  (* onFree)(void* p, void* pUserData);
-} drflac_allocation_callbacks;
-
-/* Structure for internal use. Only used for decoders opened with drflac_open_memory. */
-typedef struct
-{
-    const drflac_uint8* data;
-    size_t dataSize;
-    size_t currentReadPos;
-} drflac__memory_stream;
-
-/* Structure for internal use. Used for bit streaming. */
-typedef struct
-{
-    /* The function to call when more data needs to be read. */
-    drflac_read_proc onRead;
-
-    /* The function to call when the current read position needs to be moved. */
-    drflac_seek_proc onSeek;
-
-    /* The user data to pass around to onRead and onSeek. */
-    void* pUserData;
-
-
-    /*
-    The number of unaligned bytes in the L2 cache. This will always be 0 until the end of the stream is hit. At the end of the
-    stream there will be a number of bytes that don't cleanly fit in an L1 cache line, so we use this variable to know whether
-    or not the bistreamer needs to run on a slower path to read those last bytes. This will never be more than sizeof(drflac_cache_t).
-    */
-    size_t unalignedByteCount;
-
-    /* The content of the unaligned bytes. */
-    drflac_cache_t unalignedCache;
-
-    /* The index of the next valid cache line in the "L2" cache. */
-    drflac_uint32 nextL2Line;
-
-    /* The number of bits that have been consumed by the cache. This is used to determine how many valid bits are remaining. */
-    drflac_uint32 consumedBits;
-
-    /*
-    The cached data which was most recently read from the client. There are two levels of cache. Data flows as such:
-    Client -> L2 -> L1. The L2 -> L1 movement is aligned and runs on a fast path in just a few instructions.
-    */
-    drflac_cache_t cacheL2[DR_FLAC_BUFFER_SIZE/sizeof(drflac_cache_t)];
-    drflac_cache_t cache;
-
-    /*
-    CRC-16. This is updated whenever bits are read from the bit stream. Manually set this to 0 to reset the CRC. For FLAC, this
-    is reset to 0 at the beginning of each frame.
-    */
-    drflac_uint16 crc16;
-    drflac_cache_t crc16Cache;              /* A cache for optimizing CRC calculations. This is filled when when the L1 cache is reloaded. */
-    drflac_uint32 crc16CacheIgnoredBytes;   /* The number of bytes to ignore when updating the CRC-16 from the CRC-16 cache. */
-} drflac_bs;
-
-typedef struct
-{
-    /* The type of the subframe: SUBFRAME_CONSTANT, SUBFRAME_VERBATIM, SUBFRAME_FIXED or SUBFRAME_LPC. */
-    drflac_uint8 subframeType;
-
-    /* The number of wasted bits per sample as specified by the sub-frame header. */
-    drflac_uint8 wastedBitsPerSample;
-
-    /* The order to use for the prediction stage for SUBFRAME_FIXED and SUBFRAME_LPC. */
-    drflac_uint8 lpcOrder;
-
-    /* A pointer to the buffer containing the decoded samples in the subframe. This pointer is an offset from drflac::pExtraData. */
-    drflac_int32* pSamplesS32;
-} drflac_subframe;
-
-typedef struct
-{
-    /*
-    If the stream uses variable block sizes, this will be set to the index of the first PCM frame. If fixed block sizes are used, this will
-    always be set to 0. This is 64-bit because the decoded PCM frame number will be 36 bits.
-    */
-    drflac_uint64 pcmFrameNumber;
-
-    /*
-    If the stream uses fixed block sizes, this will be set to the frame number. If variable block sizes are used, this will always be 0. This
-    is 32-bit because in fixed block sizes, the maximum frame number will be 31 bits.
-    */
-    drflac_uint32 flacFrameNumber;
-
-    /* The sample rate of this frame. */
-    drflac_uint32 sampleRate;
-
-    /* The number of PCM frames in each sub-frame within this frame. */
-    drflac_uint16 blockSizeInPCMFrames;
-
-    /*
-    The channel assignment of this frame. This is not always set to the channel count. If interchannel decorrelation is being used this
-    will be set to DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE, DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE or DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE.
-    */
-    drflac_uint8 channelAssignment;
-
-    /* The number of bits per sample within this frame. */
-    drflac_uint8 bitsPerSample;
-
-    /* The frame's CRC. */
-    drflac_uint8 crc8;
-} drflac_frame_header;
-
-typedef struct
-{
-    /* The header. */
-    drflac_frame_header header;
-
-    /*
-    The number of PCM frames left to be read in this FLAC frame. This is initially set to the block size. As PCM frames are read,
-    this will be decremented. When it reaches 0, the decoder will see this frame as fully consumed and load the next frame.
-    */
-    drflac_uint32 pcmFramesRemaining;
-
-    /* The list of sub-frames within the frame. There is one sub-frame for each channel, and there's a maximum of 8 channels. */
-    drflac_subframe subframes[8];
-} drflac_frame;
-
-typedef struct
-{
-    /* The function to call when a metadata block is read. */
-    drflac_meta_proc onMeta;
-
-    /* The user data posted to the metadata callback function. */
-    void* pUserDataMD;
-
-    /* Memory allocation callbacks. */
-    drflac_allocation_callbacks allocationCallbacks;
-
-
-    /* The sample rate. Will be set to something like 44100. */
-    drflac_uint32 sampleRate;
-
-    /*
-    The number of channels. This will be set to 1 for monaural streams, 2 for stereo, etc. Maximum 8. This is set based on the
-    value specified in the STREAMINFO block.
-    */
-    drflac_uint8 channels;
-
-    /* The bits per sample. Will be set to something like 16, 24, etc. */
-    drflac_uint8 bitsPerSample;
-
-    /* The maximum block size, in samples. This number represents the number of samples in each channel (not combined). */
-    drflac_uint16 maxBlockSizeInPCMFrames;
-
-    /*
-    The total number of PCM Frames making up the stream. Can be 0 in which case it's still a valid stream, but just means
-    the total PCM frame count is unknown. Likely the case with streams like internet radio.
-    */
-    drflac_uint64 totalPCMFrameCount;
-
-
-    /* The container type. This is set based on whether or not the decoder was opened from a native or Ogg stream. */
-    drflac_container container;
-
-    /* The number of seekpoints in the seektable. */
-    drflac_uint32 seekpointCount;
-
-
-    /* Information about the frame the decoder is currently sitting on. */
-    drflac_frame currentFLACFrame;
-
-
-    /* The index of the PCM frame the decoder is currently sitting on. This is only used for seeking. */
-    drflac_uint64 currentPCMFrame;
-
-    /* The position of the first FLAC frame in the stream. This is only ever used for seeking. */
-    drflac_uint64 firstFLACFramePosInBytes;
-
-
-    /* A hack to avoid a malloc() when opening a decoder with drflac_open_memory(). */
-    drflac__memory_stream memoryStream;
-
-
-    /* A pointer to the decoded sample data. This is an offset of pExtraData. */
-    drflac_int32* pDecodedSamples;
-
-    /* A pointer to the seek table. This is an offset of pExtraData, or NULL if there is no seek table. */
-    drflac_seekpoint* pSeekpoints;
-
-    /* Internal use only. Only used with Ogg containers. Points to a drflac_oggbs object. This is an offset of pExtraData. */
-    void* _oggbs;
-
-    /* Internal use only. Used for profiling and testing different seeking modes. */
-    drflac_bool32 _noSeekTableSeek    : 1;
-    drflac_bool32 _noBinarySearchSeek : 1;
-    drflac_bool32 _noBruteForceSeek   : 1;
-
-    /* The bit streamer. The raw FLAC data is fed through this object. */
-    drflac_bs bs;
-
-    /* Variable length extra data. We attach this to the end of the object so we can avoid unnecessary mallocs. */
-    drflac_uint8 pExtraData[1];
-} drflac;
-
-
-/*
-Opens a FLAC decoder.
-
-
-Parameters
-----------
-onRead (in)
-    The function to call when data needs to be read from the client.
-
-onSeek (in)
-    The function to call when the read position of the client data needs to move.
-
-pUserData (in, optional)
-    A pointer to application defined data that will be passed to onRead and onSeek.
-
-pAllocationCallbacks (in, optional)
-    A pointer to application defined callbacks for managing memory allocations.
-
-
-Return Value
-------------
-Returns a pointer to an object representing the decoder.
-
-
-Remarks
--------
-Close the decoder with `drflac_close()`.
-
-`pAllocationCallbacks` can be NULL in which case it will use `DRFLAC_MALLOC`, `DRFLAC_REALLOC` and `DRFLAC_FREE`.
-
-This function will automatically detect whether or not you are attempting to open a native or Ogg encapsulated FLAC, both of which should work seamlessly
-without any manual intervention. Ogg encapsulation also works with multiplexed streams which basically means it can play FLAC encoded audio tracks in videos.
-
-This is the lowest level function for opening a FLAC stream. You can also use `drflac_open_file()` and `drflac_open_memory()` to open the stream from a file or
-from a block of memory respectively.
-
-The STREAMINFO block must be present for this to succeed. Use `drflac_open_relaxed()` to open a FLAC stream where the header may not be present.
-
-
-Seek Also
----------
-drflac_open_file()
-drflac_open_memory()
-drflac_open_with_metadata()
-drflac_close()
-*/
-DRFLAC_API drflac* drflac_open(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks);
-
-/*
-Opens a FLAC stream with relaxed validation of the header block.
-
-
-Parameters
-----------
-onRead (in)
-    The function to call when data needs to be read from the client.
-
-onSeek (in)
-    The function to call when the read position of the client data needs to move.
-
-container (in)
-    Whether or not the FLAC stream is encapsulated using standard FLAC encapsulation or Ogg encapsulation.
-
-pUserData (in, optional)
-    A pointer to application defined data that will be passed to onRead and onSeek.
-
-pAllocationCallbacks (in, optional)
-    A pointer to application defined callbacks for managing memory allocations.
-
-
-Return Value
-------------
-A pointer to an object representing the decoder.
-
-
-Remarks
--------
-The same as drflac_open(), except attempts to open the stream even when a header block is not present.
-
-Because the header is not necessarily available, the caller must explicitly define the container (Native or Ogg). Do not set this to `drflac_container_unknown`
-as that is for internal use only.
-
-Opening in relaxed mode will continue reading data from onRead until it finds a valid frame. If a frame is never found it will continue forever. To abort,
-force your `onRead` callback to return 0, which dr_flac will use as an indicator that the end of the stream was found.
-*/
-DRFLAC_API drflac* drflac_open_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_container container, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks);
-
-/*
-Opens a FLAC decoder and notifies the caller of the metadata chunks (album art, etc.).
-
-
-Parameters
-----------
-onRead (in)
-    The function to call when data needs to be read from the client.
-
-onSeek (in)
-    The function to call when the read position of the client data needs to move.
-
-onMeta (in)
-    The function to call for every metadata block.
-
-pUserData (in, optional)
-    A pointer to application defined data that will be passed to onRead, onSeek and onMeta.
-
-pAllocationCallbacks (in, optional)
-    A pointer to application defined callbacks for managing memory allocations.
-
-
-Return Value
-------------
-A pointer to an object representing the decoder.
-
-
-Remarks
--------
-Close the decoder with `drflac_close()`.
-
-`pAllocationCallbacks` can be NULL in which case it will use `DRFLAC_MALLOC`, `DRFLAC_REALLOC` and `DRFLAC_FREE`.
-
-This is slower than `drflac_open()`, so avoid this one if you don't need metadata. Internally, this will allocate and free memory on the heap for every
-metadata block except for STREAMINFO and PADDING blocks.
-
-The caller is notified of the metadata via the `onMeta` callback. All metadata blocks will be handled before the function returns.
-
-The STREAMINFO block must be present for this to succeed. Use `drflac_open_with_metadata_relaxed()` to open a FLAC stream where the header may not be present.
-
-Note that this will behave inconsistently with `drflac_open()` if the stream is an Ogg encapsulated stream and a metadata block is corrupted. This is due to
-the way the Ogg stream recovers from corrupted pages. When `drflac_open_with_metadata()` is being used, the open routine will try to read the contents of the
-metadata block, whereas `drflac_open()` will simply seek past it (for the sake of efficiency). This inconsistency can result in different samples being
-returned depending on whether or not the stream is being opened with metadata.
-
-
-Seek Also
----------
-drflac_open_file_with_metadata()
-drflac_open_memory_with_metadata()
-drflac_open()
-drflac_close()
-*/
-DRFLAC_API drflac* drflac_open_with_metadata(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks);
-
-/*
-The same as drflac_open_with_metadata(), except attempts to open the stream even when a header block is not present.
-
-See Also
---------
-drflac_open_with_metadata()
-drflac_open_relaxed()
-*/
-DRFLAC_API drflac* drflac_open_with_metadata_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks);
-
-/*
-Closes the given FLAC decoder.
-
-
-Parameters
-----------
-pFlac (in)
-    The decoder to close.
-
-
-Remarks
--------
-This will destroy the decoder object.
-
-
-See Also
---------
-drflac_open()
-drflac_open_with_metadata()
-drflac_open_file()
-drflac_open_file_w()
-drflac_open_file_with_metadata()
-drflac_open_file_with_metadata_w()
-drflac_open_memory()
-drflac_open_memory_with_metadata()
-*/
-DRFLAC_API void drflac_close(drflac* pFlac);
-
-
-/*
-Reads sample data from the given FLAC decoder, output as interleaved signed 32-bit PCM.
-
-
-Parameters
-----------
-pFlac (in)
-    The decoder.
-
-framesToRead (in)
-    The number of PCM frames to read.
-
-pBufferOut (out, optional)
-    A pointer to the buffer that will receive the decoded samples.
-
-
-Return Value
-------------
-Returns the number of PCM frames actually read. If the return value is less than `framesToRead` it has reached the end.
-
-
-Remarks
--------
-pBufferOut can be null, in which case the call will act as a seek, and the return value will be the number of frames seeked.
-*/
-DRFLAC_API drflac_uint64 drflac_read_pcm_frames_s32(drflac* pFlac, drflac_uint64 framesToRead, drflac_int32* pBufferOut);
-
-
-/*
-Reads sample data from the given FLAC decoder, output as interleaved signed 16-bit PCM.
-
-
-Parameters
-----------
-pFlac (in)
-    The decoder.
-
-framesToRead (in)
-    The number of PCM frames to read.
-
-pBufferOut (out, optional)
-    A pointer to the buffer that will receive the decoded samples.
-
-
-Return Value
-------------
-Returns the number of PCM frames actually read. If the return value is less than `framesToRead` it has reached the end.
-
-
-Remarks
--------
-pBufferOut can be null, in which case the call will act as a seek, and the return value will be the number of frames seeked.
-
-Note that this is lossy for streams where the bits per sample is larger than 16.
-*/
-DRFLAC_API drflac_uint64 drflac_read_pcm_frames_s16(drflac* pFlac, drflac_uint64 framesToRead, drflac_int16* pBufferOut);
-
-/*
-Reads sample data from the given FLAC decoder, output as interleaved 32-bit floating point PCM.
-
-
-Parameters
-----------
-pFlac (in)
-    The decoder.
-
-framesToRead (in)
-    The number of PCM frames to read.
-
-pBufferOut (out, optional)
-    A pointer to the buffer that will receive the decoded samples.
-
-
-Return Value
-------------
-Returns the number of PCM frames actually read. If the return value is less than `framesToRead` it has reached the end.
-
-
-Remarks
--------
-pBufferOut can be null, in which case the call will act as a seek, and the return value will be the number of frames seeked.
-
-Note that this should be considered lossy due to the nature of floating point numbers not being able to exactly represent every possible number.
-*/
-DRFLAC_API drflac_uint64 drflac_read_pcm_frames_f32(drflac* pFlac, drflac_uint64 framesToRead, float* pBufferOut);
-
-/*
-Seeks to the PCM frame at the given index.
-
-
-Parameters
-----------
-pFlac (in)
-    The decoder.
-
-pcmFrameIndex (in)
-    The index of the PCM frame to seek to. See notes below.
-
-
-Return Value
--------------
-`DRFLAC_TRUE` if successful; `DRFLAC_FALSE` otherwise.
-*/
-DRFLAC_API drflac_bool32 drflac_seek_to_pcm_frame(drflac* pFlac, drflac_uint64 pcmFrameIndex);
-
-
-
-#ifndef DR_FLAC_NO_STDIO
-/*
-Opens a FLAC decoder from the file at the given path.
-
-
-Parameters
-----------
-pFileName (in)
-    The path of the file to open, either absolute or relative to the current directory.
-
-pAllocationCallbacks (in, optional)
-    A pointer to application defined callbacks for managing memory allocations.
-
-
-Return Value
-------------
-A pointer to an object representing the decoder.
-
-
-Remarks
--------
-Close the decoder with drflac_close().
-
-
-Remarks
--------
-This will hold a handle to the file until the decoder is closed with drflac_close(). Some platforms will restrict the number of files a process can have open
-at any given time, so keep this mind if you have many decoders open at the same time.
-
-
-See Also
---------
-drflac_open_file_with_metadata()
-drflac_open()
-drflac_close()
-*/
-DRFLAC_API drflac* drflac_open_file(const char* pFileName, const drflac_allocation_callbacks* pAllocationCallbacks);
-DRFLAC_API drflac* drflac_open_file_w(const wchar_t* pFileName, const drflac_allocation_callbacks* pAllocationCallbacks);
-
-/*
-Opens a FLAC decoder from the file at the given path and notifies the caller of the metadata chunks (album art, etc.)
-
-
-Parameters
-----------
-pFileName (in)
-    The path of the file to open, either absolute or relative to the current directory.
-
-pAllocationCallbacks (in, optional)
-    A pointer to application defined callbacks for managing memory allocations.
-
-onMeta (in)
-    The callback to fire for each metadata block.
-
-pUserData (in)
-    A pointer to the user data to pass to the metadata callback.
-
-pAllocationCallbacks (in)
-    A pointer to application defined callbacks for managing memory allocations.
-
-
-Remarks
--------
-Look at the documentation for drflac_open_with_metadata() for more information on how metadata is handled.
-
-
-See Also
---------
-drflac_open_with_metadata()
-drflac_open()
-drflac_close()
-*/
-DRFLAC_API drflac* drflac_open_file_with_metadata(const char* pFileName, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks);
-DRFLAC_API drflac* drflac_open_file_with_metadata_w(const wchar_t* pFileName, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks);
-#endif
-
-/*
-Opens a FLAC decoder from a pre-allocated block of memory
-
-
-Parameters
-----------
-pData (in)
-    A pointer to the raw encoded FLAC data.
-
-dataSize (in)
-    The size in bytes of `data`.
-
-pAllocationCallbacks (in)
-    A pointer to application defined callbacks for managing memory allocations.
-
-
-Return Value
-------------
-A pointer to an object representing the decoder.
-
-
-Remarks
--------
-This does not create a copy of the data. It is up to the application to ensure the buffer remains valid for the lifetime of the decoder.
-
-
-See Also
---------
-drflac_open()
-drflac_close()
-*/
-DRFLAC_API drflac* drflac_open_memory(const void* pData, size_t dataSize, const drflac_allocation_callbacks* pAllocationCallbacks);
-
-/*
-Opens a FLAC decoder from a pre-allocated block of memory and notifies the caller of the metadata chunks (album art, etc.)
-
-
-Parameters
-----------
-pData (in)
-    A pointer to the raw encoded FLAC data.
-
-dataSize (in)
-    The size in bytes of `data`.
-
-onMeta (in)
-    The callback to fire for each metadata block.
-
-pUserData (in)
-    A pointer to the user data to pass to the metadata callback.
-
-pAllocationCallbacks (in)
-    A pointer to application defined callbacks for managing memory allocations.
-
-
-Remarks
--------
-Look at the documentation for drflac_open_with_metadata() for more information on how metadata is handled.
-
-
-See Also
--------
-drflac_open_with_metadata()
-drflac_open()
-drflac_close()
-*/
-DRFLAC_API drflac* drflac_open_memory_with_metadata(const void* pData, size_t dataSize, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks);
-
-
-
-/* High Level APIs */
-
-/*
-Opens a FLAC stream from the given callbacks and fully decodes it in a single operation. The return value is a
-pointer to the sample data as interleaved signed 32-bit PCM. The returned data must be freed with drflac_free().
-
-You can pass in custom memory allocation callbacks via the pAllocationCallbacks parameter. This can be NULL in which
-case it will use DRFLAC_MALLOC, DRFLAC_REALLOC and DRFLAC_FREE.
-
-Sometimes a FLAC file won't keep track of the total sample count. In this situation the function will continuously
-read samples into a dynamically sized buffer on the heap until no samples are left.
-
-Do not call this function on a broadcast type of stream (like internet radio streams and whatnot).
-*/
-DRFLAC_API drflac_int32* drflac_open_and_read_pcm_frames_s32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
-
-/* Same as drflac_open_and_read_pcm_frames_s32(), except returns signed 16-bit integer samples. */
-DRFLAC_API drflac_int16* drflac_open_and_read_pcm_frames_s16(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
-
-/* Same as drflac_open_and_read_pcm_frames_s32(), except returns 32-bit floating-point samples. */
-DRFLAC_API float* drflac_open_and_read_pcm_frames_f32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
-
-#ifndef DR_FLAC_NO_STDIO
-/* Same as drflac_open_and_read_pcm_frames_s32() except opens the decoder from a file. */
-DRFLAC_API drflac_int32* drflac_open_file_and_read_pcm_frames_s32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
-
-/* Same as drflac_open_file_and_read_pcm_frames_s32(), except returns signed 16-bit integer samples. */
-DRFLAC_API drflac_int16* drflac_open_file_and_read_pcm_frames_s16(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
-
-/* Same as drflac_open_file_and_read_pcm_frames_s32(), except returns 32-bit floating-point samples. */
-DRFLAC_API float* drflac_open_file_and_read_pcm_frames_f32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
-#endif
-
-/* Same as drflac_open_and_read_pcm_frames_s32() except opens the decoder from a block of memory. */
-DRFLAC_API drflac_int32* drflac_open_memory_and_read_pcm_frames_s32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
-
-/* Same as drflac_open_memory_and_read_pcm_frames_s32(), except returns signed 16-bit integer samples. */
-DRFLAC_API drflac_int16* drflac_open_memory_and_read_pcm_frames_s16(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
-
-/* Same as drflac_open_memory_and_read_pcm_frames_s32(), except returns 32-bit floating-point samples. */
-DRFLAC_API float* drflac_open_memory_and_read_pcm_frames_f32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
-
-/*
-Frees memory that was allocated internally by dr_flac.
-
-Set pAllocationCallbacks to the same object that was passed to drflac_open_*_and_read_pcm_frames_*(). If you originally passed in NULL, pass in NULL for this.
-*/
-DRFLAC_API void drflac_free(void* p, const drflac_allocation_callbacks* pAllocationCallbacks);
-
-
-/* Structure representing an iterator for vorbis comments in a VORBIS_COMMENT metadata block. */
-typedef struct
-{
-    drflac_uint32 countRemaining;
-    const char* pRunningData;
-} drflac_vorbis_comment_iterator;
-
-/*
-Initializes a vorbis comment iterator. This can be used for iterating over the vorbis comments in a VORBIS_COMMENT
-metadata block.
-*/
-DRFLAC_API void drflac_init_vorbis_comment_iterator(drflac_vorbis_comment_iterator* pIter, drflac_uint32 commentCount, const void* pComments);
-
-/*
-Goes to the next vorbis comment in the given iterator. If null is returned it means there are no more comments. The
-returned string is NOT null terminated.
-*/
-DRFLAC_API const char* drflac_next_vorbis_comment(drflac_vorbis_comment_iterator* pIter, drflac_uint32* pCommentLengthOut);
-
-
-/* Structure representing an iterator for cuesheet tracks in a CUESHEET metadata block. */
-typedef struct
-{
-    drflac_uint32 countRemaining;
-    const char* pRunningData;
-} drflac_cuesheet_track_iterator;
-
-/* Packing is important on this structure because we map this directly to the raw data within the CUESHEET metadata block. */
-#pragma pack(4)
-typedef struct
-{
-    drflac_uint64 offset;
-    drflac_uint8 index;
-    drflac_uint8 reserved[3];
-} drflac_cuesheet_track_index;
-#pragma pack()
-
-typedef struct
-{
-    drflac_uint64 offset;
-    drflac_uint8 trackNumber;
-    char ISRC[12];
-    drflac_bool8 isAudio;
-    drflac_bool8 preEmphasis;
-    drflac_uint8 indexCount;
-    const drflac_cuesheet_track_index* pIndexPoints;
-} drflac_cuesheet_track;
-
-/*
-Initializes a cuesheet track iterator. This can be used for iterating over the cuesheet tracks in a CUESHEET metadata
-block.
-*/
-DRFLAC_API void drflac_init_cuesheet_track_iterator(drflac_cuesheet_track_iterator* pIter, drflac_uint32 trackCount, const void* pTrackData);
-
-/* Goes to the next cuesheet track in the given iterator. If DRFLAC_FALSE is returned it means there are no more comments. */
-DRFLAC_API drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterator* pIter, drflac_cuesheet_track* pCuesheetTrack);
-
-
-#ifdef __cplusplus
-}
-#endif
-#endif  /* dr_flac_h */
-
-
-/************************************************************************************************************************************************************
- ************************************************************************************************************************************************************
-
- IMPLEMENTATION
-
- ************************************************************************************************************************************************************
- ************************************************************************************************************************************************************/
-#if defined(DR_FLAC_IMPLEMENTATION) || defined(DRFLAC_IMPLEMENTATION)
-
-/* Disable some annoying warnings. */
-#if defined(__GNUC__)
-    #pragma GCC diagnostic push
-    #if __GNUC__ >= 7
-    #pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
-    #endif
-#endif
-
-#ifdef __linux__
-    #ifndef _BSD_SOURCE
-        #define _BSD_SOURCE
-    #endif
-    #ifndef __USE_BSD
-        #define __USE_BSD
-    #endif
-    #include <endian.h>
-#endif
-
-#include <stdlib.h>
-#include <string.h>
-
-#ifdef _MSC_VER
-    #define DRFLAC_INLINE __forceinline
-#elif defined(__GNUC__)
-    /*
-    I've had a bug report where GCC is emitting warnings about functions possibly not being inlineable. This warning happens when
-    the __attribute__((always_inline)) attribute is defined without an "inline" statement. I think therefore there must be some
-    case where "__inline__" is not always defined, thus the compiler emitting these warnings. When using -std=c89 or -ansi on the
-    command line, we cannot use the "inline" keyword and instead need to use "__inline__". In an attempt to work around this issue
-    I am using "__inline__" only when we're compiling in strict ANSI mode.
-    */
-    #if defined(__STRICT_ANSI__)
-        #define DRFLAC_INLINE __inline__ __attribute__((always_inline))
-    #else
-        #define DRFLAC_INLINE inline __attribute__((always_inline))
-    #endif
-#else
-    #define DRFLAC_INLINE
-#endif
-
-/* CPU architecture. */
-#if defined(__x86_64__) || defined(_M_X64)
-    #define DRFLAC_X64
-#elif defined(__i386) || defined(_M_IX86)
-    #define DRFLAC_X86
-#elif defined(__arm__) || defined(_M_ARM)
-    #define DRFLAC_ARM
-#endif
-
-/* Intrinsics Support */
-#if !defined(DR_FLAC_NO_SIMD)
-    #if defined(DRFLAC_X64) || defined(DRFLAC_X86)
-        #if defined(_MSC_VER) && !defined(__clang__)
-            /* MSVC. */
-            #if _MSC_VER >= 1400 && !defined(DRFLAC_NO_SSE2)    /* 2005 */
-                #define DRFLAC_SUPPORT_SSE2
-            #endif
-            #if _MSC_VER >= 1600 && !defined(DRFLAC_NO_SSE41)   /* 2010 */
-                #define DRFLAC_SUPPORT_SSE41
-            #endif
-        #else
-            /* Assume GNUC-style. */
-            #if defined(__SSE2__) && !defined(DRFLAC_NO_SSE2)
-                #define DRFLAC_SUPPORT_SSE2
-            #endif
-            #if defined(__SSE4_1__) && !defined(DRFLAC_NO_SSE41)
-                #define DRFLAC_SUPPORT_SSE41
-            #endif
-        #endif
-
-        /* If at this point we still haven't determined compiler support for the intrinsics just fall back to __has_include. */
-        #if !defined(__GNUC__) && !defined(__clang__) && defined(__has_include)
-            #if !defined(DRFLAC_SUPPORT_SSE2) && !defined(DRFLAC_NO_SSE2) && __has_include(<emmintrin.h>)
-                #define DRFLAC_SUPPORT_SSE2
-            #endif
-            #if !defined(DRFLAC_SUPPORT_SSE41) && !defined(DRFLAC_NO_SSE41) && __has_include(<smmintrin.h>)
-                #define DRFLAC_SUPPORT_SSE41
-            #endif
-        #endif
-
-        #if defined(DRFLAC_SUPPORT_SSE41)
-            #include <smmintrin.h>
-        #elif defined(DRFLAC_SUPPORT_SSE2)
-            #include <emmintrin.h>
-        #endif
-    #endif
-
-    #if defined(DRFLAC_ARM)
-        #if !defined(DRFLAC_NO_NEON) && (defined(__ARM_NEON) || defined(__aarch64__) || defined(_M_ARM64))
-            #define DRFLAC_SUPPORT_NEON
-        #endif
-
-        /* Fall back to looking for the #include file. */
-        #if !defined(__GNUC__) && !defined(__clang__) && defined(__has_include)
-            #if !defined(DRFLAC_SUPPORT_NEON) && !defined(DRFLAC_NO_NEON) && __has_include(<arm_neon.h>)
-                #define DRFLAC_SUPPORT_NEON
-            #endif
-        #endif
-
-        #if defined(DRFLAC_SUPPORT_NEON)
-            #include <arm_neon.h>
-        #endif
-    #endif
-#endif
-
-/* Compile-time CPU feature support. */
-#if !defined(DR_FLAC_NO_SIMD) && (defined(DRFLAC_X86) || defined(DRFLAC_X64))
-    #if defined(_MSC_VER) && !defined(__clang__)
-        #if _MSC_VER >= 1400
-            #include <intrin.h>
-            static void drflac__cpuid(int info[4], int fid)
-            {
-                __cpuid(info, fid);
-            }
-        #else
-            #define DRFLAC_NO_CPUID
-        #endif
-    #else
-        #if defined(__GNUC__) || defined(__clang__)
-            static void drflac__cpuid(int info[4], int fid)
-            {
-                /*
-                It looks like the -fPIC option uses the ebx register which GCC complains about. We can work around this by just using a different register, the
-                specific register of which I'm letting the compiler decide on. The "k" prefix is used to specify a 32-bit register. The {...} syntax is for
-                supporting different assembly dialects.
-
-                What's basically happening is that we're saving and restoring the ebx register manually.
-                */
-                #if defined(DRFLAC_X86) && defined(__PIC__)
-                    __asm__ __volatile__ (
-                        "xchg{l} {%%}ebx, %k1;"
-                        "cpuid;"
-                        "xchg{l} {%%}ebx, %k1;"
-                        : "=a"(info[0]), "=&r"(info[1]), "=c"(info[2]), "=d"(info[3]) : "a"(fid), "c"(0)
-                    );
-                #else
-                    __asm__ __volatile__ (
-                        "cpuid" : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3]) : "a"(fid), "c"(0)
-                    );
-                #endif
-            }
-        #else
-            #define DRFLAC_NO_CPUID
-        #endif
-    #endif
-#else
-    #define DRFLAC_NO_CPUID
-#endif
-
-static DRFLAC_INLINE drflac_bool32 drflac_has_sse2(void)
-{
-#if defined(DRFLAC_SUPPORT_SSE2)
-    #if (defined(DRFLAC_X64) || defined(DRFLAC_X86)) && !defined(DRFLAC_NO_SSE2)
-        #if defined(DRFLAC_X64)
-            return DRFLAC_TRUE;    /* 64-bit targets always support SSE2. */
-        #elif (defined(_M_IX86_FP) && _M_IX86_FP == 2) || defined(__SSE2__)
-            return DRFLAC_TRUE;    /* If the compiler is allowed to freely generate SSE2 code we can assume support. */
-        #else
-            #if defined(DRFLAC_NO_CPUID)
-                return DRFLAC_FALSE;
-            #else
-                int info[4];
-                drflac__cpuid(info, 1);
-                return (info[3] & (1 << 26)) != 0;
-            #endif
-        #endif
-    #else
-        return DRFLAC_FALSE;       /* SSE2 is only supported on x86 and x64 architectures. */
-    #endif
-#else
-    return DRFLAC_FALSE;           /* No compiler support. */
-#endif
-}
-
-static DRFLAC_INLINE drflac_bool32 drflac_has_sse41(void)
-{
-#if defined(DRFLAC_SUPPORT_SSE41)
-    #if (defined(DRFLAC_X64) || defined(DRFLAC_X86)) && !defined(DRFLAC_NO_SSE41)
-        #if defined(DRFLAC_X64)
-            return DRFLAC_TRUE;    /* 64-bit targets always support SSE4.1. */
-        #elif (defined(_M_IX86_FP) && _M_IX86_FP == 2) || defined(__SSE4_1__)
-            return DRFLAC_TRUE;    /* If the compiler is allowed to freely generate SSE41 code we can assume support. */
-        #else
-            #if defined(DRFLAC_NO_CPUID)
-                return DRFLAC_FALSE;
-            #else
-                int info[4];
-                drflac__cpuid(info, 1);
-                return (info[2] & (1 << 19)) != 0;
-            #endif
-        #endif
-    #else
-        return DRFLAC_FALSE;       /* SSE41 is only supported on x86 and x64 architectures. */
-    #endif
-#else
-    return DRFLAC_FALSE;           /* No compiler support. */
-#endif
-}
-
-
-#if defined(_MSC_VER) && _MSC_VER >= 1500 && (defined(DRFLAC_X86) || defined(DRFLAC_X64))
-    #define DRFLAC_HAS_LZCNT_INTRINSIC
-#elif (defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)))
-    #define DRFLAC_HAS_LZCNT_INTRINSIC
-#elif defined(__clang__)
-    #if defined(__has_builtin)
-        #if __has_builtin(__builtin_clzll) || __has_builtin(__builtin_clzl)
-            #define DRFLAC_HAS_LZCNT_INTRINSIC
-        #endif
-    #endif
-#endif
-
-#if defined(_MSC_VER) && _MSC_VER >= 1400
-    #define DRFLAC_HAS_BYTESWAP16_INTRINSIC
-    #define DRFLAC_HAS_BYTESWAP32_INTRINSIC
-    #define DRFLAC_HAS_BYTESWAP64_INTRINSIC
-#elif defined(__clang__)
-    #if defined(__has_builtin)
-        #if __has_builtin(__builtin_bswap16)
-            #define DRFLAC_HAS_BYTESWAP16_INTRINSIC
-        #endif
-        #if __has_builtin(__builtin_bswap32)
-            #define DRFLAC_HAS_BYTESWAP32_INTRINSIC
-        #endif
-        #if __has_builtin(__builtin_bswap64)
-            #define DRFLAC_HAS_BYTESWAP64_INTRINSIC
-        #endif
-    #endif
-#elif defined(__GNUC__)
-    #if ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
-        #define DRFLAC_HAS_BYTESWAP32_INTRINSIC
-        #define DRFLAC_HAS_BYTESWAP64_INTRINSIC
-    #endif
-    #if ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))
-        #define DRFLAC_HAS_BYTESWAP16_INTRINSIC
-    #endif
-#endif
-
-
-/* Standard library stuff. */
-#ifndef DRFLAC_ASSERT
-#include <assert.h>
-#define DRFLAC_ASSERT(expression)           assert(expression)
-#endif
-#ifndef DRFLAC_MALLOC
-#define DRFLAC_MALLOC(sz)                   malloc((sz))
-#endif
-#ifndef DRFLAC_REALLOC
-#define DRFLAC_REALLOC(p, sz)               realloc((p), (sz))
-#endif
-#ifndef DRFLAC_FREE
-#define DRFLAC_FREE(p)                      free((p))
-#endif
-#ifndef DRFLAC_COPY_MEMORY
-#define DRFLAC_COPY_MEMORY(dst, src, sz)    memcpy((dst), (src), (sz))
-#endif
-#ifndef DRFLAC_ZERO_MEMORY
-#define DRFLAC_ZERO_MEMORY(p, sz)           memset((p), 0, (sz))
-#endif
-#ifndef DRFLAC_ZERO_OBJECT
-#define DRFLAC_ZERO_OBJECT(p)               DRFLAC_ZERO_MEMORY((p), sizeof(*(p)))
-#endif
-
-#define DRFLAC_MAX_SIMD_VECTOR_SIZE                     64  /* 64 for AVX-512 in the future. */
-
-typedef drflac_int32 drflac_result;
-#define DRFLAC_SUCCESS                                   0
-#define DRFLAC_ERROR                                    -1   /* A generic error. */
-#define DRFLAC_INVALID_ARGS                             -2
-#define DRFLAC_INVALID_OPERATION                        -3
-#define DRFLAC_OUT_OF_MEMORY                            -4
-#define DRFLAC_OUT_OF_RANGE                             -5
-#define DRFLAC_ACCESS_DENIED                            -6
-#define DRFLAC_DOES_NOT_EXIST                           -7
-#define DRFLAC_ALREADY_EXISTS                           -8
-#define DRFLAC_TOO_MANY_OPEN_FILES                      -9
-#define DRFLAC_INVALID_FILE                             -10
-#define DRFLAC_TOO_BIG                                  -11
-#define DRFLAC_PATH_TOO_LONG                            -12
-#define DRFLAC_NAME_TOO_LONG                            -13
-#define DRFLAC_NOT_DIRECTORY                            -14
-#define DRFLAC_IS_DIRECTORY                             -15
-#define DRFLAC_DIRECTORY_NOT_EMPTY                      -16
-#define DRFLAC_END_OF_FILE                              -17
-#define DRFLAC_NO_SPACE                                 -18
-#define DRFLAC_BUSY                                     -19
-#define DRFLAC_IO_ERROR                                 -20
-#define DRFLAC_INTERRUPT                                -21
-#define DRFLAC_UNAVAILABLE                              -22
-#define DRFLAC_ALREADY_IN_USE                           -23
-#define DRFLAC_BAD_ADDRESS                              -24
-#define DRFLAC_BAD_SEEK                                 -25
-#define DRFLAC_BAD_PIPE                                 -26
-#define DRFLAC_DEADLOCK                                 -27
-#define DRFLAC_TOO_MANY_LINKS                           -28
-#define DRFLAC_NOT_IMPLEMENTED                          -29
-#define DRFLAC_NO_MESSAGE                               -30
-#define DRFLAC_BAD_MESSAGE                              -31
-#define DRFLAC_NO_DATA_AVAILABLE                        -32
-#define DRFLAC_INVALID_DATA                             -33
-#define DRFLAC_TIMEOUT                                  -34
-#define DRFLAC_NO_NETWORK                               -35
-#define DRFLAC_NOT_UNIQUE                               -36
-#define DRFLAC_NOT_SOCKET                               -37
-#define DRFLAC_NO_ADDRESS                               -38
-#define DRFLAC_BAD_PROTOCOL                             -39
-#define DRFLAC_PROTOCOL_UNAVAILABLE                     -40
-#define DRFLAC_PROTOCOL_NOT_SUPPORTED                   -41
-#define DRFLAC_PROTOCOL_FAMILY_NOT_SUPPORTED            -42
-#define DRFLAC_ADDRESS_FAMILY_NOT_SUPPORTED             -43
-#define DRFLAC_SOCKET_NOT_SUPPORTED                     -44
-#define DRFLAC_CONNECTION_RESET                         -45
-#define DRFLAC_ALREADY_CONNECTED                        -46
-#define DRFLAC_NOT_CONNECTED                            -47
-#define DRFLAC_CONNECTION_REFUSED                       -48
-#define DRFLAC_NO_HOST                                  -49
-#define DRFLAC_IN_PROGRESS                              -50
-#define DRFLAC_CANCELLED                                -51
-#define DRFLAC_MEMORY_ALREADY_MAPPED                    -52
-#define DRFLAC_AT_END                                   -53
-#define DRFLAC_CRC_MISMATCH                             -128
-
-#define DRFLAC_SUBFRAME_CONSTANT                        0
-#define DRFLAC_SUBFRAME_VERBATIM                        1
-#define DRFLAC_SUBFRAME_FIXED                           8
-#define DRFLAC_SUBFRAME_LPC                             32
-#define DRFLAC_SUBFRAME_RESERVED                        255
-
-#define DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE  0
-#define DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2 1
-
-#define DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT           0
-#define DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE             8
-#define DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE            9
-#define DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE              10
-
-#define drflac_align(x, a)                              ((((x) + (a) - 1) / (a)) * (a))
-
-
-DRFLAC_API void drflac_version(drflac_uint32* pMajor, drflac_uint32* pMinor, drflac_uint32* pRevision)
-{
-    if (pMajor) {
-        *pMajor = DRFLAC_VERSION_MAJOR;
-    }
-
-    if (pMinor) {
-        *pMinor = DRFLAC_VERSION_MINOR;
-    }
-
-    if (pRevision) {
-        *pRevision = DRFLAC_VERSION_REVISION;
-    }
-}
-
-DRFLAC_API const char* drflac_version_string()
-{
-    return DRFLAC_VERSION_STRING;
-}
-
-
-/* CPU caps. */
-#if defined(__has_feature)
-    #if __has_feature(thread_sanitizer)
-        #define DRFLAC_NO_THREAD_SANITIZE __attribute__((no_sanitize("thread")))
-    #else
-        #define DRFLAC_NO_THREAD_SANITIZE
-    #endif
-#else
-    #define DRFLAC_NO_THREAD_SANITIZE
-#endif
-
-#if defined(DRFLAC_HAS_LZCNT_INTRINSIC)
-static drflac_bool32 drflac__gIsLZCNTSupported = DRFLAC_FALSE;
-#endif
-
-#ifndef DRFLAC_NO_CPUID
-static drflac_bool32 drflac__gIsSSE2Supported  = DRFLAC_FALSE;
-static drflac_bool32 drflac__gIsSSE41Supported = DRFLAC_FALSE;
-
-/*
-I've had a bug report that Clang's ThreadSanitizer presents a warning in this function. Having reviewed this, this does
-actually make sense. However, since CPU caps should never differ for a running process, I don't think the trade off of
-complicating internal API's by passing around CPU caps versus just disabling the warnings is worthwhile. I'm therefore
-just going to disable these warnings. This is disabled via the DRFLAC_NO_THREAD_SANITIZE attribute.
-*/
-DRFLAC_NO_THREAD_SANITIZE static void drflac__init_cpu_caps(void)
-{
-    static drflac_bool32 isCPUCapsInitialized = DRFLAC_FALSE;
-
-    if (!isCPUCapsInitialized) {
-        /* LZCNT */
-#if defined(DRFLAC_HAS_LZCNT_INTRINSIC)
-        int info[4] = {0};
-        drflac__cpuid(info, 0x80000001);
-        drflac__gIsLZCNTSupported = (info[2] & (1 << 5)) != 0;
-#endif
-
-        /* SSE2 */
-        drflac__gIsSSE2Supported = drflac_has_sse2();
-
-        /* SSE4.1 */
-        drflac__gIsSSE41Supported = drflac_has_sse41();
-
-        /* Initialized. */
-        isCPUCapsInitialized = DRFLAC_TRUE;
-    }
-}
-#else
-static drflac_bool32 drflac__gIsNEONSupported  = DRFLAC_FALSE;
-
-static DRFLAC_INLINE drflac_bool32 drflac__has_neon(void)
-{
-#if defined(DRFLAC_SUPPORT_NEON)
-    #if defined(DRFLAC_ARM) && !defined(DRFLAC_NO_NEON)
-        #if (defined(__ARM_NEON) || defined(__aarch64__) || defined(_M_ARM64))
-            return DRFLAC_TRUE;    /* If the compiler is allowed to freely generate NEON code we can assume support. */
-        #else
-            /* TODO: Runtime check. */
-            return DRFLAC_FALSE;
-        #endif
-    #else
-        return DRFLAC_FALSE;       /* NEON is only supported on ARM architectures. */
-    #endif
-#else
-    return DRFLAC_FALSE;           /* No compiler support. */
-#endif
-}
-
-DRFLAC_NO_THREAD_SANITIZE static void drflac__init_cpu_caps(void)
-{
-    drflac__gIsNEONSupported = drflac__has_neon();
-
-#if defined(DRFLAC_HAS_LZCNT_INTRINSIC) && defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 5)
-    drflac__gIsLZCNTSupported = DRFLAC_TRUE;
-#endif
-}
-#endif
-
-
-/* Endian Management */
-static DRFLAC_INLINE drflac_bool32 drflac__is_little_endian(void)
-{
-#if defined(DRFLAC_X86) || defined(DRFLAC_X64)
-    return DRFLAC_TRUE;
-#elif defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && __BYTE_ORDER == __LITTLE_ENDIAN
-    return DRFLAC_TRUE;
-#else
-    int n = 1;
-    return (*(char*)&n) == 1;
-#endif
-}
-
-static DRFLAC_INLINE drflac_uint16 drflac__swap_endian_uint16(drflac_uint16 n)
-{
-#ifdef DRFLAC_HAS_BYTESWAP16_INTRINSIC
-    #if defined(_MSC_VER)
-        return _byteswap_ushort(n);
-    #elif defined(__GNUC__) || defined(__clang__)
-        return __builtin_bswap16(n);
-    #else
-        #error "This compiler does not support the byte swap intrinsic."
-    #endif
-#else
-    return ((n & 0xFF00) >> 8) |
-           ((n & 0x00FF) << 8);
-#endif
-}
-
-static DRFLAC_INLINE drflac_uint32 drflac__swap_endian_uint32(drflac_uint32 n)
-{
-#ifdef DRFLAC_HAS_BYTESWAP32_INTRINSIC
-    #if defined(_MSC_VER)
-        return _byteswap_ulong(n);
-    #elif defined(__GNUC__) || defined(__clang__)
-        #if defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 6) && !defined(DRFLAC_64BIT)   /* <-- 64-bit inline assembly has not been tested, so disabling for now. */
-            /* Inline assembly optimized implementation for ARM. In my testing, GCC does not generate optimized code with __builtin_bswap32(). */
-            drflac_uint32 r;
-            __asm__ __volatile__ (
-            #if defined(DRFLAC_64BIT)
-                "rev %w[out], %w[in]" : [out]"=r"(r) : [in]"r"(n)   /* <-- This is untested. If someone in the community could test this, that would be appreciated! */
-            #else
-                "rev %[out], %[in]" : [out]"=r"(r) : [in]"r"(n)
-            #endif
-            );
-            return r;
-        #else
-            return __builtin_bswap32(n);
-        #endif
-    #else
-        #error "This compiler does not support the byte swap intrinsic."
-    #endif
-#else
-    return ((n & 0xFF000000) >> 24) |
-           ((n & 0x00FF0000) >>  8) |
-           ((n & 0x0000FF00) <<  8) |
-           ((n & 0x000000FF) << 24);
-#endif
-}
-
-static DRFLAC_INLINE drflac_uint64 drflac__swap_endian_uint64(drflac_uint64 n)
-{
-#ifdef DRFLAC_HAS_BYTESWAP64_INTRINSIC
-    #if defined(_MSC_VER)
-        return _byteswap_uint64(n);
-    #elif defined(__GNUC__) || defined(__clang__)
-        return __builtin_bswap64(n);
-    #else
-        #error "This compiler does not support the byte swap intrinsic."
-    #endif
-#else
-    return ((n & (drflac_uint64)0xFF00000000000000) >> 56) |
-           ((n & (drflac_uint64)0x00FF000000000000) >> 40) |
-           ((n & (drflac_uint64)0x0000FF0000000000) >> 24) |
-           ((n & (drflac_uint64)0x000000FF00000000) >>  8) |
-           ((n & (drflac_uint64)0x00000000FF000000) <<  8) |
-           ((n & (drflac_uint64)0x0000000000FF0000) << 24) |
-           ((n & (drflac_uint64)0x000000000000FF00) << 40) |
-           ((n & (drflac_uint64)0x00000000000000FF) << 56);
-#endif
-}
-
-
-static DRFLAC_INLINE drflac_uint16 drflac__be2host_16(drflac_uint16 n)
-{
-    if (drflac__is_little_endian()) {
-        return drflac__swap_endian_uint16(n);
-    }
-
-    return n;
-}
-
-static DRFLAC_INLINE drflac_uint32 drflac__be2host_32(drflac_uint32 n)
-{
-    if (drflac__is_little_endian()) {
-        return drflac__swap_endian_uint32(n);
-    }
-
-    return n;
-}
-
-static DRFLAC_INLINE drflac_uint64 drflac__be2host_64(drflac_uint64 n)
-{
-    if (drflac__is_little_endian()) {
-        return drflac__swap_endian_uint64(n);
-    }
-
-    return n;
-}
-
-
-static DRFLAC_INLINE drflac_uint32 drflac__le2host_32(drflac_uint32 n)
-{
-    if (!drflac__is_little_endian()) {
-        return drflac__swap_endian_uint32(n);
-    }
-
-    return n;
-}
-
-
-static DRFLAC_INLINE drflac_uint32 drflac__unsynchsafe_32(drflac_uint32 n)
-{
-    drflac_uint32 result = 0;
-    result |= (n & 0x7F000000) >> 3;
-    result |= (n & 0x007F0000) >> 2;
-    result |= (n & 0x00007F00) >> 1;
-    result |= (n & 0x0000007F) >> 0;
-
-    return result;
-}
-
-
-
-/* The CRC code below is based on this document: http://zlib.net/crc_v3.txt */
-static drflac_uint8 drflac__crc8_table[] = {
-    0x00, 0x07, 0x0E, 0x09, 0x1C, 0x1B, 0x12, 0x15, 0x38, 0x3F, 0x36, 0x31, 0x24, 0x23, 0x2A, 0x2D,
-    0x70, 0x77, 0x7E, 0x79, 0x6C, 0x6B, 0x62, 0x65, 0x48, 0x4F, 0x46, 0x41, 0x54, 0x53, 0x5A, 0x5D,
-    0xE0, 0xE7, 0xEE, 0xE9, 0xFC, 0xFB, 0xF2, 0xF5, 0xD8, 0xDF, 0xD6, 0xD1, 0xC4, 0xC3, 0xCA, 0xCD,
-    0x90, 0x97, 0x9E, 0x99, 0x8C, 0x8B, 0x82, 0x85, 0xA8, 0xAF, 0xA6, 0xA1, 0xB4, 0xB3, 0xBA, 0xBD,
-    0xC7, 0xC0, 0xC9, 0xCE, 0xDB, 0xDC, 0xD5, 0xD2, 0xFF, 0xF8, 0xF1, 0xF6, 0xE3, 0xE4, 0xED, 0xEA,
-    0xB7, 0xB0, 0xB9, 0xBE, 0xAB, 0xAC, 0xA5, 0xA2, 0x8F, 0x88, 0x81, 0x86, 0x93, 0x94, 0x9D, 0x9A,
-    0x27, 0x20, 0x29, 0x2E, 0x3B, 0x3C, 0x35, 0x32, 0x1F, 0x18, 0x11, 0x16, 0x03, 0x04, 0x0D, 0x0A,
-    0x57, 0x50, 0x59, 0x5E, 0x4B, 0x4C, 0x45, 0x42, 0x6F, 0x68, 0x61, 0x66, 0x73, 0x74, 0x7D, 0x7A,
-    0x89, 0x8E, 0x87, 0x80, 0x95, 0x92, 0x9B, 0x9C, 0xB1, 0xB6, 0xBF, 0xB8, 0xAD, 0xAA, 0xA3, 0xA4,
-    0xF9, 0xFE, 0xF7, 0xF0, 0xE5, 0xE2, 0xEB, 0xEC, 0xC1, 0xC6, 0xCF, 0xC8, 0xDD, 0xDA, 0xD3, 0xD4,
-    0x69, 0x6E, 0x67, 0x60, 0x75, 0x72, 0x7B, 0x7C, 0x51, 0x56, 0x5F, 0x58, 0x4D, 0x4A, 0x43, 0x44,
-    0x19, 0x1E, 0x17, 0x10, 0x05, 0x02, 0x0B, 0x0C, 0x21, 0x26, 0x2F, 0x28, 0x3D, 0x3A, 0x33, 0x34,
-    0x4E, 0x49, 0x40, 0x47, 0x52, 0x55, 0x5C, 0x5B, 0x76, 0x71, 0x78, 0x7F, 0x6A, 0x6D, 0x64, 0x63,
-    0x3E, 0x39, 0x30, 0x37, 0x22, 0x25, 0x2C, 0x2B, 0x06, 0x01, 0x08, 0x0F, 0x1A, 0x1D, 0x14, 0x13,
-    0xAE, 0xA9, 0xA0, 0xA7, 0xB2, 0xB5, 0xBC, 0xBB, 0x96, 0x91, 0x98, 0x9F, 0x8A, 0x8D, 0x84, 0x83,
-    0xDE, 0xD9, 0xD0, 0xD7, 0xC2, 0xC5, 0xCC, 0xCB, 0xE6, 0xE1, 0xE8, 0xEF, 0xFA, 0xFD, 0xF4, 0xF3
-};
-
-static drflac_uint16 drflac__crc16_table[] = {
-    0x0000, 0x8005, 0x800F, 0x000A, 0x801B, 0x001E, 0x0014, 0x8011,
-    0x8033, 0x0036, 0x003C, 0x8039, 0x0028, 0x802D, 0x8027, 0x0022,
-    0x8063, 0x0066, 0x006C, 0x8069, 0x0078, 0x807D, 0x8077, 0x0072,
-    0x0050, 0x8055, 0x805F, 0x005A, 0x804B, 0x004E, 0x0044, 0x8041,
-    0x80C3, 0x00C6, 0x00CC, 0x80C9, 0x00D8, 0x80DD, 0x80D7, 0x00D2,
-    0x00F0, 0x80F5, 0x80FF, 0x00FA, 0x80EB, 0x00EE, 0x00E4, 0x80E1,
-    0x00A0, 0x80A5, 0x80AF, 0x00AA, 0x80BB, 0x00BE, 0x00B4, 0x80B1,
-    0x8093, 0x0096, 0x009C, 0x8099, 0x0088, 0x808D, 0x8087, 0x0082,
-    0x8183, 0x0186, 0x018C, 0x8189, 0x0198, 0x819D, 0x8197, 0x0192,
-    0x01B0, 0x81B5, 0x81BF, 0x01BA, 0x81AB, 0x01AE, 0x01A4, 0x81A1,
-    0x01E0, 0x81E5, 0x81EF, 0x01EA, 0x81FB, 0x01FE, 0x01F4, 0x81F1,
-    0x81D3, 0x01D6, 0x01DC, 0x81D9, 0x01C8, 0x81CD, 0x81C7, 0x01C2,
-    0x0140, 0x8145, 0x814F, 0x014A, 0x815B, 0x015E, 0x0154, 0x8151,
-    0x8173, 0x0176, 0x017C, 0x8179, 0x0168, 0x816D, 0x8167, 0x0162,
-    0x8123, 0x0126, 0x012C, 0x8129, 0x0138, 0x813D, 0x8137, 0x0132,
-    0x0110, 0x8115, 0x811F, 0x011A, 0x810B, 0x010E, 0x0104, 0x8101,
-    0x8303, 0x0306, 0x030C, 0x8309, 0x0318, 0x831D, 0x8317, 0x0312,
-    0x0330, 0x8335, 0x833F, 0x033A, 0x832B, 0x032E, 0x0324, 0x8321,
-    0x0360, 0x8365, 0x836F, 0x036A, 0x837B, 0x037E, 0x0374, 0x8371,
-    0x8353, 0x0356, 0x035C, 0x8359, 0x0348, 0x834D, 0x8347, 0x0342,
-    0x03C0, 0x83C5, 0x83CF, 0x03CA, 0x83DB, 0x03DE, 0x03D4, 0x83D1,
-    0x83F3, 0x03F6, 0x03FC, 0x83F9, 0x03E8, 0x83ED, 0x83E7, 0x03E2,
-    0x83A3, 0x03A6, 0x03AC, 0x83A9, 0x03B8, 0x83BD, 0x83B7, 0x03B2,
-    0x0390, 0x8395, 0x839F, 0x039A, 0x838B, 0x038E, 0x0384, 0x8381,
-    0x0280, 0x8285, 0x828F, 0x028A, 0x829B, 0x029E, 0x0294, 0x8291,
-    0x82B3, 0x02B6, 0x02BC, 0x82B9, 0x02A8, 0x82AD, 0x82A7, 0x02A2,
-    0x82E3, 0x02E6, 0x02EC, 0x82E9, 0x02F8, 0x82FD, 0x82F7, 0x02F2,
-    0x02D0, 0x82D5, 0x82DF, 0x02DA, 0x82CB, 0x02CE, 0x02C4, 0x82C1,
-    0x8243, 0x0246, 0x024C, 0x8249, 0x0258, 0x825D, 0x8257, 0x0252,
-    0x0270, 0x8275, 0x827F, 0x027A, 0x826B, 0x026E, 0x0264, 0x8261,
-    0x0220, 0x8225, 0x822F, 0x022A, 0x823B, 0x023E, 0x0234, 0x8231,
-    0x8213, 0x0216, 0x021C, 0x8219, 0x0208, 0x820D, 0x8207, 0x0202
-};
-
-static DRFLAC_INLINE drflac_uint8 drflac_crc8_byte(drflac_uint8 crc, drflac_uint8 data)
-{
-    return drflac__crc8_table[crc ^ data];
-}
-
-static DRFLAC_INLINE drflac_uint8 drflac_crc8(drflac_uint8 crc, drflac_uint32 data, drflac_uint32 count)
-{
-#ifdef DR_FLAC_NO_CRC
-    (void)crc;
-    (void)data;
-    (void)count;
-    return 0;
-#else
-#if 0
-    /* REFERENCE (use of this implementation requires an explicit flush by doing "drflac_crc8(crc, 0, 8);") */
-    drflac_uint8 p = 0x07;
-    for (int i = count-1; i >= 0; --i) {
-        drflac_uint8 bit = (data & (1 << i)) >> i;
-        if (crc & 0x80) {
-            crc = ((crc << 1) | bit) ^ p;
-        } else {
-            crc = ((crc << 1) | bit);
-        }
-    }
-    return crc;
-#else
-    drflac_uint32 wholeBytes;
-    drflac_uint32 leftoverBits;
-    drflac_uint64 leftoverDataMask;
-
-    static drflac_uint64 leftoverDataMaskTable[8] = {
-        0x00, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F
-    };
-
-    DRFLAC_ASSERT(count <= 32);
-
-    wholeBytes = count >> 3;
-    leftoverBits = count - (wholeBytes*8);
-    leftoverDataMask = leftoverDataMaskTable[leftoverBits];
-
-    switch (wholeBytes) {
-        case 4: crc = drflac_crc8_byte(crc, (drflac_uint8)((data & (0xFF000000UL << leftoverBits)) >> (24 + leftoverBits)));
-        case 3: crc = drflac_crc8_byte(crc, (drflac_uint8)((data & (0x00FF0000UL << leftoverBits)) >> (16 + leftoverBits)));
-        case 2: crc = drflac_crc8_byte(crc, (drflac_uint8)((data & (0x0000FF00UL << leftoverBits)) >> ( 8 + leftoverBits)));
-        case 1: crc = drflac_crc8_byte(crc, (drflac_uint8)((data & (0x000000FFUL << leftoverBits)) >> ( 0 + leftoverBits)));
-        case 0: if (leftoverBits > 0) crc = (drflac_uint8)((crc << leftoverBits) ^ drflac__crc8_table[(crc >> (8 - leftoverBits)) ^ (data & leftoverDataMask)]);
-    }
-    return crc;
-#endif
-#endif
-}
-
-static DRFLAC_INLINE drflac_uint16 drflac_crc16_byte(drflac_uint16 crc, drflac_uint8 data)
-{
-    return (crc << 8) ^ drflac__crc16_table[(drflac_uint8)(crc >> 8) ^ data];
-}
-
-static DRFLAC_INLINE drflac_uint16 drflac_crc16_cache(drflac_uint16 crc, drflac_cache_t data)
-{
-#ifdef DRFLAC_64BIT
-    crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 56) & 0xFF));
-    crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 48) & 0xFF));
-    crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 40) & 0xFF));
-    crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 32) & 0xFF));
-#endif
-    crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 24) & 0xFF));
-    crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 16) & 0xFF));
-    crc = drflac_crc16_byte(crc, (drflac_uint8)((data >>  8) & 0xFF));
-    crc = drflac_crc16_byte(crc, (drflac_uint8)((data >>  0) & 0xFF));
-
-    return crc;
-}
-
-static DRFLAC_INLINE drflac_uint16 drflac_crc16_bytes(drflac_uint16 crc, drflac_cache_t data, drflac_uint32 byteCount)
-{
-    switch (byteCount)
-    {
-#ifdef DRFLAC_64BIT
-    case 8: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 56) & 0xFF));
-    case 7: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 48) & 0xFF));
-    case 6: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 40) & 0xFF));
-    case 5: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 32) & 0xFF));
-#endif
-    case 4: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 24) & 0xFF));
-    case 3: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 16) & 0xFF));
-    case 2: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >>  8) & 0xFF));
-    case 1: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >>  0) & 0xFF));
-    }
-
-    return crc;
-}
-
-#if 0
-static DRFLAC_INLINE drflac_uint16 drflac_crc16__32bit(drflac_uint16 crc, drflac_uint32 data, drflac_uint32 count)
-{
-#ifdef DR_FLAC_NO_CRC
-    (void)crc;
-    (void)data;
-    (void)count;
-    return 0;
-#else
-#if 0
-    /* REFERENCE (use of this implementation requires an explicit flush by doing "drflac_crc16(crc, 0, 16);") */
-    drflac_uint16 p = 0x8005;
-    for (int i = count-1; i >= 0; --i) {
-        drflac_uint16 bit = (data & (1ULL << i)) >> i;
-        if (r & 0x8000) {
-            r = ((r << 1) | bit) ^ p;
-        } else {
-            r = ((r << 1) | bit);
-        }
-    }
-
-    return crc;
-#else
-    drflac_uint32 wholeBytes;
-    drflac_uint32 leftoverBits;
-    drflac_uint64 leftoverDataMask;
-
-    static drflac_uint64 leftoverDataMaskTable[8] = {
-        0x00, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F
-    };
-
-    DRFLAC_ASSERT(count <= 64);
-
-    wholeBytes = count >> 3;
-    leftoverBits = count & 7;
-    leftoverDataMask = leftoverDataMaskTable[leftoverBits];
-
-    switch (wholeBytes) {
-        default:
-        case 4: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (0xFF000000UL << leftoverBits)) >> (24 + leftoverBits)));
-        case 3: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (0x00FF0000UL << leftoverBits)) >> (16 + leftoverBits)));
-        case 2: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (0x0000FF00UL << leftoverBits)) >> ( 8 + leftoverBits)));
-        case 1: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (0x000000FFUL << leftoverBits)) >> ( 0 + leftoverBits)));
-        case 0: if (leftoverBits > 0) crc = (crc << leftoverBits) ^ drflac__crc16_table[(crc >> (16 - leftoverBits)) ^ (data & leftoverDataMask)];
-    }
-    return crc;
-#endif
-#endif
-}
-
-static DRFLAC_INLINE drflac_uint16 drflac_crc16__64bit(drflac_uint16 crc, drflac_uint64 data, drflac_uint32 count)
-{
-#ifdef DR_FLAC_NO_CRC
-    (void)crc;
-    (void)data;
-    (void)count;
-    return 0;
-#else
-    drflac_uint32 wholeBytes;
-    drflac_uint32 leftoverBits;
-    drflac_uint64 leftoverDataMask;
-
-    static drflac_uint64 leftoverDataMaskTable[8] = {
-        0x00, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F
-    };
-
-    DRFLAC_ASSERT(count <= 64);
-
-    wholeBytes = count >> 3;
-    leftoverBits = count & 7;
-    leftoverDataMask = leftoverDataMaskTable[leftoverBits];
-
-    switch (wholeBytes) {
-        default:
-        case 8: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0xFF000000 << 32) << leftoverBits)) >> (56 + leftoverBits)));    /* Weird "<< 32" bitshift is required for C89 because it doesn't support 64-bit constants. Should be optimized out by a good compiler. */
-        case 7: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x00FF0000 << 32) << leftoverBits)) >> (48 + leftoverBits)));
-        case 6: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x0000FF00 << 32) << leftoverBits)) >> (40 + leftoverBits)));
-        case 5: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x000000FF << 32) << leftoverBits)) >> (32 + leftoverBits)));
-        case 4: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0xFF000000      ) << leftoverBits)) >> (24 + leftoverBits)));
-        case 3: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x00FF0000      ) << leftoverBits)) >> (16 + leftoverBits)));
-        case 2: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x0000FF00      ) << leftoverBits)) >> ( 8 + leftoverBits)));
-        case 1: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x000000FF      ) << leftoverBits)) >> ( 0 + leftoverBits)));
-        case 0: if (leftoverBits > 0) crc = (crc << leftoverBits) ^ drflac__crc16_table[(crc >> (16 - leftoverBits)) ^ (data & leftoverDataMask)];
-    }
-    return crc;
-#endif
-}
-
-
-static DRFLAC_INLINE drflac_uint16 drflac_crc16(drflac_uint16 crc, drflac_cache_t data, drflac_uint32 count)
-{
-#ifdef DRFLAC_64BIT
-    return drflac_crc16__64bit(crc, data, count);
-#else
-    return drflac_crc16__32bit(crc, data, count);
-#endif
-}
-#endif
-
-
-#ifdef DRFLAC_64BIT
-#define drflac__be2host__cache_line drflac__be2host_64
-#else
-#define drflac__be2host__cache_line drflac__be2host_32
-#endif
-
-/*
-BIT READING ATTEMPT #2
-
-This uses a 32- or 64-bit bit-shifted cache - as bits are read, the cache is shifted such that the first valid bit is sitting
-on the most significant bit. It uses the notion of an L1 and L2 cache (borrowed from CPU architecture), where the L1 cache
-is a 32- or 64-bit unsigned integer (depending on whether or not a 32- or 64-bit build is being compiled) and the L2 is an
-array of "cache lines", with each cache line being the same size as the L1. The L2 is a buffer of about 4KB and is where data
-from onRead() is read into.
-*/
-#define DRFLAC_CACHE_L1_SIZE_BYTES(bs)                      (sizeof((bs)->cache))
-#define DRFLAC_CACHE_L1_SIZE_BITS(bs)                       (sizeof((bs)->cache)*8)
-#define DRFLAC_CACHE_L1_BITS_REMAINING(bs)                  (DRFLAC_CACHE_L1_SIZE_BITS(bs) - (bs)->consumedBits)
-#define DRFLAC_CACHE_L1_SELECTION_MASK(_bitCount)           (~((~(drflac_cache_t)0) >> (_bitCount)))
-#define DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, _bitCount)      (DRFLAC_CACHE_L1_SIZE_BITS(bs) - (_bitCount))
-#define DRFLAC_CACHE_L1_SELECT(bs, _bitCount)               (((bs)->cache) & DRFLAC_CACHE_L1_SELECTION_MASK(_bitCount))
-#define DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, _bitCount)     (DRFLAC_CACHE_L1_SELECT((bs), (_bitCount)) >>  DRFLAC_CACHE_L1_SELECTION_SHIFT((bs), (_bitCount)))
-#define DRFLAC_CACHE_L1_SELECT_AND_SHIFT_SAFE(bs, _bitCount)(DRFLAC_CACHE_L1_SELECT((bs), (_bitCount)) >> (DRFLAC_CACHE_L1_SELECTION_SHIFT((bs), (_bitCount)) & (DRFLAC_CACHE_L1_SIZE_BITS(bs)-1)))
-#define DRFLAC_CACHE_L2_SIZE_BYTES(bs)                      (sizeof((bs)->cacheL2))
-#define DRFLAC_CACHE_L2_LINE_COUNT(bs)                      (DRFLAC_CACHE_L2_SIZE_BYTES(bs) / sizeof((bs)->cacheL2[0]))
-#define DRFLAC_CACHE_L2_LINES_REMAINING(bs)                 (DRFLAC_CACHE_L2_LINE_COUNT(bs) - (bs)->nextL2Line)
-
-
-#ifndef DR_FLAC_NO_CRC
-static DRFLAC_INLINE void drflac__reset_crc16(drflac_bs* bs)
-{
-    bs->crc16 = 0;
-    bs->crc16CacheIgnoredBytes = bs->consumedBits >> 3;
-}
-
-static DRFLAC_INLINE void drflac__update_crc16(drflac_bs* bs)
-{
-    if (bs->crc16CacheIgnoredBytes == 0) {
-        bs->crc16 = drflac_crc16_cache(bs->crc16, bs->crc16Cache);
-    } else {
-        bs->crc16 = drflac_crc16_bytes(bs->crc16, bs->crc16Cache, DRFLAC_CACHE_L1_SIZE_BYTES(bs) - bs->crc16CacheIgnoredBytes);
-        bs->crc16CacheIgnoredBytes = 0;
-    }
-}
-
-static DRFLAC_INLINE drflac_uint16 drflac__flush_crc16(drflac_bs* bs)
-{
-    /* We should never be flushing in a situation where we are not aligned on a byte boundary. */
-    DRFLAC_ASSERT((DRFLAC_CACHE_L1_BITS_REMAINING(bs) & 7) == 0);
-
-    /*
-    The bits that were read from the L1 cache need to be accumulated. The number of bytes needing to be accumulated is determined
-    by the number of bits that have been consumed.
-    */
-    if (DRFLAC_CACHE_L1_BITS_REMAINING(bs) == 0) {
-        drflac__update_crc16(bs);
-    } else {
-        /* We only accumulate the consumed bits. */
-        bs->crc16 = drflac_crc16_bytes(bs->crc16, bs->crc16Cache >> DRFLAC_CACHE_L1_BITS_REMAINING(bs), (bs->consumedBits >> 3) - bs->crc16CacheIgnoredBytes);
-
-        /*
-        The bits that we just accumulated should never be accumulated again. We need to keep track of how many bytes were accumulated
-        so we can handle that later.
-        */
-        bs->crc16CacheIgnoredBytes = bs->consumedBits >> 3;
-    }
-
-    return bs->crc16;
-}
-#endif
-
-static DRFLAC_INLINE drflac_bool32 drflac__reload_l1_cache_from_l2(drflac_bs* bs)
-{
-    size_t bytesRead;
-    size_t alignedL1LineCount;
-
-    /* Fast path. Try loading straight from L2. */
-    if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {
-        bs->cache = bs->cacheL2[bs->nextL2Line++];
-        return DRFLAC_TRUE;
-    }
-
-    /*
-    If we get here it means we've run out of data in the L2 cache. We'll need to fetch more from the client, if there's
-    any left.
-    */
-    if (bs->unalignedByteCount > 0) {
-        return DRFLAC_FALSE;   /* If we have any unaligned bytes it means there's no more aligned bytes left in the client. */
-    }
-
-    bytesRead = bs->onRead(bs->pUserData, bs->cacheL2, DRFLAC_CACHE_L2_SIZE_BYTES(bs));
-
-    bs->nextL2Line = 0;
-    if (bytesRead == DRFLAC_CACHE_L2_SIZE_BYTES(bs)) {
-        bs->cache = bs->cacheL2[bs->nextL2Line++];
-        return DRFLAC_TRUE;
-    }
-
-
-    /*
-    If we get here it means we were unable to retrieve enough data to fill the entire L2 cache. It probably
-    means we've just reached the end of the file. We need to move the valid data down to the end of the buffer
-    and adjust the index of the next line accordingly. Also keep in mind that the L2 cache must be aligned to
-    the size of the L1 so we'll need to seek backwards by any misaligned bytes.
-    */
-    alignedL1LineCount = bytesRead / DRFLAC_CACHE_L1_SIZE_BYTES(bs);
-
-    /* We need to keep track of any unaligned bytes for later use. */
-    bs->unalignedByteCount = bytesRead - (alignedL1LineCount * DRFLAC_CACHE_L1_SIZE_BYTES(bs));
-    if (bs->unalignedByteCount > 0) {
-        bs->unalignedCache = bs->cacheL2[alignedL1LineCount];
-    }
-
-    if (alignedL1LineCount > 0) {
-        size_t offset = DRFLAC_CACHE_L2_LINE_COUNT(bs) - alignedL1LineCount;
-        size_t i;
-        for (i = alignedL1LineCount; i > 0; --i) {
-            bs->cacheL2[i-1 + offset] = bs->cacheL2[i-1];
-        }
-
-        bs->nextL2Line = (drflac_uint32)offset;
-        bs->cache = bs->cacheL2[bs->nextL2Line++];
-        return DRFLAC_TRUE;
-    } else {
-        /* If we get into this branch it means we weren't able to load any L1-aligned data. */
-        bs->nextL2Line = DRFLAC_CACHE_L2_LINE_COUNT(bs);
-        return DRFLAC_FALSE;
-    }
-}
-
-static drflac_bool32 drflac__reload_cache(drflac_bs* bs)
-{
-    size_t bytesRead;
-
-#ifndef DR_FLAC_NO_CRC
-    drflac__update_crc16(bs);
-#endif
-
-    /* Fast path. Try just moving the next value in the L2 cache to the L1 cache. */
-    if (drflac__reload_l1_cache_from_l2(bs)) {
-        bs->cache = drflac__be2host__cache_line(bs->cache);
-        bs->consumedBits = 0;
-#ifndef DR_FLAC_NO_CRC
-        bs->crc16Cache = bs->cache;
-#endif
-        return DRFLAC_TRUE;
-    }
-
-    /* Slow path. */
-
-    /*
-    If we get here it means we have failed to load the L1 cache from the L2. Likely we've just reached the end of the stream and the last
-    few bytes did not meet the alignment requirements for the L2 cache. In this case we need to fall back to a slower path and read the
-    data from the unaligned cache.
-    */
-    bytesRead = bs->unalignedByteCount;
-    if (bytesRead == 0) {
-        bs->consumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs);   /* <-- The stream has been exhausted, so marked the bits as consumed. */
-        return DRFLAC_FALSE;
-    }
-
-    DRFLAC_ASSERT(bytesRead < DRFLAC_CACHE_L1_SIZE_BYTES(bs));
-    bs->consumedBits = (drflac_uint32)(DRFLAC_CACHE_L1_SIZE_BYTES(bs) - bytesRead) * 8;
-
-    bs->cache = drflac__be2host__cache_line(bs->unalignedCache);
-    bs->cache &= DRFLAC_CACHE_L1_SELECTION_MASK(DRFLAC_CACHE_L1_BITS_REMAINING(bs));    /* <-- Make sure the consumed bits are always set to zero. Other parts of the library depend on this property. */
-    bs->unalignedByteCount = 0;     /* <-- At this point the unaligned bytes have been moved into the cache and we thus have no more unaligned bytes. */
-
-#ifndef DR_FLAC_NO_CRC
-    bs->crc16Cache = bs->cache >> bs->consumedBits;
-    bs->crc16CacheIgnoredBytes = bs->consumedBits >> 3;
-#endif
-    return DRFLAC_TRUE;
-}
-
-static void drflac__reset_cache(drflac_bs* bs)
-{
-    bs->nextL2Line   = DRFLAC_CACHE_L2_LINE_COUNT(bs);  /* <-- This clears the L2 cache. */
-    bs->consumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs);   /* <-- This clears the L1 cache. */
-    bs->cache = 0;
-    bs->unalignedByteCount = 0;                         /* <-- This clears the trailing unaligned bytes. */
-    bs->unalignedCache = 0;
-
-#ifndef DR_FLAC_NO_CRC
-    bs->crc16Cache = 0;
-    bs->crc16CacheIgnoredBytes = 0;
-#endif
-}
-
-
-static DRFLAC_INLINE drflac_bool32 drflac__read_uint32(drflac_bs* bs, unsigned int bitCount, drflac_uint32* pResultOut)
-{
-    DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(pResultOut != NULL);
-    DRFLAC_ASSERT(bitCount > 0);
-    DRFLAC_ASSERT(bitCount <= 32);
-
-    if (bs->consumedBits == DRFLAC_CACHE_L1_SIZE_BITS(bs)) {
-        if (!drflac__reload_cache(bs)) {
-            return DRFLAC_FALSE;
-        }
-    }
-
-    if (bitCount <= DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
-        /*
-        If we want to load all 32-bits from a 32-bit cache we need to do it slightly differently because we can't do
-        a 32-bit shift on a 32-bit integer. This will never be the case on 64-bit caches, so we can have a slightly
-        more optimal solution for this.
-        */
-#ifdef DRFLAC_64BIT
-        *pResultOut = (drflac_uint32)DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCount);
-        bs->consumedBits += bitCount;
-        bs->cache <<= bitCount;
-#else
-        if (bitCount < DRFLAC_CACHE_L1_SIZE_BITS(bs)) {
-            *pResultOut = (drflac_uint32)DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCount);
-            bs->consumedBits += bitCount;
-            bs->cache <<= bitCount;
-        } else {
-            /* Cannot shift by 32-bits, so need to do it differently. */
-            *pResultOut = (drflac_uint32)bs->cache;
-            bs->consumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs);
-            bs->cache = 0;
-        }
-#endif
-
-        return DRFLAC_TRUE;
-    } else {
-        /* It straddles the cached data. It will never cover more than the next chunk. We just read the number in two parts and combine them. */
-        drflac_uint32 bitCountHi = DRFLAC_CACHE_L1_BITS_REMAINING(bs);
-        drflac_uint32 bitCountLo = bitCount - bitCountHi;
-        drflac_uint32 resultHi;
-
-        DRFLAC_ASSERT(bitCountHi > 0);
-        DRFLAC_ASSERT(bitCountHi < 32);
-        resultHi = (drflac_uint32)DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCountHi);
-
-        if (!drflac__reload_cache(bs)) {
-            return DRFLAC_FALSE;
-        }
-
-        *pResultOut = (resultHi << bitCountLo) | (drflac_uint32)DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCountLo);
-        bs->consumedBits += bitCountLo;
-        bs->cache <<= bitCountLo;
-        return DRFLAC_TRUE;
-    }
-}
-
-static drflac_bool32 drflac__read_int32(drflac_bs* bs, unsigned int bitCount, drflac_int32* pResult)
-{
-    drflac_uint32 result;
-    drflac_uint32 signbit;
-
-    DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(pResult != NULL);
-    DRFLAC_ASSERT(bitCount > 0);
-    DRFLAC_ASSERT(bitCount <= 32);
-
-    if (!drflac__read_uint32(bs, bitCount, &result)) {
-        return DRFLAC_FALSE;
-    }
-
-    signbit = ((result >> (bitCount-1)) & 0x01);
-    result |= (~signbit + 1) << bitCount;
-
-    *pResult = (drflac_int32)result;
-    return DRFLAC_TRUE;
-}
-
-#ifdef DRFLAC_64BIT
-static drflac_bool32 drflac__read_uint64(drflac_bs* bs, unsigned int bitCount, drflac_uint64* pResultOut)
-{
-    drflac_uint32 resultHi;
-    drflac_uint32 resultLo;
-
-    DRFLAC_ASSERT(bitCount <= 64);
-    DRFLAC_ASSERT(bitCount >  32);
-
-    if (!drflac__read_uint32(bs, bitCount - 32, &resultHi)) {
-        return DRFLAC_FALSE;
-    }
-
-    if (!drflac__read_uint32(bs, 32, &resultLo)) {
-        return DRFLAC_FALSE;
-    }
-
-    *pResultOut = (((drflac_uint64)resultHi) << 32) | ((drflac_uint64)resultLo);
-    return DRFLAC_TRUE;
-}
-#endif
-
-/* Function below is unused, but leaving it here in case I need to quickly add it again. */
-#if 0
-static drflac_bool32 drflac__read_int64(drflac_bs* bs, unsigned int bitCount, drflac_int64* pResultOut)
-{
-    drflac_uint64 result;
-    drflac_uint64 signbit;
-
-    DRFLAC_ASSERT(bitCount <= 64);
-
-    if (!drflac__read_uint64(bs, bitCount, &result)) {
-        return DRFLAC_FALSE;
-    }
-
-    signbit = ((result >> (bitCount-1)) & 0x01);
-    result |= (~signbit + 1) << bitCount;
-
-    *pResultOut = (drflac_int64)result;
-    return DRFLAC_TRUE;
-}
-#endif
-
-static drflac_bool32 drflac__read_uint16(drflac_bs* bs, unsigned int bitCount, drflac_uint16* pResult)
-{
-    drflac_uint32 result;
-
-    DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(pResult != NULL);
-    DRFLAC_ASSERT(bitCount > 0);
-    DRFLAC_ASSERT(bitCount <= 16);
-
-    if (!drflac__read_uint32(bs, bitCount, &result)) {
-        return DRFLAC_FALSE;
-    }
-
-    *pResult = (drflac_uint16)result;
-    return DRFLAC_TRUE;
-}
-
-#if 0
-static drflac_bool32 drflac__read_int16(drflac_bs* bs, unsigned int bitCount, drflac_int16* pResult)
-{
-    drflac_int32 result;
-
-    DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(pResult != NULL);
-    DRFLAC_ASSERT(bitCount > 0);
-    DRFLAC_ASSERT(bitCount <= 16);
-
-    if (!drflac__read_int32(bs, bitCount, &result)) {
-        return DRFLAC_FALSE;
-    }
-
-    *pResult = (drflac_int16)result;
-    return DRFLAC_TRUE;
-}
-#endif
-
-static drflac_bool32 drflac__read_uint8(drflac_bs* bs, unsigned int bitCount, drflac_uint8* pResult)
-{
-    drflac_uint32 result;
-
-    DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(pResult != NULL);
-    DRFLAC_ASSERT(bitCount > 0);
-    DRFLAC_ASSERT(bitCount <= 8);
-
-    if (!drflac__read_uint32(bs, bitCount, &result)) {
-        return DRFLAC_FALSE;
-    }
-
-    *pResult = (drflac_uint8)result;
-    return DRFLAC_TRUE;
-}
-
-static drflac_bool32 drflac__read_int8(drflac_bs* bs, unsigned int bitCount, drflac_int8* pResult)
-{
-    drflac_int32 result;
-
-    DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(pResult != NULL);
-    DRFLAC_ASSERT(bitCount > 0);
-    DRFLAC_ASSERT(bitCount <= 8);
-
-    if (!drflac__read_int32(bs, bitCount, &result)) {
-        return DRFLAC_FALSE;
-    }
-
-    *pResult = (drflac_int8)result;
-    return DRFLAC_TRUE;
-}
-
-
-static drflac_bool32 drflac__seek_bits(drflac_bs* bs, size_t bitsToSeek)
-{
-    if (bitsToSeek <= DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
-        bs->consumedBits += (drflac_uint32)bitsToSeek;
-        bs->cache <<= bitsToSeek;
-        return DRFLAC_TRUE;
-    } else {
-        /* It straddles the cached data. This function isn't called too frequently so I'm favouring simplicity here. */
-        bitsToSeek       -= DRFLAC_CACHE_L1_BITS_REMAINING(bs);
-        bs->consumedBits += DRFLAC_CACHE_L1_BITS_REMAINING(bs);
-        bs->cache         = 0;
-
-        /* Simple case. Seek in groups of the same number as bits that fit within a cache line. */
-#ifdef DRFLAC_64BIT
-        while (bitsToSeek >= DRFLAC_CACHE_L1_SIZE_BITS(bs)) {
-            drflac_uint64 bin;
-            if (!drflac__read_uint64(bs, DRFLAC_CACHE_L1_SIZE_BITS(bs), &bin)) {
-                return DRFLAC_FALSE;
-            }
-            bitsToSeek -= DRFLAC_CACHE_L1_SIZE_BITS(bs);
-        }
-#else
-        while (bitsToSeek >= DRFLAC_CACHE_L1_SIZE_BITS(bs)) {
-            drflac_uint32 bin;
-            if (!drflac__read_uint32(bs, DRFLAC_CACHE_L1_SIZE_BITS(bs), &bin)) {
-                return DRFLAC_FALSE;
-            }
-            bitsToSeek -= DRFLAC_CACHE_L1_SIZE_BITS(bs);
-        }
-#endif
-
-        /* Whole leftover bytes. */
-        while (bitsToSeek >= 8) {
-            drflac_uint8 bin;
-            if (!drflac__read_uint8(bs, 8, &bin)) {
-                return DRFLAC_FALSE;
-            }
-            bitsToSeek -= 8;
-        }
-
-        /* Leftover bits. */
-        if (bitsToSeek > 0) {
-            drflac_uint8 bin;
-            if (!drflac__read_uint8(bs, (drflac_uint32)bitsToSeek, &bin)) {
-                return DRFLAC_FALSE;
-            }
-            bitsToSeek = 0; /* <-- Necessary for the assert below. */
-        }
-
-        DRFLAC_ASSERT(bitsToSeek == 0);
-        return DRFLAC_TRUE;
-    }
-}
-
-
-/* This function moves the bit streamer to the first bit after the sync code (bit 15 of the of the frame header). It will also update the CRC-16. */
-static drflac_bool32 drflac__find_and_seek_to_next_sync_code(drflac_bs* bs)
-{
-    DRFLAC_ASSERT(bs != NULL);
-
-    /*
-    The sync code is always aligned to 8 bits. This is convenient for us because it means we can do byte-aligned movements. The first
-    thing to do is align to the next byte.
-    */
-    if (!drflac__seek_bits(bs, DRFLAC_CACHE_L1_BITS_REMAINING(bs) & 7)) {
-        return DRFLAC_FALSE;
-    }
-
-    for (;;) {
-        drflac_uint8 hi;
-
-#ifndef DR_FLAC_NO_CRC
-        drflac__reset_crc16(bs);
-#endif
-
-        if (!drflac__read_uint8(bs, 8, &hi)) {
-            return DRFLAC_FALSE;
-        }
-
-        if (hi == 0xFF) {
-            drflac_uint8 lo;
-            if (!drflac__read_uint8(bs, 6, &lo)) {
-                return DRFLAC_FALSE;
-            }
-
-            if (lo == 0x3E) {
-                return DRFLAC_TRUE;
-            } else {
-                if (!drflac__seek_bits(bs, DRFLAC_CACHE_L1_BITS_REMAINING(bs) & 7)) {
-                    return DRFLAC_FALSE;
-                }
-            }
-        }
-    }
-
-    /* Should never get here. */
-    /*return DRFLAC_FALSE;*/
-}
-
-
-#if defined(DRFLAC_HAS_LZCNT_INTRINSIC)
-#define DRFLAC_IMPLEMENT_CLZ_LZCNT
-#endif
-#if  defined(_MSC_VER) && _MSC_VER >= 1400 && (defined(DRFLAC_X64) || defined(DRFLAC_X86))
-#define DRFLAC_IMPLEMENT_CLZ_MSVC
-#endif
-
-static DRFLAC_INLINE drflac_uint32 drflac__clz_software(drflac_cache_t x)
-{
-    drflac_uint32 n;
-    static drflac_uint32 clz_table_4[] = {
-        0,
-        4,
-        3, 3,
-        2, 2, 2, 2,
-        1, 1, 1, 1, 1, 1, 1, 1
-    };
-
-    if (x == 0) {
-        return sizeof(x)*8;
-    }
-
-    n = clz_table_4[x >> (sizeof(x)*8 - 4)];
-    if (n == 0) {
-#ifdef DRFLAC_64BIT
-        if ((x & ((drflac_uint64)0xFFFFFFFF << 32)) == 0) { n  = 32; x <<= 32; }
-        if ((x & ((drflac_uint64)0xFFFF0000 << 32)) == 0) { n += 16; x <<= 16; }
-        if ((x & ((drflac_uint64)0xFF000000 << 32)) == 0) { n += 8;  x <<= 8;  }
-        if ((x & ((drflac_uint64)0xF0000000 << 32)) == 0) { n += 4;  x <<= 4;  }
-#else
-        if ((x & 0xFFFF0000) == 0) { n  = 16; x <<= 16; }
-        if ((x & 0xFF000000) == 0) { n += 8;  x <<= 8;  }
-        if ((x & 0xF0000000) == 0) { n += 4;  x <<= 4;  }
-#endif
-        n += clz_table_4[x >> (sizeof(x)*8 - 4)];
-    }
-
-    return n - 1;
-}
-
-#ifdef DRFLAC_IMPLEMENT_CLZ_LZCNT
-static DRFLAC_INLINE drflac_bool32 drflac__is_lzcnt_supported(void)
-{
-    /* Fast compile time check for ARM. */
-#if defined(DRFLAC_HAS_LZCNT_INTRINSIC) && defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 5)
-    return DRFLAC_TRUE;
-#else
-    /* If the compiler itself does not support the intrinsic then we'll need to return false. */
-    #ifdef DRFLAC_HAS_LZCNT_INTRINSIC
-        return drflac__gIsLZCNTSupported;
-    #else
-        return DRFLAC_FALSE;
-    #endif
-#endif
-}
-
-static DRFLAC_INLINE drflac_uint32 drflac__clz_lzcnt(drflac_cache_t x)
-{
-#if defined(_MSC_VER) && !defined(__clang__)
-    #ifdef DRFLAC_64BIT
-        return (drflac_uint32)__lzcnt64(x);
-    #else
-        return (drflac_uint32)__lzcnt(x);
-    #endif
-#else
-    #if defined(__GNUC__) || defined(__clang__)
-        #if defined(DRFLAC_X64)
-            {
-                drflac_uint64 r;
-                __asm__ __volatile__ (
-                    "lzcnt{ %1, %0| %0, %1}" : "=r"(r) : "r"(x)
-                );
-
-                return (drflac_uint32)r;
-            }
-        #elif defined(DRFLAC_X86)
-            {
-                drflac_uint32 r;
-                __asm__ __volatile__ (
-                    "lzcnt{l %1, %0| %0, %1}" : "=r"(r) : "r"(x)
-                );
-
-                return r;
-            }
-        #elif defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 5) && !defined(DRFLAC_64BIT)   /* <-- I haven't tested 64-bit inline assembly, so only enabling this for the 32-bit build for now. */
-            {
-                unsigned int r;
-                __asm__ __volatile__ (
-                #if defined(DRFLAC_64BIT)
-                    "clz %w[out], %w[in]" : [out]"=r"(r) : [in]"r"(x)   /* <-- This is untested. If someone in the community could test this, that would be appreciated! */
-                #else
-                    "clz %[out], %[in]" : [out]"=r"(r) : [in]"r"(x)
-                #endif
-                );
-
-                return r;
-            }
-        #else
-            if (x == 0) {
-                return sizeof(x)*8;
-            }
-            #ifdef DRFLAC_64BIT
-                return (drflac_uint32)__builtin_clzll((drflac_uint64)x);
-            #else
-                return (drflac_uint32)__builtin_clzl((drflac_uint32)x);
-            #endif
-        #endif
-    #else
-        /* Unsupported compiler. */
-        #error "This compiler does not support the lzcnt intrinsic."
-    #endif
-#endif
-}
-#endif
-
-#ifdef DRFLAC_IMPLEMENT_CLZ_MSVC
-#include <intrin.h> /* For BitScanReverse(). */
-
-static DRFLAC_INLINE drflac_uint32 drflac__clz_msvc(drflac_cache_t x)
-{
-    drflac_uint32 n;
-
-    if (x == 0) {
-        return sizeof(x)*8;
-    }
-
-#ifdef DRFLAC_64BIT
-    _BitScanReverse64((unsigned long*)&n, x);
-#else
-    _BitScanReverse((unsigned long*)&n, x);
-#endif
-    return sizeof(x)*8 - n - 1;
-}
-#endif
-
-static DRFLAC_INLINE drflac_uint32 drflac__clz(drflac_cache_t x)
-{
-#ifdef DRFLAC_IMPLEMENT_CLZ_LZCNT
-    if (drflac__is_lzcnt_supported()) {
-        return drflac__clz_lzcnt(x);
-    } else
-#endif
-    {
-#ifdef DRFLAC_IMPLEMENT_CLZ_MSVC
-        return drflac__clz_msvc(x);
-#else
-        return drflac__clz_software(x);
-#endif
-    }
-}
-
-
-static DRFLAC_INLINE drflac_bool32 drflac__seek_past_next_set_bit(drflac_bs* bs, unsigned int* pOffsetOut)
-{
-    drflac_uint32 zeroCounter = 0;
-    drflac_uint32 setBitOffsetPlus1;
-
-    while (bs->cache == 0) {
-        zeroCounter += (drflac_uint32)DRFLAC_CACHE_L1_BITS_REMAINING(bs);
-        if (!drflac__reload_cache(bs)) {
-            return DRFLAC_FALSE;
-        }
-    }
-
-    setBitOffsetPlus1 = drflac__clz(bs->cache);
-    setBitOffsetPlus1 += 1;
-
-    bs->consumedBits += setBitOffsetPlus1;
-    bs->cache <<= setBitOffsetPlus1;
-
-    *pOffsetOut = zeroCounter + setBitOffsetPlus1 - 1;
-    return DRFLAC_TRUE;
-}
-
-
-
-static drflac_bool32 drflac__seek_to_byte(drflac_bs* bs, drflac_uint64 offsetFromStart)
-{
-    DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(offsetFromStart > 0);
-
-    /*
-    Seeking from the start is not quite as trivial as it sounds because the onSeek callback takes a signed 32-bit integer (which
-    is intentional because it simplifies the implementation of the onSeek callbacks), however offsetFromStart is unsigned 64-bit.
-    To resolve we just need to do an initial seek from the start, and then a series of offset seeks to make up the remainder.
-    */
-    if (offsetFromStart > 0x7FFFFFFF) {
-        drflac_uint64 bytesRemaining = offsetFromStart;
-        if (!bs->onSeek(bs->pUserData, 0x7FFFFFFF, drflac_seek_origin_start)) {
-            return DRFLAC_FALSE;
-        }
-        bytesRemaining -= 0x7FFFFFFF;
-
-        while (bytesRemaining > 0x7FFFFFFF) {
-            if (!bs->onSeek(bs->pUserData, 0x7FFFFFFF, drflac_seek_origin_current)) {
-                return DRFLAC_FALSE;
-            }
-            bytesRemaining -= 0x7FFFFFFF;
-        }
-
-        if (bytesRemaining > 0) {
-            if (!bs->onSeek(bs->pUserData, (int)bytesRemaining, drflac_seek_origin_current)) {
-                return DRFLAC_FALSE;
-            }
-        }
-    } else {
-        if (!bs->onSeek(bs->pUserData, (int)offsetFromStart, drflac_seek_origin_start)) {
-            return DRFLAC_FALSE;
-        }
-    }
-
-    /* The cache should be reset to force a reload of fresh data from the client. */
-    drflac__reset_cache(bs);
-    return DRFLAC_TRUE;
-}
-
-
-static drflac_result drflac__read_utf8_coded_number(drflac_bs* bs, drflac_uint64* pNumberOut, drflac_uint8* pCRCOut)
-{
-    drflac_uint8 crc;
-    drflac_uint64 result;
-    drflac_uint8 utf8[7] = {0};
-    int byteCount;
-    int i;
-
-    DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(pNumberOut != NULL);
-    DRFLAC_ASSERT(pCRCOut != NULL);
-
-    crc = *pCRCOut;
-
-    if (!drflac__read_uint8(bs, 8, utf8)) {
-        *pNumberOut = 0;
-        return DRFLAC_AT_END;
-    }
-    crc = drflac_crc8(crc, utf8[0], 8);
-
-    if ((utf8[0] & 0x80) == 0) {
-        *pNumberOut = utf8[0];
-        *pCRCOut = crc;
-        return DRFLAC_SUCCESS;
-    }
-
-    /*byteCount = 1;*/
-    if ((utf8[0] & 0xE0) == 0xC0) {
-        byteCount = 2;
-    } else if ((utf8[0] & 0xF0) == 0xE0) {
-        byteCount = 3;
-    } else if ((utf8[0] & 0xF8) == 0xF0) {
-        byteCount = 4;
-    } else if ((utf8[0] & 0xFC) == 0xF8) {
-        byteCount = 5;
-    } else if ((utf8[0] & 0xFE) == 0xFC) {
-        byteCount = 6;
-    } else if ((utf8[0] & 0xFF) == 0xFE) {
-        byteCount = 7;
-    } else {
-        *pNumberOut = 0;
-        return DRFLAC_CRC_MISMATCH;     /* Bad UTF-8 encoding. */
-    }
-
-    /* Read extra bytes. */
-    DRFLAC_ASSERT(byteCount > 1);
-
-    result = (drflac_uint64)(utf8[0] & (0xFF >> (byteCount + 1)));
-    for (i = 1; i < byteCount; ++i) {
-        if (!drflac__read_uint8(bs, 8, utf8 + i)) {
-            *pNumberOut = 0;
-            return DRFLAC_AT_END;
-        }
-        crc = drflac_crc8(crc, utf8[i], 8);
-
-        result = (result << 6) | (utf8[i] & 0x3F);
-    }
-
-    *pNumberOut = result;
-    *pCRCOut = crc;
-    return DRFLAC_SUCCESS;
-}
-
-
-
-/*
-The next two functions are responsible for calculating the prediction.
-
-When the bits per sample is >16 we need to use 64-bit integer arithmetic because otherwise we'll run out of precision. It's
-safe to assume this will be slower on 32-bit platforms so we use a more optimal solution when the bits per sample is <=16.
-*/
-static DRFLAC_INLINE drflac_int32 drflac__calculate_prediction_32(drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pDecodedSamples)
-{
-    drflac_int32 prediction = 0;
-
-    DRFLAC_ASSERT(order <= 32);
-
-    /* 32-bit version. */
-
-    /* VC++ optimizes this to a single jmp. I've not yet verified this for other compilers. */
-    switch (order)
-    {
-    case 32: prediction += coefficients[31] * pDecodedSamples[-32];
-    case 31: prediction += coefficients[30] * pDecodedSamples[-31];
-    case 30: prediction += coefficients[29] * pDecodedSamples[-30];
-    case 29: prediction += coefficients[28] * pDecodedSamples[-29];
-    case 28: prediction += coefficients[27] * pDecodedSamples[-28];
-    case 27: prediction += coefficients[26] * pDecodedSamples[-27];
-    case 26: prediction += coefficients[25] * pDecodedSamples[-26];
-    case 25: prediction += coefficients[24] * pDecodedSamples[-25];
-    case 24: prediction += coefficients[23] * pDecodedSamples[-24];
-    case 23: prediction += coefficients[22] * pDecodedSamples[-23];
-    case 22: prediction += coefficients[21] * pDecodedSamples[-22];
-    case 21: prediction += coefficients[20] * pDecodedSamples[-21];
-    case 20: prediction += coefficients[19] * pDecodedSamples[-20];
-    case 19: prediction += coefficients[18] * pDecodedSamples[-19];
-    case 18: prediction += coefficients[17] * pDecodedSamples[-18];
-    case 17: prediction += coefficients[16] * pDecodedSamples[-17];
-    case 16: prediction += coefficients[15] * pDecodedSamples[-16];
-    case 15: prediction += coefficients[14] * pDecodedSamples[-15];
-    case 14: prediction += coefficients[13] * pDecodedSamples[-14];
-    case 13: prediction += coefficients[12] * pDecodedSamples[-13];
-    case 12: prediction += coefficients[11] * pDecodedSamples[-12];
-    case 11: prediction += coefficients[10] * pDecodedSamples[-11];
-    case 10: prediction += coefficients[ 9] * pDecodedSamples[-10];
-    case  9: prediction += coefficients[ 8] * pDecodedSamples[- 9];
-    case  8: prediction += coefficients[ 7] * pDecodedSamples[- 8];
-    case  7: prediction += coefficients[ 6] * pDecodedSamples[- 7];
-    case  6: prediction += coefficients[ 5] * pDecodedSamples[- 6];
-    case  5: prediction += coefficients[ 4] * pDecodedSamples[- 5];
-    case  4: prediction += coefficients[ 3] * pDecodedSamples[- 4];
-    case  3: prediction += coefficients[ 2] * pDecodedSamples[- 3];
-    case  2: prediction += coefficients[ 1] * pDecodedSamples[- 2];
-    case  1: prediction += coefficients[ 0] * pDecodedSamples[- 1];
-    }
-
-    return (drflac_int32)(prediction >> shift);
-}
-
-static DRFLAC_INLINE drflac_int32 drflac__calculate_prediction_64(drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pDecodedSamples)
-{
-    drflac_int64 prediction;
-
-    DRFLAC_ASSERT(order <= 32);
-
-    /* 64-bit version. */
-
-    /* This method is faster on the 32-bit build when compiling with VC++. See note below. */
-#ifndef DRFLAC_64BIT
-    if (order == 8)
-    {
-        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
-        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
-        prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3];
-        prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4];
-        prediction += coefficients[4] * (drflac_int64)pDecodedSamples[-5];
-        prediction += coefficients[5] * (drflac_int64)pDecodedSamples[-6];
-        prediction += coefficients[6] * (drflac_int64)pDecodedSamples[-7];
-        prediction += coefficients[7] * (drflac_int64)pDecodedSamples[-8];
-    }
-    else if (order == 7)
-    {
-        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
-        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
-        prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3];
-        prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4];
-        prediction += coefficients[4] * (drflac_int64)pDecodedSamples[-5];
-        prediction += coefficients[5] * (drflac_int64)pDecodedSamples[-6];
-        prediction += coefficients[6] * (drflac_int64)pDecodedSamples[-7];
-    }
-    else if (order == 3)
-    {
-        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
-        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
-        prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3];
-    }
-    else if (order == 6)
-    {
-        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
-        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
-        prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3];
-        prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4];
-        prediction += coefficients[4] * (drflac_int64)pDecodedSamples[-5];
-        prediction += coefficients[5] * (drflac_int64)pDecodedSamples[-6];
-    }
-    else if (order == 5)
-    {
-        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
-        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
-        prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3];
-        prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4];
-        prediction += coefficients[4] * (drflac_int64)pDecodedSamples[-5];
-    }
-    else if (order == 4)
-    {
-        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
-        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
-        prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3];
-        prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4];
-    }
-    else if (order == 12)
-    {
-        prediction  = coefficients[0]  * (drflac_int64)pDecodedSamples[-1];
-        prediction += coefficients[1]  * (drflac_int64)pDecodedSamples[-2];
-        prediction += coefficients[2]  * (drflac_int64)pDecodedSamples[-3];
-        prediction += coefficients[3]  * (drflac_int64)pDecodedSamples[-4];
-        prediction += coefficients[4]  * (drflac_int64)pDecodedSamples[-5];
-        prediction += coefficients[5]  * (drflac_int64)pDecodedSamples[-6];
-        prediction += coefficients[6]  * (drflac_int64)pDecodedSamples[-7];
-        prediction += coefficients[7]  * (drflac_int64)pDecodedSamples[-8];
-        prediction += coefficients[8]  * (drflac_int64)pDecodedSamples[-9];
-        prediction += coefficients[9]  * (drflac_int64)pDecodedSamples[-10];
-        prediction += coefficients[10] * (drflac_int64)pDecodedSamples[-11];
-        prediction += coefficients[11] * (drflac_int64)pDecodedSamples[-12];
-    }
-    else if (order == 2)
-    {
-        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
-        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
-    }
-    else if (order == 1)
-    {
-        prediction = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
-    }
-    else if (order == 10)
-    {
-        prediction  = coefficients[0]  * (drflac_int64)pDecodedSamples[-1];
-        prediction += coefficients[1]  * (drflac_int64)pDecodedSamples[-2];
-        prediction += coefficients[2]  * (drflac_int64)pDecodedSamples[-3];
-        prediction += coefficients[3]  * (drflac_int64)pDecodedSamples[-4];
-        prediction += coefficients[4]  * (drflac_int64)pDecodedSamples[-5];
-        prediction += coefficients[5]  * (drflac_int64)pDecodedSamples[-6];
-        prediction += coefficients[6]  * (drflac_int64)pDecodedSamples[-7];
-        prediction += coefficients[7]  * (drflac_int64)pDecodedSamples[-8];
-        prediction += coefficients[8]  * (drflac_int64)pDecodedSamples[-9];
-        prediction += coefficients[9]  * (drflac_int64)pDecodedSamples[-10];
-    }
-    else if (order == 9)
-    {
-        prediction  = coefficients[0]  * (drflac_int64)pDecodedSamples[-1];
-        prediction += coefficients[1]  * (drflac_int64)pDecodedSamples[-2];
-        prediction += coefficients[2]  * (drflac_int64)pDecodedSamples[-3];
-        prediction += coefficients[3]  * (drflac_int64)pDecodedSamples[-4];
-        prediction += coefficients[4]  * (drflac_int64)pDecodedSamples[-5];
-        prediction += coefficients[5]  * (drflac_int64)pDecodedSamples[-6];
-        prediction += coefficients[6]  * (drflac_int64)pDecodedSamples[-7];
-        prediction += coefficients[7]  * (drflac_int64)pDecodedSamples[-8];
-        prediction += coefficients[8]  * (drflac_int64)pDecodedSamples[-9];
-    }
-    else if (order == 11)
-    {
-        prediction  = coefficients[0]  * (drflac_int64)pDecodedSamples[-1];
-        prediction += coefficients[1]  * (drflac_int64)pDecodedSamples[-2];
-        prediction += coefficients[2]  * (drflac_int64)pDecodedSamples[-3];
-        prediction += coefficients[3]  * (drflac_int64)pDecodedSamples[-4];
-        prediction += coefficients[4]  * (drflac_int64)pDecodedSamples[-5];
-        prediction += coefficients[5]  * (drflac_int64)pDecodedSamples[-6];
-        prediction += coefficients[6]  * (drflac_int64)pDecodedSamples[-7];
-        prediction += coefficients[7]  * (drflac_int64)pDecodedSamples[-8];
-        prediction += coefficients[8]  * (drflac_int64)pDecodedSamples[-9];
-        prediction += coefficients[9]  * (drflac_int64)pDecodedSamples[-10];
-        prediction += coefficients[10] * (drflac_int64)pDecodedSamples[-11];
-    }
-    else
-    {
-        int j;
-
-        prediction = 0;
-        for (j = 0; j < (int)order; ++j) {
-            prediction += coefficients[j] * (drflac_int64)pDecodedSamples[-j-1];
-        }
-    }
-#endif
-
-    /*
-    VC++ optimizes this to a single jmp instruction, but only the 64-bit build. The 32-bit build generates less efficient code for some
-    reason. The ugly version above is faster so we'll just switch between the two depending on the target platform.
-    */
-#ifdef DRFLAC_64BIT
-    prediction = 0;
-    switch (order)
-    {
-    case 32: prediction += coefficients[31] * (drflac_int64)pDecodedSamples[-32];
-    case 31: prediction += coefficients[30] * (drflac_int64)pDecodedSamples[-31];
-    case 30: prediction += coefficients[29] * (drflac_int64)pDecodedSamples[-30];
-    case 29: prediction += coefficients[28] * (drflac_int64)pDecodedSamples[-29];
-    case 28: prediction += coefficients[27] * (drflac_int64)pDecodedSamples[-28];
-    case 27: prediction += coefficients[26] * (drflac_int64)pDecodedSamples[-27];
-    case 26: prediction += coefficients[25] * (drflac_int64)pDecodedSamples[-26];
-    case 25: prediction += coefficients[24] * (drflac_int64)pDecodedSamples[-25];
-    case 24: prediction += coefficients[23] * (drflac_int64)pDecodedSamples[-24];
-    case 23: prediction += coefficients[22] * (drflac_int64)pDecodedSamples[-23];
-    case 22: prediction += coefficients[21] * (drflac_int64)pDecodedSamples[-22];
-    case 21: prediction += coefficients[20] * (drflac_int64)pDecodedSamples[-21];
-    case 20: prediction += coefficients[19] * (drflac_int64)pDecodedSamples[-20];
-    case 19: prediction += coefficients[18] * (drflac_int64)pDecodedSamples[-19];
-    case 18: prediction += coefficients[17] * (drflac_int64)pDecodedSamples[-18];
-    case 17: prediction += coefficients[16] * (drflac_int64)pDecodedSamples[-17];
-    case 16: prediction += coefficients[15] * (drflac_int64)pDecodedSamples[-16];
-    case 15: prediction += coefficients[14] * (drflac_int64)pDecodedSamples[-15];
-    case 14: prediction += coefficients[13] * (drflac_int64)pDecodedSamples[-14];
-    case 13: prediction += coefficients[12] * (drflac_int64)pDecodedSamples[-13];
-    case 12: prediction += coefficients[11] * (drflac_int64)pDecodedSamples[-12];
-    case 11: prediction += coefficients[10] * (drflac_int64)pDecodedSamples[-11];
-    case 10: prediction += coefficients[ 9] * (drflac_int64)pDecodedSamples[-10];
-    case  9: prediction += coefficients[ 8] * (drflac_int64)pDecodedSamples[- 9];
-    case  8: prediction += coefficients[ 7] * (drflac_int64)pDecodedSamples[- 8];
-    case  7: prediction += coefficients[ 6] * (drflac_int64)pDecodedSamples[- 7];
-    case  6: prediction += coefficients[ 5] * (drflac_int64)pDecodedSamples[- 6];
-    case  5: prediction += coefficients[ 4] * (drflac_int64)pDecodedSamples[- 5];
-    case  4: prediction += coefficients[ 3] * (drflac_int64)pDecodedSamples[- 4];
-    case  3: prediction += coefficients[ 2] * (drflac_int64)pDecodedSamples[- 3];
-    case  2: prediction += coefficients[ 1] * (drflac_int64)pDecodedSamples[- 2];
-    case  1: prediction += coefficients[ 0] * (drflac_int64)pDecodedSamples[- 1];
-    }
-#endif
-
-    return (drflac_int32)(prediction >> shift);
-}
-
-
-#if 0
-/*
-Reference implementation for reading and decoding samples with residual. This is intentionally left unoptimized for the
-sake of readability and should only be used as a reference.
-*/
-static drflac_bool32 drflac__decode_samples_with_residual__rice__reference(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
-{
-    drflac_uint32 i;
-
-    DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(count > 0);
-    DRFLAC_ASSERT(pSamplesOut != NULL);
-
-    for (i = 0; i < count; ++i) {
-        drflac_uint32 zeroCounter = 0;
-        for (;;) {
-            drflac_uint8 bit;
-            if (!drflac__read_uint8(bs, 1, &bit)) {
-                return DRFLAC_FALSE;
-            }
-
-            if (bit == 0) {
-                zeroCounter += 1;
-            } else {
-                break;
-            }
-        }
-
-        drflac_uint32 decodedRice;
-        if (riceParam > 0) {
-            if (!drflac__read_uint32(bs, riceParam, &decodedRice)) {
-                return DRFLAC_FALSE;
-            }
-        } else {
-            decodedRice = 0;
-        }
-
-        decodedRice |= (zeroCounter << riceParam);
-        if ((decodedRice & 0x01)) {
-            decodedRice = ~(decodedRice >> 1);
-        } else {
-            decodedRice =  (decodedRice >> 1);
-        }
-
-
-        if (bitsPerSample+shift >= 32) {
-            pSamplesOut[i] = decodedRice + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + i);
-        } else {
-            pSamplesOut[i] = decodedRice + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + i);
-        }
-    }
-
-    return DRFLAC_TRUE;
-}
-#endif
-
-#if 0
-static drflac_bool32 drflac__read_rice_parts__reference(drflac_bs* bs, drflac_uint8 riceParam, drflac_uint32* pZeroCounterOut, drflac_uint32* pRiceParamPartOut)
-{
-    drflac_uint32 zeroCounter = 0;
-    drflac_uint32 decodedRice;
-
-    for (;;) {
-        drflac_uint8 bit;
-        if (!drflac__read_uint8(bs, 1, &bit)) {
-            return DRFLAC_FALSE;
-        }
-
-        if (bit == 0) {
-            zeroCounter += 1;
-        } else {
-            break;
-        }
-    }
-
-    if (riceParam > 0) {
-        if (!drflac__read_uint32(bs, riceParam, &decodedRice)) {
-            return DRFLAC_FALSE;
-        }
-    } else {
-        decodedRice = 0;
-    }
-
-    *pZeroCounterOut = zeroCounter;
-    *pRiceParamPartOut = decodedRice;
-    return DRFLAC_TRUE;
-}
-#endif
-
-#if 0
-static DRFLAC_INLINE drflac_bool32 drflac__read_rice_parts(drflac_bs* bs, drflac_uint8 riceParam, drflac_uint32* pZeroCounterOut, drflac_uint32* pRiceParamPartOut)
-{
-    drflac_cache_t riceParamMask;
-    drflac_uint32 zeroCounter;
-    drflac_uint32 setBitOffsetPlus1;
-    drflac_uint32 riceParamPart;
-    drflac_uint32 riceLength;
-
-    DRFLAC_ASSERT(riceParam > 0);   /* <-- riceParam should never be 0. drflac__read_rice_parts__param_equals_zero() should be used instead for this case. */
-
-    riceParamMask = DRFLAC_CACHE_L1_SELECTION_MASK(riceParam);
-
-    zeroCounter = 0;
-    while (bs->cache == 0) {
-        zeroCounter += (drflac_uint32)DRFLAC_CACHE_L1_BITS_REMAINING(bs);
-        if (!drflac__reload_cache(bs)) {
-            return DRFLAC_FALSE;
-        }
-    }
-
-    setBitOffsetPlus1 = drflac__clz(bs->cache);
-    zeroCounter += setBitOffsetPlus1;
-    setBitOffsetPlus1 += 1;
-
-    riceLength = setBitOffsetPlus1 + riceParam;
-    if (riceLength < DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
-        riceParamPart = (drflac_uint32)((bs->cache & (riceParamMask >> setBitOffsetPlus1)) >> DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, riceLength));
-
-        bs->consumedBits += riceLength;
-        bs->cache <<= riceLength;
-    } else {
-        drflac_uint32 bitCountLo;
-        drflac_cache_t resultHi;
-
-        bs->consumedBits += riceLength;
-        bs->cache <<= setBitOffsetPlus1 & (DRFLAC_CACHE_L1_SIZE_BITS(bs)-1);    /* <-- Equivalent to "if (setBitOffsetPlus1 < DRFLAC_CACHE_L1_SIZE_BITS(bs)) { bs->cache <<= setBitOffsetPlus1; }" */
-
-        /* It straddles the cached data. It will never cover more than the next chunk. We just read the number in two parts and combine them. */
-        bitCountLo = bs->consumedBits - DRFLAC_CACHE_L1_SIZE_BITS(bs);
-        resultHi = DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, riceParam);  /* <-- Use DRFLAC_CACHE_L1_SELECT_AND_SHIFT_SAFE() if ever this function allows riceParam=0. */
-
-        if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {
-#ifndef DR_FLAC_NO_CRC
-            drflac__update_crc16(bs);
-#endif
-            bs->cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]);
-            bs->consumedBits = 0;
-#ifndef DR_FLAC_NO_CRC
-            bs->crc16Cache = bs->cache;
-#endif
-        } else {
-            /* Slow path. We need to fetch more data from the client. */
-            if (!drflac__reload_cache(bs)) {
-                return DRFLAC_FALSE;
-            }
-        }
-
-        riceParamPart = (drflac_uint32)(resultHi | DRFLAC_CACHE_L1_SELECT_AND_SHIFT_SAFE(bs, bitCountLo));
-
-        bs->consumedBits += bitCountLo;
-        bs->cache <<= bitCountLo;
-    }
-
-    pZeroCounterOut[0] = zeroCounter;
-    pRiceParamPartOut[0] = riceParamPart;
-
-    return DRFLAC_TRUE;
-}
-#endif
-
-static DRFLAC_INLINE drflac_bool32 drflac__read_rice_parts_x1(drflac_bs* bs, drflac_uint8 riceParam, drflac_uint32* pZeroCounterOut, drflac_uint32* pRiceParamPartOut)
-{
-    drflac_uint32  riceParamPlus1 = riceParam + 1;
-    /*drflac_cache_t riceParamPlus1Mask  = DRFLAC_CACHE_L1_SELECTION_MASK(riceParamPlus1);*/
-    drflac_uint32  riceParamPlus1Shift = DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, riceParamPlus1);
-    drflac_uint32  riceParamPlus1MaxConsumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs) - riceParamPlus1;
-
-    /*
-    The idea here is to use local variables for the cache in an attempt to encourage the compiler to store them in registers. I have
-    no idea how this will work in practice...
-    */
-    drflac_cache_t bs_cache = bs->cache;
-    drflac_uint32  bs_consumedBits = bs->consumedBits;
-
-    /* The first thing to do is find the first unset bit. Most likely a bit will be set in the current cache line. */
-    drflac_uint32  lzcount = drflac__clz(bs_cache);
-    if (lzcount < sizeof(bs_cache)*8) {
-        pZeroCounterOut[0] = lzcount;
-
-        /*
-        It is most likely that the riceParam part (which comes after the zero counter) is also on this cache line. When extracting
-        this, we include the set bit from the unary coded part because it simplifies cache management. This bit will be handled
-        outside of this function at a higher level.
-        */
-    extract_rice_param_part:
-        bs_cache       <<= lzcount;
-        bs_consumedBits += lzcount;
-
-        if (bs_consumedBits <= riceParamPlus1MaxConsumedBits) {
-            /* Getting here means the rice parameter part is wholly contained within the current cache line. */
-            pRiceParamPartOut[0] = (drflac_uint32)(bs_cache >> riceParamPlus1Shift);
-            bs_cache       <<= riceParamPlus1;
-            bs_consumedBits += riceParamPlus1;
-        } else {
-            drflac_uint32 riceParamPartHi;
-            drflac_uint32 riceParamPartLo;
-            drflac_uint32 riceParamPartLoBitCount;
-
-            /*
-            Getting here means the rice parameter part straddles the cache line. We need to read from the tail of the current cache
-            line, reload the cache, and then combine it with the head of the next cache line.
-            */
-
-            /* Grab the high part of the rice parameter part. */
-            riceParamPartHi = (drflac_uint32)(bs_cache >> riceParamPlus1Shift);
-
-            /* Before reloading the cache we need to grab the size in bits of the low part. */
-            riceParamPartLoBitCount = bs_consumedBits - riceParamPlus1MaxConsumedBits;
-            DRFLAC_ASSERT(riceParamPartLoBitCount > 0 && riceParamPartLoBitCount < 32);
-
-            /* Now reload the cache. */
-            if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {
-            #ifndef DR_FLAC_NO_CRC
-                drflac__update_crc16(bs);
-            #endif
-                bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]);
-                bs_consumedBits = riceParamPartLoBitCount;
-            #ifndef DR_FLAC_NO_CRC
-                bs->crc16Cache = bs_cache;
-            #endif
-            } else {
-                /* Slow path. We need to fetch more data from the client. */
-                if (!drflac__reload_cache(bs)) {
-                    return DRFLAC_FALSE;
-                }
-
-                bs_cache = bs->cache;
-                bs_consumedBits = bs->consumedBits + riceParamPartLoBitCount;
-            }
-
-            /* We should now have enough information to construct the rice parameter part. */
-            riceParamPartLo = (drflac_uint32)(bs_cache >> (DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, riceParamPartLoBitCount)));
-            pRiceParamPartOut[0] = riceParamPartHi | riceParamPartLo;
-
-            bs_cache <<= riceParamPartLoBitCount;
-        }
-    } else {
-        /*
-        Getting here means there are no bits set on the cache line. This is a less optimal case because we just wasted a call
-        to drflac__clz() and we need to reload the cache.
-        */
-        drflac_uint32 zeroCounter = (drflac_uint32)(DRFLAC_CACHE_L1_SIZE_BITS(bs) - bs_consumedBits);
-        for (;;) {
-            if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {
-            #ifndef DR_FLAC_NO_CRC
-                drflac__update_crc16(bs);
-            #endif
-                bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]);
-                bs_consumedBits = 0;
-            #ifndef DR_FLAC_NO_CRC
-                bs->crc16Cache = bs_cache;
-            #endif
-            } else {
-                /* Slow path. We need to fetch more data from the client. */
-                if (!drflac__reload_cache(bs)) {
-                    return DRFLAC_FALSE;
-                }
-
-                bs_cache = bs->cache;
-                bs_consumedBits = bs->consumedBits;
-            }
-
-            lzcount = drflac__clz(bs_cache);
-            zeroCounter += lzcount;
-
-            if (lzcount < sizeof(bs_cache)*8) {
-                break;
-            }
-        }
-
-        pZeroCounterOut[0] = zeroCounter;
-        goto extract_rice_param_part;
-    }
-
-    /* Make sure the cache is restored at the end of it all. */
-    bs->cache = bs_cache;
-    bs->consumedBits = bs_consumedBits;
-
-    return DRFLAC_TRUE;
-}
-
-static DRFLAC_INLINE drflac_bool32 drflac__seek_rice_parts(drflac_bs* bs, drflac_uint8 riceParam)
-{
-    drflac_uint32  riceParamPlus1 = riceParam + 1;
-    drflac_uint32  riceParamPlus1MaxConsumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs) - riceParamPlus1;
-
-    /*
-    The idea here is to use local variables for the cache in an attempt to encourage the compiler to store them in registers. I have
-    no idea how this will work in practice...
-    */
-    drflac_cache_t bs_cache = bs->cache;
-    drflac_uint32  bs_consumedBits = bs->consumedBits;
-
-    /* The first thing to do is find the first unset bit. Most likely a bit will be set in the current cache line. */
-    drflac_uint32  lzcount = drflac__clz(bs_cache);
-    if (lzcount < sizeof(bs_cache)*8) {
-        /*
-        It is most likely that the riceParam part (which comes after the zero counter) is also on this cache line. When extracting
-        this, we include the set bit from the unary coded part because it simplifies cache management. This bit will be handled
-        outside of this function at a higher level.
-        */
-    extract_rice_param_part:
-        bs_cache       <<= lzcount;
-        bs_consumedBits += lzcount;
-
-        if (bs_consumedBits <= riceParamPlus1MaxConsumedBits) {
-            /* Getting here means the rice parameter part is wholly contained within the current cache line. */
-            bs_cache       <<= riceParamPlus1;
-            bs_consumedBits += riceParamPlus1;
-        } else {
-            /*
-            Getting here means the rice parameter part straddles the cache line. We need to read from the tail of the current cache
-            line, reload the cache, and then combine it with the head of the next cache line.
-            */
-
-            /* Before reloading the cache we need to grab the size in bits of the low part. */
-            drflac_uint32 riceParamPartLoBitCount = bs_consumedBits - riceParamPlus1MaxConsumedBits;
-            DRFLAC_ASSERT(riceParamPartLoBitCount > 0 && riceParamPartLoBitCount < 32);
-
-            /* Now reload the cache. */
-            if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {
-            #ifndef DR_FLAC_NO_CRC
-                drflac__update_crc16(bs);
-            #endif
-                bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]);
-                bs_consumedBits = riceParamPartLoBitCount;
-            #ifndef DR_FLAC_NO_CRC
-                bs->crc16Cache = bs_cache;
-            #endif
-            } else {
-                /* Slow path. We need to fetch more data from the client. */
-                if (!drflac__reload_cache(bs)) {
-                    return DRFLAC_FALSE;
-                }
-
-                bs_cache = bs->cache;
-                bs_consumedBits = bs->consumedBits + riceParamPartLoBitCount;
-            }
-
-            bs_cache <<= riceParamPartLoBitCount;
-        }
-    } else {
-        /*
-        Getting here means there are no bits set on the cache line. This is a less optimal case because we just wasted a call
-        to drflac__clz() and we need to reload the cache.
-        */
-        for (;;) {
-            if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {
-            #ifndef DR_FLAC_NO_CRC
-                drflac__update_crc16(bs);
-            #endif
-                bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]);
-                bs_consumedBits = 0;
-            #ifndef DR_FLAC_NO_CRC
-                bs->crc16Cache = bs_cache;
-            #endif
-            } else {
-                /* Slow path. We need to fetch more data from the client. */
-                if (!drflac__reload_cache(bs)) {
-                    return DRFLAC_FALSE;
-                }
-
-                bs_cache = bs->cache;
-                bs_consumedBits = bs->consumedBits;
-            }
-
-            lzcount = drflac__clz(bs_cache);
-            if (lzcount < sizeof(bs_cache)*8) {
-                break;
-            }
-        }
-
-        goto extract_rice_param_part;
-    }
-
-    /* Make sure the cache is restored at the end of it all. */
-    bs->cache = bs_cache;
-    bs->consumedBits = bs_consumedBits;
-
-    return DRFLAC_TRUE;
-}
-
-
-static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar_zeroorder(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
-{
-    drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF};
-    drflac_uint32 zeroCountPart0;
-    drflac_uint32 riceParamPart0;
-    drflac_uint32 riceParamMask;
-    drflac_uint32 i;
-
-    DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(count > 0);
-    DRFLAC_ASSERT(pSamplesOut != NULL);
-
-    (void)bitsPerSample;
-    (void)order;
-    (void)shift;
-    (void)coefficients;
-
-    riceParamMask  = (drflac_uint32)~((~0UL) << riceParam);
-
-    i = 0;
-    while (i < count) {
-        /* Rice extraction. */
-        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart0, &riceParamPart0)) {
-            return DRFLAC_FALSE;
-        }
-
-        /* Rice reconstruction. */
-        riceParamPart0 &= riceParamMask;
-        riceParamPart0 |= (zeroCountPart0 << riceParam);
-        riceParamPart0  = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01];
-
-        pSamplesOut[i] = riceParamPart0;
-
-        i += 1;
-    }
-
-    return DRFLAC_TRUE;
-}
-
-static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
-{
-    drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF};
-    drflac_uint32 zeroCountPart0 = 0;
-    drflac_uint32 zeroCountPart1 = 0;
-    drflac_uint32 zeroCountPart2 = 0;
-    drflac_uint32 zeroCountPart3 = 0;
-    drflac_uint32 riceParamPart0 = 0;
-    drflac_uint32 riceParamPart1 = 0;
-    drflac_uint32 riceParamPart2 = 0;
-    drflac_uint32 riceParamPart3 = 0;
-    drflac_uint32 riceParamMask;
-    const drflac_int32* pSamplesOutEnd;
-    drflac_uint32 i;
-
-    DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(count > 0);
-    DRFLAC_ASSERT(pSamplesOut != NULL);
-
-    if (order == 0) {
-        return drflac__decode_samples_with_residual__rice__scalar_zeroorder(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
-    }
-
-    riceParamMask  = (drflac_uint32)~((~0UL) << riceParam);
-    pSamplesOutEnd = pSamplesOut + (count & ~3);
-
-    if (bitsPerSample+shift > 32) {
-        while (pSamplesOut < pSamplesOutEnd) {
-            /*
-            Rice extraction. It's faster to do this one at a time against local variables than it is to use the x4 version
-            against an array. Not sure why, but perhaps it's making more efficient use of registers?
-            */
-            if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart0, &riceParamPart0) ||
-                !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart1, &riceParamPart1) ||
-                !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart2, &riceParamPart2) ||
-                !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart3, &riceParamPart3)) {
-                return DRFLAC_FALSE;
-            }
-
-            riceParamPart0 &= riceParamMask;
-            riceParamPart1 &= riceParamMask;
-            riceParamPart2 &= riceParamMask;
-            riceParamPart3 &= riceParamMask;
-
-            riceParamPart0 |= (zeroCountPart0 << riceParam);
-            riceParamPart1 |= (zeroCountPart1 << riceParam);
-            riceParamPart2 |= (zeroCountPart2 << riceParam);
-            riceParamPart3 |= (zeroCountPart3 << riceParam);
-
-            riceParamPart0  = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01];
-            riceParamPart1  = (riceParamPart1 >> 1) ^ t[riceParamPart1 & 0x01];
-            riceParamPart2  = (riceParamPart2 >> 1) ^ t[riceParamPart2 & 0x01];
-            riceParamPart3  = (riceParamPart3 >> 1) ^ t[riceParamPart3 & 0x01];
-
-            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 0);
-            pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 1);
-            pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 2);
-            pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 3);
-
-            pSamplesOut += 4;
-        }
-    } else {
-        while (pSamplesOut < pSamplesOutEnd) {
-            if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart0, &riceParamPart0) ||
-                !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart1, &riceParamPart1) ||
-                !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart2, &riceParamPart2) ||
-                !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart3, &riceParamPart3)) {
-                return DRFLAC_FALSE;
-            }
-
-            riceParamPart0 &= riceParamMask;
-            riceParamPart1 &= riceParamMask;
-            riceParamPart2 &= riceParamMask;
-            riceParamPart3 &= riceParamMask;
-
-            riceParamPart0 |= (zeroCountPart0 << riceParam);
-            riceParamPart1 |= (zeroCountPart1 << riceParam);
-            riceParamPart2 |= (zeroCountPart2 << riceParam);
-            riceParamPart3 |= (zeroCountPart3 << riceParam);
-
-            riceParamPart0  = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01];
-            riceParamPart1  = (riceParamPart1 >> 1) ^ t[riceParamPart1 & 0x01];
-            riceParamPart2  = (riceParamPart2 >> 1) ^ t[riceParamPart2 & 0x01];
-            riceParamPart3  = (riceParamPart3 >> 1) ^ t[riceParamPart3 & 0x01];
-
-            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 0);
-            pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 1);
-            pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 2);
-            pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 3);
-
-            pSamplesOut += 4;
-        }
-    }
-
-    i = (count & ~3);
-    while (i < count) {
-        /* Rice extraction. */
-        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart0, &riceParamPart0)) {
-            return DRFLAC_FALSE;
-        }
-
-        /* Rice reconstruction. */
-        riceParamPart0 &= riceParamMask;
-        riceParamPart0 |= (zeroCountPart0 << riceParam);
-        riceParamPart0  = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01];
-        /*riceParamPart0  = (riceParamPart0 >> 1) ^ (~(riceParamPart0 & 0x01) + 1);*/
-
-        /* Sample reconstruction. */
-        if (bitsPerSample+shift > 32) {
-            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 0);
-        } else {
-            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 0);
-        }
-
-        i += 1;
-        pSamplesOut += 1;
-    }
-
-    return DRFLAC_TRUE;
-}
-
-#if defined(DRFLAC_SUPPORT_SSE2)
-static DRFLAC_INLINE __m128i drflac__mm_packs_interleaved_epi32(__m128i a, __m128i b)
-{
-    __m128i r;
-
-    /* Pack. */
-    r = _mm_packs_epi32(a, b);
-
-    /* a3a2 a1a0 b3b2 b1b0 -> a3a2 b3b2 a1a0 b1b0 */
-    r = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 1, 2, 0));
-
-    /* a3a2 b3b2 a1a0 b1b0 -> a3b3 a2b2 a1b1 a0b0 */
-    r = _mm_shufflehi_epi16(r, _MM_SHUFFLE(3, 1, 2, 0));
-    r = _mm_shufflelo_epi16(r, _MM_SHUFFLE(3, 1, 2, 0));
-
-    return r;
-}
-#endif
-
-#if defined(DRFLAC_SUPPORT_SSE41)
-static DRFLAC_INLINE __m128i drflac__mm_not_si128(__m128i a)
-{
-    return _mm_xor_si128(a, _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128()));
-}
-
-static DRFLAC_INLINE __m128i drflac__mm_hadd_epi32(__m128i x)
-{
-    __m128i x64 = _mm_add_epi32(x, _mm_shuffle_epi32(x, _MM_SHUFFLE(1, 0, 3, 2)));
-    __m128i x32 = _mm_shufflelo_epi16(x64, _MM_SHUFFLE(1, 0, 3, 2));
-    return _mm_add_epi32(x64, x32);
-}
-
-static DRFLAC_INLINE __m128i drflac__mm_hadd_epi64(__m128i x)
-{
-    return _mm_add_epi64(x, _mm_shuffle_epi32(x, _MM_SHUFFLE(1, 0, 3, 2)));
-}
-
-static DRFLAC_INLINE __m128i drflac__mm_srai_epi64(__m128i x, int count)
-{
-    /*
-    To simplify this we are assuming count < 32. This restriction allows us to work on a low side and a high side. The low side
-    is shifted with zero bits, whereas the right side is shifted with sign bits.
-    */
-    __m128i lo = _mm_srli_epi64(x, count);
-    __m128i hi = _mm_srai_epi32(x, count);
-
-    hi = _mm_and_si128(hi, _mm_set_epi32(0xFFFFFFFF, 0, 0xFFFFFFFF, 0));    /* The high part needs to have the low part cleared. */
-
-    return _mm_or_si128(lo, hi);
-}
-
-static drflac_bool32 drflac__decode_samples_with_residual__rice__sse41_32(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
-{
-    int i;
-    drflac_uint32 riceParamMask;
-    drflac_int32* pDecodedSamples    = pSamplesOut;
-    drflac_int32* pDecodedSamplesEnd = pSamplesOut + (count & ~3);
-    drflac_uint32 zeroCountParts0 = 0;
-    drflac_uint32 zeroCountParts1 = 0;
-    drflac_uint32 zeroCountParts2 = 0;
-    drflac_uint32 zeroCountParts3 = 0;
-    drflac_uint32 riceParamParts0 = 0;
-    drflac_uint32 riceParamParts1 = 0;
-    drflac_uint32 riceParamParts2 = 0;
-    drflac_uint32 riceParamParts3 = 0;
-    __m128i coefficients128_0;
-    __m128i coefficients128_4;
-    __m128i coefficients128_8;
-    __m128i samples128_0;
-    __m128i samples128_4;
-    __m128i samples128_8;
-    __m128i riceParamMask128;
-
-    const drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF};
-
-    riceParamMask    = (drflac_uint32)~((~0UL) << riceParam);
-    riceParamMask128 = _mm_set1_epi32(riceParamMask);
-
-    /* Pre-load. */
-    coefficients128_0 = _mm_setzero_si128();
-    coefficients128_4 = _mm_setzero_si128();
-    coefficients128_8 = _mm_setzero_si128();
-
-    samples128_0 = _mm_setzero_si128();
-    samples128_4 = _mm_setzero_si128();
-    samples128_8 = _mm_setzero_si128();
-
-    /*
-    Pre-loading the coefficients and prior samples is annoying because we need to ensure we don't try reading more than
-    what's available in the input buffers. It would be convenient to use a fall-through switch to do this, but this results
-    in strict aliasing warnings with GCC. To work around this I'm just doing something hacky. This feels a bit convoluted
-    so I think there's opportunity for this to be simplified.
-    */
-#if 1
-    {
-        int runningOrder = order;
-
-        /* 0 - 3. */
-        if (runningOrder >= 4) {
-            coefficients128_0 = _mm_loadu_si128((const __m128i*)(coefficients + 0));
-            samples128_0      = _mm_loadu_si128((const __m128i*)(pSamplesOut  - 4));
-            runningOrder -= 4;
-        } else {
-            switch (runningOrder) {
-                case 3: coefficients128_0 = _mm_set_epi32(0, coefficients[2], coefficients[1], coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], pSamplesOut[-2], pSamplesOut[-3], 0); break;
-                case 2: coefficients128_0 = _mm_set_epi32(0, 0,               coefficients[1], coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], pSamplesOut[-2], 0,               0); break;
-                case 1: coefficients128_0 = _mm_set_epi32(0, 0,               0,               coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], 0,               0,               0); break;
-            }
-            runningOrder = 0;
-        }
-
-        /* 4 - 7 */
-        if (runningOrder >= 4) {
-            coefficients128_4 = _mm_loadu_si128((const __m128i*)(coefficients + 4));
-            samples128_4      = _mm_loadu_si128((const __m128i*)(pSamplesOut  - 8));
-            runningOrder -= 4;
-        } else {
-            switch (runningOrder) {
-                case 3: coefficients128_4 = _mm_set_epi32(0, coefficients[6], coefficients[5], coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], pSamplesOut[-6], pSamplesOut[-7], 0); break;
-                case 2: coefficients128_4 = _mm_set_epi32(0, 0,               coefficients[5], coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], pSamplesOut[-6], 0,               0); break;
-                case 1: coefficients128_4 = _mm_set_epi32(0, 0,               0,               coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], 0,               0,               0); break;
-            }
-            runningOrder = 0;
-        }
-
-        /* 8 - 11 */
-        if (runningOrder == 4) {
-            coefficients128_8 = _mm_loadu_si128((const __m128i*)(coefficients + 8));
-            samples128_8      = _mm_loadu_si128((const __m128i*)(pSamplesOut  - 12));
-            runningOrder -= 4;
-        } else {
-            switch (runningOrder) {
-                case 3: coefficients128_8 = _mm_set_epi32(0, coefficients[10], coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], pSamplesOut[-11], 0); break;
-                case 2: coefficients128_8 = _mm_set_epi32(0, 0,                coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], 0,                0); break;
-                case 1: coefficients128_8 = _mm_set_epi32(0, 0,                0,               coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], 0,                0,                0); break;
-            }
-            runningOrder = 0;
-        }
-
-        /* Coefficients need to be shuffled for our streaming algorithm below to work. Samples are already in the correct order from the loading routine above. */
-        coefficients128_0 = _mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(0, 1, 2, 3));
-        coefficients128_4 = _mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(0, 1, 2, 3));
-        coefficients128_8 = _mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(0, 1, 2, 3));
-    }
-#else
-    /* This causes strict-aliasing warnings with GCC. */
-    switch (order)
-    {
-    case 12: ((drflac_int32*)&coefficients128_8)[0] = coefficients[11]; ((drflac_int32*)&samples128_8)[0] = pDecodedSamples[-12];
-    case 11: ((drflac_int32*)&coefficients128_8)[1] = coefficients[10]; ((drflac_int32*)&samples128_8)[1] = pDecodedSamples[-11];
-    case 10: ((drflac_int32*)&coefficients128_8)[2] = coefficients[ 9]; ((drflac_int32*)&samples128_8)[2] = pDecodedSamples[-10];
-    case 9:  ((drflac_int32*)&coefficients128_8)[3] = coefficients[ 8]; ((drflac_int32*)&samples128_8)[3] = pDecodedSamples[- 9];
-    case 8:  ((drflac_int32*)&coefficients128_4)[0] = coefficients[ 7]; ((drflac_int32*)&samples128_4)[0] = pDecodedSamples[- 8];
-    case 7:  ((drflac_int32*)&coefficients128_4)[1] = coefficients[ 6]; ((drflac_int32*)&samples128_4)[1] = pDecodedSamples[- 7];
-    case 6:  ((drflac_int32*)&coefficients128_4)[2] = coefficients[ 5]; ((drflac_int32*)&samples128_4)[2] = pDecodedSamples[- 6];
-    case 5:  ((drflac_int32*)&coefficients128_4)[3] = coefficients[ 4]; ((drflac_int32*)&samples128_4)[3] = pDecodedSamples[- 5];
-    case 4:  ((drflac_int32*)&coefficients128_0)[0] = coefficients[ 3]; ((drflac_int32*)&samples128_0)[0] = pDecodedSamples[- 4];
-    case 3:  ((drflac_int32*)&coefficients128_0)[1] = coefficients[ 2]; ((drflac_int32*)&samples128_0)[1] = pDecodedSamples[- 3];
-    case 2:  ((drflac_int32*)&coefficients128_0)[2] = coefficients[ 1]; ((drflac_int32*)&samples128_0)[2] = pDecodedSamples[- 2];
-    case 1:  ((drflac_int32*)&coefficients128_0)[3] = coefficients[ 0]; ((drflac_int32*)&samples128_0)[3] = pDecodedSamples[- 1];
-    }
-#endif
-
-    /* For this version we are doing one sample at a time. */
-    while (pDecodedSamples < pDecodedSamplesEnd) {
-        __m128i prediction128;
-        __m128i zeroCountPart128;
-        __m128i riceParamPart128;
-
-        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts0, &riceParamParts0) ||
-            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts1, &riceParamParts1) ||
-            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts2, &riceParamParts2) ||
-            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts3, &riceParamParts3)) {
-            return DRFLAC_FALSE;
-        }
-
-        zeroCountPart128 = _mm_set_epi32(zeroCountParts3, zeroCountParts2, zeroCountParts1, zeroCountParts0);
-        riceParamPart128 = _mm_set_epi32(riceParamParts3, riceParamParts2, riceParamParts1, riceParamParts0);
-
-        riceParamPart128 = _mm_and_si128(riceParamPart128, riceParamMask128);
-        riceParamPart128 = _mm_or_si128(riceParamPart128, _mm_slli_epi32(zeroCountPart128, riceParam));
-        riceParamPart128 = _mm_xor_si128(_mm_srli_epi32(riceParamPart128, 1), _mm_add_epi32(drflac__mm_not_si128(_mm_and_si128(riceParamPart128, _mm_set1_epi32(0x01))), _mm_set1_epi32(0x01)));  /* <-- SSE2 compatible */
-        /*riceParamPart128 = _mm_xor_si128(_mm_srli_epi32(riceParamPart128, 1), _mm_mullo_epi32(_mm_and_si128(riceParamPart128, _mm_set1_epi32(0x01)), _mm_set1_epi32(0xFFFFFFFF)));*/   /* <-- Only supported from SSE4.1 and is slower in my testing... */
-
-        if (order <= 4) {
-            for (i = 0; i < 4; i += 1) {
-                prediction128 = _mm_mullo_epi32(coefficients128_0, samples128_0);
-
-                /* Horizontal add and shift. */
-                prediction128 = drflac__mm_hadd_epi32(prediction128);
-                prediction128 = _mm_srai_epi32(prediction128, shift);
-                prediction128 = _mm_add_epi32(riceParamPart128, prediction128);
-
-                samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4);
-                riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4);
-            }
-        } else if (order <= 8) {
-            for (i = 0; i < 4; i += 1) {
-                prediction128 =                              _mm_mullo_epi32(coefficients128_4, samples128_4);
-                prediction128 = _mm_add_epi32(prediction128, _mm_mullo_epi32(coefficients128_0, samples128_0));
-
-                /* Horizontal add and shift. */
-                prediction128 = drflac__mm_hadd_epi32(prediction128);
-                prediction128 = _mm_srai_epi32(prediction128, shift);
-                prediction128 = _mm_add_epi32(riceParamPart128, prediction128);
-
-                samples128_4 = _mm_alignr_epi8(samples128_0,  samples128_4, 4);
-                samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4);
-                riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4);
-            }
-        } else {
-            for (i = 0; i < 4; i += 1) {
-                prediction128 =                              _mm_mullo_epi32(coefficients128_8, samples128_8);
-                prediction128 = _mm_add_epi32(prediction128, _mm_mullo_epi32(coefficients128_4, samples128_4));
-                prediction128 = _mm_add_epi32(prediction128, _mm_mullo_epi32(coefficients128_0, samples128_0));
-
-                /* Horizontal add and shift. */
-                prediction128 = drflac__mm_hadd_epi32(prediction128);
-                prediction128 = _mm_srai_epi32(prediction128, shift);
-                prediction128 = _mm_add_epi32(riceParamPart128, prediction128);
-
-                samples128_8 = _mm_alignr_epi8(samples128_4,  samples128_8, 4);
-                samples128_4 = _mm_alignr_epi8(samples128_0,  samples128_4, 4);
-                samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4);
-                riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4);
-            }
-        }
-
-        /* We store samples in groups of 4. */
-        _mm_storeu_si128((__m128i*)pDecodedSamples, samples128_0);
-        pDecodedSamples += 4;
-    }
-
-    /* Make sure we process the last few samples. */
-    i = (count & ~3);
-    while (i < (int)count) {
-        /* Rice extraction. */
-        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts0, &riceParamParts0)) {
-            return DRFLAC_FALSE;
-        }
-
-        /* Rice reconstruction. */
-        riceParamParts0 &= riceParamMask;
-        riceParamParts0 |= (zeroCountParts0 << riceParam);
-        riceParamParts0  = (riceParamParts0 >> 1) ^ t[riceParamParts0 & 0x01];
-
-        /* Sample reconstruction. */
-        pDecodedSamples[0] = riceParamParts0 + drflac__calculate_prediction_32(order, shift, coefficients, pDecodedSamples);
-
-        i += 1;
-        pDecodedSamples += 1;
-    }
-
-    return DRFLAC_TRUE;
-}
-
-static drflac_bool32 drflac__decode_samples_with_residual__rice__sse41_64(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
-{
-    int i;
-    drflac_uint32 riceParamMask;
-    drflac_int32* pDecodedSamples    = pSamplesOut;
-    drflac_int32* pDecodedSamplesEnd = pSamplesOut + (count & ~3);
-    drflac_uint32 zeroCountParts0 = 0;
-    drflac_uint32 zeroCountParts1 = 0;
-    drflac_uint32 zeroCountParts2 = 0;
-    drflac_uint32 zeroCountParts3 = 0;
-    drflac_uint32 riceParamParts0 = 0;
-    drflac_uint32 riceParamParts1 = 0;
-    drflac_uint32 riceParamParts2 = 0;
-    drflac_uint32 riceParamParts3 = 0;
-    __m128i coefficients128_0;
-    __m128i coefficients128_4;
-    __m128i coefficients128_8;
-    __m128i samples128_0;
-    __m128i samples128_4;
-    __m128i samples128_8;
-    __m128i prediction128;
-    __m128i riceParamMask128;
-
-    const drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF};
-
-    DRFLAC_ASSERT(order <= 12);
-
-    riceParamMask    = (drflac_uint32)~((~0UL) << riceParam);
-    riceParamMask128 = _mm_set1_epi32(riceParamMask);
-
-    prediction128 = _mm_setzero_si128();
-
-    /* Pre-load. */
-    coefficients128_0  = _mm_setzero_si128();
-    coefficients128_4  = _mm_setzero_si128();
-    coefficients128_8  = _mm_setzero_si128();
-
-    samples128_0  = _mm_setzero_si128();
-    samples128_4  = _mm_setzero_si128();
-    samples128_8  = _mm_setzero_si128();
-
-#if 1
-    {
-        int runningOrder = order;
-
-        /* 0 - 3. */
-        if (runningOrder >= 4) {
-            coefficients128_0 = _mm_loadu_si128((const __m128i*)(coefficients + 0));
-            samples128_0      = _mm_loadu_si128((const __m128i*)(pSamplesOut  - 4));
-            runningOrder -= 4;
-        } else {
-            switch (runningOrder) {
-                case 3: coefficients128_0 = _mm_set_epi32(0, coefficients[2], coefficients[1], coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], pSamplesOut[-2], pSamplesOut[-3], 0); break;
-                case 2: coefficients128_0 = _mm_set_epi32(0, 0,               coefficients[1], coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], pSamplesOut[-2], 0,               0); break;
-                case 1: coefficients128_0 = _mm_set_epi32(0, 0,               0,               coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], 0,               0,               0); break;
-            }
-            runningOrder = 0;
-        }
-
-        /* 4 - 7 */
-        if (runningOrder >= 4) {
-            coefficients128_4 = _mm_loadu_si128((const __m128i*)(coefficients + 4));
-            samples128_4      = _mm_loadu_si128((const __m128i*)(pSamplesOut  - 8));
-            runningOrder -= 4;
-        } else {
-            switch (runningOrder) {
-                case 3: coefficients128_4 = _mm_set_epi32(0, coefficients[6], coefficients[5], coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], pSamplesOut[-6], pSamplesOut[-7], 0); break;
-                case 2: coefficients128_4 = _mm_set_epi32(0, 0,               coefficients[5], coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], pSamplesOut[-6], 0,               0); break;
-                case 1: coefficients128_4 = _mm_set_epi32(0, 0,               0,               coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], 0,               0,               0); break;
-            }
-            runningOrder = 0;
-        }
-
-        /* 8 - 11 */
-        if (runningOrder == 4) {
-            coefficients128_8 = _mm_loadu_si128((const __m128i*)(coefficients + 8));
-            samples128_8      = _mm_loadu_si128((const __m128i*)(pSamplesOut  - 12));
-            runningOrder -= 4;
-        } else {
-            switch (runningOrder) {
-                case 3: coefficients128_8 = _mm_set_epi32(0, coefficients[10], coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], pSamplesOut[-11], 0); break;
-                case 2: coefficients128_8 = _mm_set_epi32(0, 0,                coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], 0,                0); break;
-                case 1: coefficients128_8 = _mm_set_epi32(0, 0,                0,               coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], 0,                0,                0); break;
-            }
-            runningOrder = 0;
-        }
-
-        /* Coefficients need to be shuffled for our streaming algorithm below to work. Samples are already in the correct order from the loading routine above. */
-        coefficients128_0 = _mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(0, 1, 2, 3));
-        coefficients128_4 = _mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(0, 1, 2, 3));
-        coefficients128_8 = _mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(0, 1, 2, 3));
-    }
-#else
-    switch (order)
-    {
-    case 12: ((drflac_int32*)&coefficients128_8)[0] = coefficients[11]; ((drflac_int32*)&samples128_8)[0] = pDecodedSamples[-12];
-    case 11: ((drflac_int32*)&coefficients128_8)[1] = coefficients[10]; ((drflac_int32*)&samples128_8)[1] = pDecodedSamples[-11];
-    case 10: ((drflac_int32*)&coefficients128_8)[2] = coefficients[ 9]; ((drflac_int32*)&samples128_8)[2] = pDecodedSamples[-10];
-    case 9:  ((drflac_int32*)&coefficients128_8)[3] = coefficients[ 8]; ((drflac_int32*)&samples128_8)[3] = pDecodedSamples[- 9];
-    case 8:  ((drflac_int32*)&coefficients128_4)[0] = coefficients[ 7]; ((drflac_int32*)&samples128_4)[0] = pDecodedSamples[- 8];
-    case 7:  ((drflac_int32*)&coefficients128_4)[1] = coefficients[ 6]; ((drflac_int32*)&samples128_4)[1] = pDecodedSamples[- 7];
-    case 6:  ((drflac_int32*)&coefficients128_4)[2] = coefficients[ 5]; ((drflac_int32*)&samples128_4)[2] = pDecodedSamples[- 6];
-    case 5:  ((drflac_int32*)&coefficients128_4)[3] = coefficients[ 4]; ((drflac_int32*)&samples128_4)[3] = pDecodedSamples[- 5];
-    case 4:  ((drflac_int32*)&coefficients128_0)[0] = coefficients[ 3]; ((drflac_int32*)&samples128_0)[0] = pDecodedSamples[- 4];
-    case 3:  ((drflac_int32*)&coefficients128_0)[1] = coefficients[ 2]; ((drflac_int32*)&samples128_0)[1] = pDecodedSamples[- 3];
-    case 2:  ((drflac_int32*)&coefficients128_0)[2] = coefficients[ 1]; ((drflac_int32*)&samples128_0)[2] = pDecodedSamples[- 2];
-    case 1:  ((drflac_int32*)&coefficients128_0)[3] = coefficients[ 0]; ((drflac_int32*)&samples128_0)[3] = pDecodedSamples[- 1];
-    }
-#endif
-
-    /* For this version we are doing one sample at a time. */
-    while (pDecodedSamples < pDecodedSamplesEnd) {
-        __m128i zeroCountPart128;
-        __m128i riceParamPart128;
-
-        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts0, &riceParamParts0) ||
-            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts1, &riceParamParts1) ||
-            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts2, &riceParamParts2) ||
-            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts3, &riceParamParts3)) {
-            return DRFLAC_FALSE;
-        }
-
-        zeroCountPart128 = _mm_set_epi32(zeroCountParts3, zeroCountParts2, zeroCountParts1, zeroCountParts0);
-        riceParamPart128 = _mm_set_epi32(riceParamParts3, riceParamParts2, riceParamParts1, riceParamParts0);
-
-        riceParamPart128 = _mm_and_si128(riceParamPart128, riceParamMask128);
-        riceParamPart128 = _mm_or_si128(riceParamPart128, _mm_slli_epi32(zeroCountPart128, riceParam));
-        riceParamPart128 = _mm_xor_si128(_mm_srli_epi32(riceParamPart128, 1), _mm_add_epi32(drflac__mm_not_si128(_mm_and_si128(riceParamPart128, _mm_set1_epi32(1))), _mm_set1_epi32(1)));
-
-        for (i = 0; i < 4; i += 1) {
-            prediction128 = _mm_xor_si128(prediction128, prediction128);    /* Reset to 0. */
-
-            switch (order)
-            {
-            case 12:
-            case 11: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(1, 1, 0, 0)), _mm_shuffle_epi32(samples128_8, _MM_SHUFFLE(1, 1, 0, 0))));
-            case 10:
-            case  9: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(3, 3, 2, 2)), _mm_shuffle_epi32(samples128_8, _MM_SHUFFLE(3, 3, 2, 2))));
-            case  8:
-            case  7: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(1, 1, 0, 0)), _mm_shuffle_epi32(samples128_4, _MM_SHUFFLE(1, 1, 0, 0))));
-            case  6:
-            case  5: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(3, 3, 2, 2)), _mm_shuffle_epi32(samples128_4, _MM_SHUFFLE(3, 3, 2, 2))));
-            case  4:
-            case  3: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(1, 1, 0, 0)), _mm_shuffle_epi32(samples128_0, _MM_SHUFFLE(1, 1, 0, 0))));
-            case  2:
-            case  1: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(3, 3, 2, 2)), _mm_shuffle_epi32(samples128_0, _MM_SHUFFLE(3, 3, 2, 2))));
-            }
-
-            /* Horizontal add and shift. */
-            prediction128 = drflac__mm_hadd_epi64(prediction128);
-            prediction128 = drflac__mm_srai_epi64(prediction128, shift);
-            prediction128 = _mm_add_epi32(riceParamPart128, prediction128);
-
-            /* Our value should be sitting in prediction128[0]. We need to combine this with our SSE samples. */
-            samples128_8 = _mm_alignr_epi8(samples128_4,  samples128_8, 4);
-            samples128_4 = _mm_alignr_epi8(samples128_0,  samples128_4, 4);
-            samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4);
-
-            /* Slide our rice parameter down so that the value in position 0 contains the next one to process. */
-            riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4);
-        }
-
-        /* We store samples in groups of 4. */
-        _mm_storeu_si128((__m128i*)pDecodedSamples, samples128_0);
-        pDecodedSamples += 4;
-    }
-
-    /* Make sure we process the last few samples. */
-    i = (count & ~3);
-    while (i < (int)count) {
-        /* Rice extraction. */
-        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts0, &riceParamParts0)) {
-            return DRFLAC_FALSE;
-        }
-
-        /* Rice reconstruction. */
-        riceParamParts0 &= riceParamMask;
-        riceParamParts0 |= (zeroCountParts0 << riceParam);
-        riceParamParts0  = (riceParamParts0 >> 1) ^ t[riceParamParts0 & 0x01];
-
-        /* Sample reconstruction. */
-        pDecodedSamples[0] = riceParamParts0 + drflac__calculate_prediction_64(order, shift, coefficients, pDecodedSamples);
-
-        i += 1;
-        pDecodedSamples += 1;
-    }
-
-    return DRFLAC_TRUE;
-}
-
-static drflac_bool32 drflac__decode_samples_with_residual__rice__sse41(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
-{
-    DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(count > 0);
-    DRFLAC_ASSERT(pSamplesOut != NULL);
-
-    /* In my testing the order is rarely > 12, so in this case I'm going to simplify the SSE implementation by only handling order <= 12. */
-    if (order > 0 && order <= 12) {
-        if (bitsPerSample+shift > 32) {
-            return drflac__decode_samples_with_residual__rice__sse41_64(bs, count, riceParam, order, shift, coefficients, pSamplesOut);
-        } else {
-            return drflac__decode_samples_with_residual__rice__sse41_32(bs, count, riceParam, order, shift, coefficients, pSamplesOut);
-        }
-    } else {
-        return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
-    }
-}
-#endif
-
-#if defined(DRFLAC_SUPPORT_NEON)
-static DRFLAC_INLINE void drflac__vst2q_s32(drflac_int32* p, int32x4x2_t x)
-{
-    vst1q_s32(p+0, x.val[0]);
-    vst1q_s32(p+4, x.val[1]);
-}
-
-static DRFLAC_INLINE void drflac__vst2q_u32(drflac_uint32* p, uint32x4x2_t x)
-{
-    vst1q_u32(p+0, x.val[0]);
-    vst1q_u32(p+4, x.val[1]);
-}
-
-static DRFLAC_INLINE void drflac__vst2q_f32(float* p, float32x4x2_t x)
-{
-    vst1q_f32(p+0, x.val[0]);
-    vst1q_f32(p+4, x.val[1]);
-}
-
-static DRFLAC_INLINE void drflac__vst2q_s16(drflac_int16* p, int16x4x2_t x)
-{
-    vst1q_s16(p, vcombine_s16(x.val[0], x.val[1]));
-}
-
-static DRFLAC_INLINE void drflac__vst2q_u16(drflac_uint16* p, uint16x4x2_t x)
-{
-    vst1q_u16(p, vcombine_u16(x.val[0], x.val[1]));
-}
-
-static DRFLAC_INLINE int32x4_t drflac__vdupq_n_s32x4(drflac_int32 x3, drflac_int32 x2, drflac_int32 x1, drflac_int32 x0)
-{
-    drflac_int32 x[4];
-    x[3] = x3;
-    x[2] = x2;
-    x[1] = x1;
-    x[0] = x0;
-    return vld1q_s32(x);
-}
-
-static DRFLAC_INLINE int32x4_t drflac__valignrq_s32_1(int32x4_t a, int32x4_t b)
-{
-    /* Equivalent to SSE's _mm_alignr_epi8(a, b, 4) */
-
-    /* Reference */
-    /*return drflac__vdupq_n_s32x4(
-        vgetq_lane_s32(a, 0),
-        vgetq_lane_s32(b, 3),
-        vgetq_lane_s32(b, 2),
-        vgetq_lane_s32(b, 1)
-    );*/
-
-    return vextq_s32(b, a, 1);
-}
-
-static DRFLAC_INLINE uint32x4_t drflac__valignrq_u32_1(uint32x4_t a, uint32x4_t b)
-{
-    /* Equivalent to SSE's _mm_alignr_epi8(a, b, 4) */
-
-    /* Reference */
-    /*return drflac__vdupq_n_s32x4(
-        vgetq_lane_s32(a, 0),
-        vgetq_lane_s32(b, 3),
-        vgetq_lane_s32(b, 2),
-        vgetq_lane_s32(b, 1)
-    );*/
-
-    return vextq_u32(b, a, 1);
-}
-
-static DRFLAC_INLINE int32x2_t drflac__vhaddq_s32(int32x4_t x)
-{
-    /* The sum must end up in position 0. */
-
-    /* Reference */
-    /*return vdupq_n_s32(
-        vgetq_lane_s32(x, 3) +
-        vgetq_lane_s32(x, 2) +
-        vgetq_lane_s32(x, 1) +
-        vgetq_lane_s32(x, 0)
-    );*/
-
-    int32x2_t r = vadd_s32(vget_high_s32(x), vget_low_s32(x));
-    return vpadd_s32(r, r);
-}
-
-static DRFLAC_INLINE int64x1_t drflac__vhaddq_s64(int64x2_t x)
-{
-    return vadd_s64(vget_high_s64(x), vget_low_s64(x));
-}
-
-static DRFLAC_INLINE int32x4_t drflac__vrevq_s32(int32x4_t x)
-{
-    /* Reference */
-    /*return drflac__vdupq_n_s32x4(
-        vgetq_lane_s32(x, 0),
-        vgetq_lane_s32(x, 1),
-        vgetq_lane_s32(x, 2),
-        vgetq_lane_s32(x, 3)
-    );*/
-
-    return vrev64q_s32(vcombine_s32(vget_high_s32(x), vget_low_s32(x)));
-}
-
-static DRFLAC_INLINE int32x4_t drflac__vnotq_s32(int32x4_t x)
-{
-    return veorq_s32(x, vdupq_n_s32(0xFFFFFFFF));
-}
-
-static DRFLAC_INLINE uint32x4_t drflac__vnotq_u32(uint32x4_t x)
-{
-    return veorq_u32(x, vdupq_n_u32(0xFFFFFFFF));
-}
-
-static drflac_bool32 drflac__decode_samples_with_residual__rice__neon_32(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
-{
-    int i;
-    drflac_uint32 riceParamMask;
-    drflac_int32* pDecodedSamples    = pSamplesOut;
-    drflac_int32* pDecodedSamplesEnd = pSamplesOut + (count & ~3);
-    drflac_uint32 zeroCountParts[4];
-    drflac_uint32 riceParamParts[4];
-    int32x4_t coefficients128_0;
-    int32x4_t coefficients128_4;
-    int32x4_t coefficients128_8;
-    int32x4_t samples128_0;
-    int32x4_t samples128_4;
-    int32x4_t samples128_8;
-    uint32x4_t riceParamMask128;
-    int32x4_t riceParam128;
-    int32x2_t shift64;
-    uint32x4_t one128;
-
-    const drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF};
-
-    riceParamMask    = ~((~0UL) << riceParam);
-    riceParamMask128 = vdupq_n_u32(riceParamMask);
-
-    riceParam128 = vdupq_n_s32(riceParam);
-    shift64 = vdup_n_s32(-shift); /* Negate the shift because we'll be doing a variable shift using vshlq_s32(). */
-    one128 = vdupq_n_u32(1);
-
-    /*
-    Pre-loading the coefficients and prior samples is annoying because we need to ensure we don't try reading more than
-    what's available in the input buffers. It would be conenient to use a fall-through switch to do this, but this results
-    in strict aliasing warnings with GCC. To work around this I'm just doing something hacky. This feels a bit convoluted
-    so I think there's opportunity for this to be simplified.
-    */
-    {
-        int runningOrder = order;
-        drflac_int32 tempC[4] = {0, 0, 0, 0};
-        drflac_int32 tempS[4] = {0, 0, 0, 0};
-
-        /* 0 - 3. */
-        if (runningOrder >= 4) {
-            coefficients128_0 = vld1q_s32(coefficients + 0);
-            samples128_0      = vld1q_s32(pSamplesOut  - 4);
-            runningOrder -= 4;
-        } else {
-            switch (runningOrder) {
-                case 3: tempC[2] = coefficients[2]; tempS[1] = pSamplesOut[-3]; /* fallthrough */
-                case 2: tempC[1] = coefficients[1]; tempS[2] = pSamplesOut[-2]; /* fallthrough */
-                case 1: tempC[0] = coefficients[0]; tempS[3] = pSamplesOut[-1]; /* fallthrough */
-            }
-
-            coefficients128_0 = vld1q_s32(tempC);
-            samples128_0      = vld1q_s32(tempS);
-            runningOrder = 0;
-        }
-
-        /* 4 - 7 */
-        if (runningOrder >= 4) {
-            coefficients128_4 = vld1q_s32(coefficients + 4);
-            samples128_4      = vld1q_s32(pSamplesOut  - 8);
-            runningOrder -= 4;
-        } else {
-            switch (runningOrder) {
-                case 3: tempC[2] = coefficients[6]; tempS[1] = pSamplesOut[-7]; /* fallthrough */
-                case 2: tempC[1] = coefficients[5]; tempS[2] = pSamplesOut[-6]; /* fallthrough */
-                case 1: tempC[0] = coefficients[4]; tempS[3] = pSamplesOut[-5]; /* fallthrough */
-            }
-
-            coefficients128_4 = vld1q_s32(tempC);
-            samples128_4      = vld1q_s32(tempS);
-            runningOrder = 0;
-        }
-
-        /* 8 - 11 */
-        if (runningOrder == 4) {
-            coefficients128_8 = vld1q_s32(coefficients + 8);
-            samples128_8      = vld1q_s32(pSamplesOut  - 12);
-            runningOrder -= 4;
-        } else {
-            switch (runningOrder) {
-                case 3: tempC[2] = coefficients[10]; tempS[1] = pSamplesOut[-11]; /* fallthrough */
-                case 2: tempC[1] = coefficients[ 9]; tempS[2] = pSamplesOut[-10]; /* fallthrough */
-                case 1: tempC[0] = coefficients[ 8]; tempS[3] = pSamplesOut[- 9]; /* fallthrough */
-            }
-
-            coefficients128_8 = vld1q_s32(tempC);
-            samples128_8      = vld1q_s32(tempS);
-            runningOrder = 0;
-        }
-
-        /* Coefficients need to be shuffled for our streaming algorithm below to work. Samples are already in the correct order from the loading routine above. */
-        coefficients128_0 = drflac__vrevq_s32(coefficients128_0);
-        coefficients128_4 = drflac__vrevq_s32(coefficients128_4);
-        coefficients128_8 = drflac__vrevq_s32(coefficients128_8);
-    }
-
-    /* For this version we are doing one sample at a time. */
-    while (pDecodedSamples < pDecodedSamplesEnd) {
-        int32x4_t prediction128;
-        int32x2_t prediction64;
-        uint32x4_t zeroCountPart128;
-        uint32x4_t riceParamPart128;
-
-        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[0], &riceParamParts[0]) ||
-            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[1], &riceParamParts[1]) ||
-            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[2], &riceParamParts[2]) ||
-            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[3], &riceParamParts[3])) {
-            return DRFLAC_FALSE;
-        }
-
-        zeroCountPart128 = vld1q_u32(zeroCountParts);
-        riceParamPart128 = vld1q_u32(riceParamParts);
-
-        riceParamPart128 = vandq_u32(riceParamPart128, riceParamMask128);
-        riceParamPart128 = vorrq_u32(riceParamPart128, vshlq_u32(zeroCountPart128, riceParam128));
-        riceParamPart128 = veorq_u32(vshrq_n_u32(riceParamPart128, 1), vaddq_u32(drflac__vnotq_u32(vandq_u32(riceParamPart128, one128)), one128));
-
-        if (order <= 4) {
-            for (i = 0; i < 4; i += 1) {
-                prediction128 = vmulq_s32(coefficients128_0, samples128_0);
-
-                /* Horizontal add and shift. */
-                prediction64 = drflac__vhaddq_s32(prediction128);
-                prediction64 = vshl_s32(prediction64, shift64);
-                prediction64 = vadd_s32(prediction64, vget_low_s32(vreinterpretq_s32_u32(riceParamPart128)));
-
-                samples128_0 = drflac__valignrq_s32_1(vcombine_s32(prediction64, vdup_n_s32(0)), samples128_0);
-                riceParamPart128 = drflac__valignrq_u32_1(vdupq_n_u32(0), riceParamPart128);
-            }
-        } else if (order <= 8) {
-            for (i = 0; i < 4; i += 1) {
-                prediction128 =                vmulq_s32(coefficients128_4, samples128_4);
-                prediction128 = vmlaq_s32(prediction128, coefficients128_0, samples128_0);
-
-                /* Horizontal add and shift. */
-                prediction64 = drflac__vhaddq_s32(prediction128);
-                prediction64 = vshl_s32(prediction64, shift64);
-                prediction64 = vadd_s32(prediction64, vget_low_s32(vreinterpretq_s32_u32(riceParamPart128)));
-
-                samples128_4 = drflac__valignrq_s32_1(samples128_0, samples128_4);
-                samples128_0 = drflac__valignrq_s32_1(vcombine_s32(prediction64, vdup_n_s32(0)), samples128_0);
-                riceParamPart128 = drflac__valignrq_u32_1(vdupq_n_u32(0), riceParamPart128);
-            }
-        } else {
-            for (i = 0; i < 4; i += 1) {
-                prediction128 =                vmulq_s32(coefficients128_8, samples128_8);
-                prediction128 = vmlaq_s32(prediction128, coefficients128_4, samples128_4);
-                prediction128 = vmlaq_s32(prediction128, coefficients128_0, samples128_0);
-
-                /* Horizontal add and shift. */
-                prediction64 = drflac__vhaddq_s32(prediction128);
-                prediction64 = vshl_s32(prediction64, shift64);
-                prediction64 = vadd_s32(prediction64, vget_low_s32(vreinterpretq_s32_u32(riceParamPart128)));
-
-                samples128_8 = drflac__valignrq_s32_1(samples128_4, samples128_8);
-                samples128_4 = drflac__valignrq_s32_1(samples128_0, samples128_4);
-                samples128_0 = drflac__valignrq_s32_1(vcombine_s32(prediction64, vdup_n_s32(0)), samples128_0);
-                riceParamPart128 = drflac__valignrq_u32_1(vdupq_n_u32(0), riceParamPart128);
-            }
-        }
-
-        /* We store samples in groups of 4. */
-        vst1q_s32(pDecodedSamples, samples128_0);
-        pDecodedSamples += 4;
-    }
-
-    /* Make sure we process the last few samples. */
-    i = (count & ~3);
-    while (i < (int)count) {
-        /* Rice extraction. */
-        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[0], &riceParamParts[0])) {
-            return DRFLAC_FALSE;
-        }
-
-        /* Rice reconstruction. */
-        riceParamParts[0] &= riceParamMask;
-        riceParamParts[0] |= (zeroCountParts[0] << riceParam);
-        riceParamParts[0]  = (riceParamParts[0] >> 1) ^ t[riceParamParts[0] & 0x01];
-
-        /* Sample reconstruction. */
-        pDecodedSamples[0] = riceParamParts[0] + drflac__calculate_prediction_32(order, shift, coefficients, pDecodedSamples);
-
-        i += 1;
-        pDecodedSamples += 1;
-    }
-
-    return DRFLAC_TRUE;
-}
-
-static drflac_bool32 drflac__decode_samples_with_residual__rice__neon_64(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
-{
-    int i;
-    drflac_uint32 riceParamMask;
-    drflac_int32* pDecodedSamples    = pSamplesOut;
-    drflac_int32* pDecodedSamplesEnd = pSamplesOut + (count & ~3);
-    drflac_uint32 zeroCountParts[4];
-    drflac_uint32 riceParamParts[4];
-    int32x4_t coefficients128_0;
-    int32x4_t coefficients128_4;
-    int32x4_t coefficients128_8;
-    int32x4_t samples128_0;
-    int32x4_t samples128_4;
-    int32x4_t samples128_8;
-    uint32x4_t riceParamMask128;
-    int32x4_t riceParam128;
-    int64x1_t shift64;
-    uint32x4_t one128;
-
-    const drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF};
-
-    riceParamMask    = ~((~0UL) << riceParam);
-    riceParamMask128 = vdupq_n_u32(riceParamMask);
-
-    riceParam128 = vdupq_n_s32(riceParam);
-    shift64 = vdup_n_s64(-shift); /* Negate the shift because we'll be doing a variable shift using vshlq_s32(). */
-    one128 = vdupq_n_u32(1);
-
-    /*
-    Pre-loading the coefficients and prior samples is annoying because we need to ensure we don't try reading more than
-    what's available in the input buffers. It would be conenient to use a fall-through switch to do this, but this results
-    in strict aliasing warnings with GCC. To work around this I'm just doing something hacky. This feels a bit convoluted
-    so I think there's opportunity for this to be simplified.
-    */
-    {
-        int runningOrder = order;
-        drflac_int32 tempC[4] = {0, 0, 0, 0};
-        drflac_int32 tempS[4] = {0, 0, 0, 0};
-
-        /* 0 - 3. */
-        if (runningOrder >= 4) {
-            coefficients128_0 = vld1q_s32(coefficients + 0);
-            samples128_0      = vld1q_s32(pSamplesOut  - 4);
-            runningOrder -= 4;
-        } else {
-            switch (runningOrder) {
-                case 3: tempC[2] = coefficients[2]; tempS[1] = pSamplesOut[-3]; /* fallthrough */
-                case 2: tempC[1] = coefficients[1]; tempS[2] = pSamplesOut[-2]; /* fallthrough */
-                case 1: tempC[0] = coefficients[0]; tempS[3] = pSamplesOut[-1]; /* fallthrough */
-            }
-
-            coefficients128_0 = vld1q_s32(tempC);
-            samples128_0      = vld1q_s32(tempS);
-            runningOrder = 0;
-        }
-
-        /* 4 - 7 */
-        if (runningOrder >= 4) {
-            coefficients128_4 = vld1q_s32(coefficients + 4);
-            samples128_4      = vld1q_s32(pSamplesOut  - 8);
-            runningOrder -= 4;
-        } else {
-            switch (runningOrder) {
-                case 3: tempC[2] = coefficients[6]; tempS[1] = pSamplesOut[-7]; /* fallthrough */
-                case 2: tempC[1] = coefficients[5]; tempS[2] = pSamplesOut[-6]; /* fallthrough */
-                case 1: tempC[0] = coefficients[4]; tempS[3] = pSamplesOut[-5]; /* fallthrough */
-            }
-
-            coefficients128_4 = vld1q_s32(tempC);
-            samples128_4      = vld1q_s32(tempS);
-            runningOrder = 0;
-        }
-
-        /* 8 - 11 */
-        if (runningOrder == 4) {
-            coefficients128_8 = vld1q_s32(coefficients + 8);
-            samples128_8      = vld1q_s32(pSamplesOut  - 12);
-            runningOrder -= 4;
-        } else {
-            switch (runningOrder) {
-                case 3: tempC[2] = coefficients[10]; tempS[1] = pSamplesOut[-11]; /* fallthrough */
-                case 2: tempC[1] = coefficients[ 9]; tempS[2] = pSamplesOut[-10]; /* fallthrough */
-                case 1: tempC[0] = coefficients[ 8]; tempS[3] = pSamplesOut[- 9]; /* fallthrough */
-            }
-
-            coefficients128_8 = vld1q_s32(tempC);
-            samples128_8      = vld1q_s32(tempS);
-            runningOrder = 0;
-        }
-
-        /* Coefficients need to be shuffled for our streaming algorithm below to work. Samples are already in the correct order from the loading routine above. */
-        coefficients128_0 = drflac__vrevq_s32(coefficients128_0);
-        coefficients128_4 = drflac__vrevq_s32(coefficients128_4);
-        coefficients128_8 = drflac__vrevq_s32(coefficients128_8);
-    }
-
-    /* For this version we are doing one sample at a time. */
-    while (pDecodedSamples < pDecodedSamplesEnd) {
-        int64x2_t prediction128;
-        uint32x4_t zeroCountPart128;
-        uint32x4_t riceParamPart128;
-
-        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[0], &riceParamParts[0]) ||
-            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[1], &riceParamParts[1]) ||
-            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[2], &riceParamParts[2]) ||
-            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[3], &riceParamParts[3])) {
-            return DRFLAC_FALSE;
-        }
-
-        zeroCountPart128 = vld1q_u32(zeroCountParts);
-        riceParamPart128 = vld1q_u32(riceParamParts);
-
-        riceParamPart128 = vandq_u32(riceParamPart128, riceParamMask128);
-        riceParamPart128 = vorrq_u32(riceParamPart128, vshlq_u32(zeroCountPart128, riceParam128));
-        riceParamPart128 = veorq_u32(vshrq_n_u32(riceParamPart128, 1), vaddq_u32(drflac__vnotq_u32(vandq_u32(riceParamPart128, one128)), one128));
-
-        for (i = 0; i < 4; i += 1) {
-            int64x1_t prediction64;
-
-            prediction128 = veorq_s64(prediction128, prediction128);    /* Reset to 0. */
-            switch (order)
-            {
-            case 12:
-            case 11: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_low_s32(coefficients128_8), vget_low_s32(samples128_8)));
-            case 10:
-            case  9: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_high_s32(coefficients128_8), vget_high_s32(samples128_8)));
-            case  8:
-            case  7: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_low_s32(coefficients128_4), vget_low_s32(samples128_4)));
-            case  6:
-            case  5: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_high_s32(coefficients128_4), vget_high_s32(samples128_4)));
-            case  4:
-            case  3: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_low_s32(coefficients128_0), vget_low_s32(samples128_0)));
-            case  2:
-            case  1: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_high_s32(coefficients128_0), vget_high_s32(samples128_0)));
-            }
-
-            /* Horizontal add and shift. */
-            prediction64 = drflac__vhaddq_s64(prediction128);
-            prediction64 = vshl_s64(prediction64, shift64);
-            prediction64 = vadd_s64(prediction64, vdup_n_s64(vgetq_lane_u32(riceParamPart128, 0)));
-
-            /* Our value should be sitting in prediction64[0]. We need to combine this with our SSE samples. */
-            samples128_8 = drflac__valignrq_s32_1(samples128_4, samples128_8);
-            samples128_4 = drflac__valignrq_s32_1(samples128_0, samples128_4);
-            samples128_0 = drflac__valignrq_s32_1(vcombine_s32(vreinterpret_s32_s64(prediction64), vdup_n_s32(0)), samples128_0);
-
-            /* Slide our rice parameter down so that the value in position 0 contains the next one to process. */
-            riceParamPart128 = drflac__valignrq_u32_1(vdupq_n_u32(0), riceParamPart128);
-        }
-
-        /* We store samples in groups of 4. */
-        vst1q_s32(pDecodedSamples, samples128_0);
-        pDecodedSamples += 4;
-    }
-
-    /* Make sure we process the last few samples. */
-    i = (count & ~3);
-    while (i < (int)count) {
-        /* Rice extraction. */
-        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[0], &riceParamParts[0])) {
-            return DRFLAC_FALSE;
-        }
-
-        /* Rice reconstruction. */
-        riceParamParts[0] &= riceParamMask;
-        riceParamParts[0] |= (zeroCountParts[0] << riceParam);
-        riceParamParts[0]  = (riceParamParts[0] >> 1) ^ t[riceParamParts[0] & 0x01];
-
-        /* Sample reconstruction. */
-        pDecodedSamples[0] = riceParamParts[0] + drflac__calculate_prediction_64(order, shift, coefficients, pDecodedSamples);
-
-        i += 1;
-        pDecodedSamples += 1;
-    }
-
-    return DRFLAC_TRUE;
-}
-
-static drflac_bool32 drflac__decode_samples_with_residual__rice__neon(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
-{
-    DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(count > 0);
-    DRFLAC_ASSERT(pSamplesOut != NULL);
-
-    /* In my testing the order is rarely > 12, so in this case I'm going to simplify the NEON implementation by only handling order <= 12. */
-    if (order > 0 && order <= 12) {
-        if (bitsPerSample+shift > 32) {
-            return drflac__decode_samples_with_residual__rice__neon_64(bs, count, riceParam, order, shift, coefficients, pSamplesOut);
-        } else {
-            return drflac__decode_samples_with_residual__rice__neon_32(bs, count, riceParam, order, shift, coefficients, pSamplesOut);
-        }
-    } else {
-        return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
-    }
-}
-#endif
-
-static drflac_bool32 drflac__decode_samples_with_residual__rice(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
-{
-#if defined(DRFLAC_SUPPORT_SSE41)
-    if (drflac__gIsSSE41Supported) {
-        return drflac__decode_samples_with_residual__rice__sse41(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
-    } else
-#elif defined(DRFLAC_SUPPORT_NEON)
-    if (drflac__gIsNEONSupported) {
-        return drflac__decode_samples_with_residual__rice__neon(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
-    } else
-#endif
-    {
-        /* Scalar fallback. */
-    #if 0
-        return drflac__decode_samples_with_residual__rice__reference(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
-    #else
-        return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
-    #endif
-    }
-}
-
-/* Reads and seeks past a string of residual values as Rice codes. The decoder should be sitting on the first bit of the Rice codes. */
-static drflac_bool32 drflac__read_and_seek_residual__rice(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam)
-{
-    drflac_uint32 i;
-
-    DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(count > 0);
-
-    for (i = 0; i < count; ++i) {
-        if (!drflac__seek_rice_parts(bs, riceParam)) {
-            return DRFLAC_FALSE;
-        }
-    }
-
-    return DRFLAC_TRUE;
-}
-
-static drflac_bool32 drflac__decode_samples_with_residual__unencoded(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 unencodedBitsPerSample, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
-{
-    drflac_uint32 i;
-
-    DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(count > 0);
-    DRFLAC_ASSERT(unencodedBitsPerSample <= 31);    /* <-- unencodedBitsPerSample is a 5 bit number, so cannot exceed 31. */
-    DRFLAC_ASSERT(pSamplesOut != NULL);
-
-    for (i = 0; i < count; ++i) {
-        if (unencodedBitsPerSample > 0) {
-            if (!drflac__read_int32(bs, unencodedBitsPerSample, pSamplesOut + i)) {
-                return DRFLAC_FALSE;
-            }
-        } else {
-            pSamplesOut[i] = 0;
-        }
-
-        if (bitsPerSample >= 24) {
-            pSamplesOut[i] += drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + i);
-        } else {
-            pSamplesOut[i] += drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + i);
-        }
-    }
-
-    return DRFLAC_TRUE;
-}
-
-
-/*
-Reads and decodes the residual for the sub-frame the decoder is currently sitting on. This function should be called
-when the decoder is sitting at the very start of the RESIDUAL block. The first <order> residuals will be ignored. The
-<blockSize> and <order> parameters are used to determine how many residual values need to be decoded.
-*/
-static drflac_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 blockSize, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pDecodedSamples)
-{
-    drflac_uint8 residualMethod;
-    drflac_uint8 partitionOrder;
-    drflac_uint32 samplesInPartition;
-    drflac_uint32 partitionsRemaining;
-
-    DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(blockSize != 0);
-    DRFLAC_ASSERT(pDecodedSamples != NULL);       /* <-- Should we allow NULL, in which case we just seek past the residual rather than do a full decode? */
-
-    if (!drflac__read_uint8(bs, 2, &residualMethod)) {
-        return DRFLAC_FALSE;
-    }
-
-    if (residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE && residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) {
-        return DRFLAC_FALSE;    /* Unknown or unsupported residual coding method. */
-    }
-
-    /* Ignore the first <order> values. */
-    pDecodedSamples += order;
-
-    if (!drflac__read_uint8(bs, 4, &partitionOrder)) {
-        return DRFLAC_FALSE;
-    }
-
-    /*
-    From the FLAC spec:
-      The Rice partition order in a Rice-coded residual section must be less than or equal to 8.
-    */
-    if (partitionOrder > 8) {
-        return DRFLAC_FALSE;
-    }
-
-    /* Validation check. */
-    if ((blockSize / (1 << partitionOrder)) <= order) {
-        return DRFLAC_FALSE;
-    }
-
-    samplesInPartition = (blockSize / (1 << partitionOrder)) - order;
-    partitionsRemaining = (1 << partitionOrder);
-    for (;;) {
-        drflac_uint8 riceParam = 0;
-        if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE) {
-            if (!drflac__read_uint8(bs, 4, &riceParam)) {
-                return DRFLAC_FALSE;
-            }
-            if (riceParam == 15) {
-                riceParam = 0xFF;
-            }
-        } else if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) {
-            if (!drflac__read_uint8(bs, 5, &riceParam)) {
-                return DRFLAC_FALSE;
-            }
-            if (riceParam == 31) {
-                riceParam = 0xFF;
-            }
-        }
-
-        if (riceParam != 0xFF) {
-            if (!drflac__decode_samples_with_residual__rice(bs, bitsPerSample, samplesInPartition, riceParam, order, shift, coefficients, pDecodedSamples)) {
-                return DRFLAC_FALSE;
-            }
-        } else {
-            drflac_uint8 unencodedBitsPerSample = 0;
-            if (!drflac__read_uint8(bs, 5, &unencodedBitsPerSample)) {
-                return DRFLAC_FALSE;
-            }
-
-            if (!drflac__decode_samples_with_residual__unencoded(bs, bitsPerSample, samplesInPartition, unencodedBitsPerSample, order, shift, coefficients, pDecodedSamples)) {
-                return DRFLAC_FALSE;
-            }
-        }
-
-        pDecodedSamples += samplesInPartition;
-
-        if (partitionsRemaining == 1) {
-            break;
-        }
-
-        partitionsRemaining -= 1;
-
-        if (partitionOrder != 0) {
-            samplesInPartition = blockSize / (1 << partitionOrder);
-        }
-    }
-
-    return DRFLAC_TRUE;
-}
-
-/*
-Reads and seeks past the residual for the sub-frame the decoder is currently sitting on. This function should be called
-when the decoder is sitting at the very start of the RESIDUAL block. The first <order> residuals will be set to 0. The
-<blockSize> and <order> parameters are used to determine how many residual values need to be decoded.
-*/
-static drflac_bool32 drflac__read_and_seek_residual(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 order)
-{
-    drflac_uint8 residualMethod;
-    drflac_uint8 partitionOrder;
-    drflac_uint32 samplesInPartition;
-    drflac_uint32 partitionsRemaining;
-
-    DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(blockSize != 0);
-
-    if (!drflac__read_uint8(bs, 2, &residualMethod)) {
-        return DRFLAC_FALSE;
-    }
-
-    if (residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE && residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) {
-        return DRFLAC_FALSE;    /* Unknown or unsupported residual coding method. */
-    }
-
-    if (!drflac__read_uint8(bs, 4, &partitionOrder)) {
-        return DRFLAC_FALSE;
-    }
-
-    /*
-    From the FLAC spec:
-      The Rice partition order in a Rice-coded residual section must be less than or equal to 8.
-    */
-    if (partitionOrder > 8) {
-        return DRFLAC_FALSE;
-    }
-
-    /* Validation check. */
-    if ((blockSize / (1 << partitionOrder)) <= order) {
-        return DRFLAC_FALSE;
-    }
-
-    samplesInPartition = (blockSize / (1 << partitionOrder)) - order;
-    partitionsRemaining = (1 << partitionOrder);
-    for (;;)
-    {
-        drflac_uint8 riceParam = 0;
-        if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE) {
-            if (!drflac__read_uint8(bs, 4, &riceParam)) {
-                return DRFLAC_FALSE;
-            }
-            if (riceParam == 15) {
-                riceParam = 0xFF;
-            }
-        } else if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) {
-            if (!drflac__read_uint8(bs, 5, &riceParam)) {
-                return DRFLAC_FALSE;
-            }
-            if (riceParam == 31) {
-                riceParam = 0xFF;
-            }
-        }
-
-        if (riceParam != 0xFF) {
-            if (!drflac__read_and_seek_residual__rice(bs, samplesInPartition, riceParam)) {
-                return DRFLAC_FALSE;
-            }
-        } else {
-            drflac_uint8 unencodedBitsPerSample = 0;
-            if (!drflac__read_uint8(bs, 5, &unencodedBitsPerSample)) {
-                return DRFLAC_FALSE;
-            }
-
-            if (!drflac__seek_bits(bs, unencodedBitsPerSample * samplesInPartition)) {
-                return DRFLAC_FALSE;
-            }
-        }
-
-
-        if (partitionsRemaining == 1) {
-            break;
-        }
-
-        partitionsRemaining -= 1;
-        samplesInPartition = blockSize / (1 << partitionOrder);
-    }
-
-    return DRFLAC_TRUE;
-}
-
-
-static drflac_bool32 drflac__decode_samples__constant(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 subframeBitsPerSample, drflac_int32* pDecodedSamples)
-{
-    drflac_uint32 i;
-
-    /* Only a single sample needs to be decoded here. */
-    drflac_int32 sample;
-    if (!drflac__read_int32(bs, subframeBitsPerSample, &sample)) {
-        return DRFLAC_FALSE;
-    }
-
-    /*
-    We don't really need to expand this, but it does simplify the process of reading samples. If this becomes a performance issue (unlikely)
-    we'll want to look at a more efficient way.
-    */
-    for (i = 0; i < blockSize; ++i) {
-        pDecodedSamples[i] = sample;
-    }
-
-    return DRFLAC_TRUE;
-}
-
-static drflac_bool32 drflac__decode_samples__verbatim(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 subframeBitsPerSample, drflac_int32* pDecodedSamples)
-{
-    drflac_uint32 i;
-
-    for (i = 0; i < blockSize; ++i) {
-        drflac_int32 sample;
-        if (!drflac__read_int32(bs, subframeBitsPerSample, &sample)) {
-            return DRFLAC_FALSE;
-        }
-
-        pDecodedSamples[i] = sample;
-    }
-
-    return DRFLAC_TRUE;
-}
-
-static drflac_bool32 drflac__decode_samples__fixed(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 subframeBitsPerSample, drflac_uint8 lpcOrder, drflac_int32* pDecodedSamples)
-{
-    drflac_uint32 i;
-
-    static drflac_int32 lpcCoefficientsTable[5][4] = {
-        {0,  0, 0,  0},
-        {1,  0, 0,  0},
-        {2, -1, 0,  0},
-        {3, -3, 1,  0},
-        {4, -6, 4, -1}
-    };
-
-    /* Warm up samples and coefficients. */
-    for (i = 0; i < lpcOrder; ++i) {
-        drflac_int32 sample;
-        if (!drflac__read_int32(bs, subframeBitsPerSample, &sample)) {
-            return DRFLAC_FALSE;
-        }
-
-        pDecodedSamples[i] = sample;
-    }
-
-    if (!drflac__decode_samples_with_residual(bs, subframeBitsPerSample, blockSize, lpcOrder, 0, lpcCoefficientsTable[lpcOrder], pDecodedSamples)) {
-        return DRFLAC_FALSE;
-    }
-
-    return DRFLAC_TRUE;
-}
-
-static drflac_bool32 drflac__decode_samples__lpc(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 bitsPerSample, drflac_uint8 lpcOrder, drflac_int32* pDecodedSamples)
-{
-    drflac_uint8 i;
-    drflac_uint8 lpcPrecision;
-    drflac_int8 lpcShift;
-    drflac_int32 coefficients[32];
-
-    /* Warm up samples. */
-    for (i = 0; i < lpcOrder; ++i) {
-        drflac_int32 sample;
-        if (!drflac__read_int32(bs, bitsPerSample, &sample)) {
-            return DRFLAC_FALSE;
-        }
-
-        pDecodedSamples[i] = sample;
-    }
-
-    if (!drflac__read_uint8(bs, 4, &lpcPrecision)) {
-        return DRFLAC_FALSE;
-    }
-    if (lpcPrecision == 15) {
-        return DRFLAC_FALSE;    /* Invalid. */
-    }
-    lpcPrecision += 1;
-
-    if (!drflac__read_int8(bs, 5, &lpcShift)) {
-        return DRFLAC_FALSE;
-    }
-
-    DRFLAC_ZERO_MEMORY(coefficients, sizeof(coefficients));
-    for (i = 0; i < lpcOrder; ++i) {
-        if (!drflac__read_int32(bs, lpcPrecision, coefficients + i)) {
-            return DRFLAC_FALSE;
-        }
-    }
-
-    if (!drflac__decode_samples_with_residual(bs, bitsPerSample, blockSize, lpcOrder, lpcShift, coefficients, pDecodedSamples)) {
-        return DRFLAC_FALSE;
-    }
-
-    return DRFLAC_TRUE;
-}
-
-
-static drflac_bool32 drflac__read_next_flac_frame_header(drflac_bs* bs, drflac_uint8 streaminfoBitsPerSample, drflac_frame_header* header)
-{
-    const drflac_uint32 sampleRateTable[12]  = {0, 88200, 176400, 192000, 8000, 16000, 22050, 24000, 32000, 44100, 48000, 96000};
-    const drflac_uint8 bitsPerSampleTable[8] = {0, 8, 12, (drflac_uint8)-1, 16, 20, 24, (drflac_uint8)-1};   /* -1 = reserved. */
-
-    DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(header != NULL);
-
-    /* Keep looping until we find a valid sync code. */
-    for (;;) {
-        drflac_uint8 crc8 = 0xCE; /* 0xCE = drflac_crc8(0, 0x3FFE, 14); */
-        drflac_uint8 reserved = 0;
-        drflac_uint8 blockingStrategy = 0;
-        drflac_uint8 blockSize = 0;
-        drflac_uint8 sampleRate = 0;
-        drflac_uint8 channelAssignment = 0;
-        drflac_uint8 bitsPerSample = 0;
-        drflac_bool32 isVariableBlockSize;
-
-        if (!drflac__find_and_seek_to_next_sync_code(bs)) {
-            return DRFLAC_FALSE;
-        }
-
-        if (!drflac__read_uint8(bs, 1, &reserved)) {
-            return DRFLAC_FALSE;
-        }
-        if (reserved == 1) {
-            continue;
-        }
-        crc8 = drflac_crc8(crc8, reserved, 1);
-
-        if (!drflac__read_uint8(bs, 1, &blockingStrategy)) {
-            return DRFLAC_FALSE;
-        }
-        crc8 = drflac_crc8(crc8, blockingStrategy, 1);
-
-        if (!drflac__read_uint8(bs, 4, &blockSize)) {
-            return DRFLAC_FALSE;
-        }
-        if (blockSize == 0) {
-            continue;
-        }
-        crc8 = drflac_crc8(crc8, blockSize, 4);
-
-        if (!drflac__read_uint8(bs, 4, &sampleRate)) {
-            return DRFLAC_FALSE;
-        }
-        crc8 = drflac_crc8(crc8, sampleRate, 4);
-
-        if (!drflac__read_uint8(bs, 4, &channelAssignment)) {
-            return DRFLAC_FALSE;
-        }
-        if (channelAssignment > 10) {
-            continue;
-        }
-        crc8 = drflac_crc8(crc8, channelAssignment, 4);
-
-        if (!drflac__read_uint8(bs, 3, &bitsPerSample)) {
-            return DRFLAC_FALSE;
-        }
-        if (bitsPerSample == 3 || bitsPerSample == 7) {
-            continue;
-        }
-        crc8 = drflac_crc8(crc8, bitsPerSample, 3);
-
-
-        if (!drflac__read_uint8(bs, 1, &reserved)) {
-            return DRFLAC_FALSE;
-        }
-        if (reserved == 1) {
-            continue;
-        }
-        crc8 = drflac_crc8(crc8, reserved, 1);
-
-
-        isVariableBlockSize = blockingStrategy == 1;
-        if (isVariableBlockSize) {
-            drflac_uint64 pcmFrameNumber;
-            drflac_result result = drflac__read_utf8_coded_number(bs, &pcmFrameNumber, &crc8);
-            if (result != DRFLAC_SUCCESS) {
-                if (result == DRFLAC_AT_END) {
-                    return DRFLAC_FALSE;
-                } else {
-                    continue;
-                }
-            }
-            header->flacFrameNumber  = 0;
-            header->pcmFrameNumber = pcmFrameNumber;
-        } else {
-            drflac_uint64 flacFrameNumber = 0;
-            drflac_result result = drflac__read_utf8_coded_number(bs, &flacFrameNumber, &crc8);
-            if (result != DRFLAC_SUCCESS) {
-                if (result == DRFLAC_AT_END) {
-                    return DRFLAC_FALSE;
-                } else {
-                    continue;
-                }
-            }
-            header->flacFrameNumber  = (drflac_uint32)flacFrameNumber;   /* <-- Safe cast. */
-            header->pcmFrameNumber = 0;
-        }
-
-
-        DRFLAC_ASSERT(blockSize > 0);
-        if (blockSize == 1) {
-            header->blockSizeInPCMFrames = 192;
-        } else if (blockSize >= 2 && blockSize <= 5) {
-            header->blockSizeInPCMFrames = 576 * (1 << (blockSize - 2));
-        } else if (blockSize == 6) {
-            if (!drflac__read_uint16(bs, 8, &header->blockSizeInPCMFrames)) {
-                return DRFLAC_FALSE;
-            }
-            crc8 = drflac_crc8(crc8, header->blockSizeInPCMFrames, 8);
-            header->blockSizeInPCMFrames += 1;
-        } else if (blockSize == 7) {
-            if (!drflac__read_uint16(bs, 16, &header->blockSizeInPCMFrames)) {
-                return DRFLAC_FALSE;
-            }
-            crc8 = drflac_crc8(crc8, header->blockSizeInPCMFrames, 16);
-            header->blockSizeInPCMFrames += 1;
-        } else {
-            DRFLAC_ASSERT(blockSize >= 8);
-            header->blockSizeInPCMFrames = 256 * (1 << (blockSize - 8));
-        }
-
-
-        if (sampleRate <= 11) {
-            header->sampleRate = sampleRateTable[sampleRate];
-        } else if (sampleRate == 12) {
-            if (!drflac__read_uint32(bs, 8, &header->sampleRate)) {
-                return DRFLAC_FALSE;
-            }
-            crc8 = drflac_crc8(crc8, header->sampleRate, 8);
-            header->sampleRate *= 1000;
-        } else if (sampleRate == 13) {
-            if (!drflac__read_uint32(bs, 16, &header->sampleRate)) {
-                return DRFLAC_FALSE;
-            }
-            crc8 = drflac_crc8(crc8, header->sampleRate, 16);
-        } else if (sampleRate == 14) {
-            if (!drflac__read_uint32(bs, 16, &header->sampleRate)) {
-                return DRFLAC_FALSE;
-            }
-            crc8 = drflac_crc8(crc8, header->sampleRate, 16);
-            header->sampleRate *= 10;
-        } else {
-            continue;  /* Invalid. Assume an invalid block. */
-        }
-
-
-        header->channelAssignment = channelAssignment;
-
-        header->bitsPerSample = bitsPerSampleTable[bitsPerSample];
-        if (header->bitsPerSample == 0) {
-            header->bitsPerSample = streaminfoBitsPerSample;
-        }
-
-        if (!drflac__read_uint8(bs, 8, &header->crc8)) {
-            return DRFLAC_FALSE;
-        }
-
-#ifndef DR_FLAC_NO_CRC
-        if (header->crc8 != crc8) {
-            continue;    /* CRC mismatch. Loop back to the top and find the next sync code. */
-        }
-#endif
-        return DRFLAC_TRUE;
-    }
-}
-
-static drflac_bool32 drflac__read_subframe_header(drflac_bs* bs, drflac_subframe* pSubframe)
-{
-    drflac_uint8 header;
-    int type;
-
-    if (!drflac__read_uint8(bs, 8, &header)) {
-        return DRFLAC_FALSE;
-    }
-
-    /* First bit should always be 0. */
-    if ((header & 0x80) != 0) {
-        return DRFLAC_FALSE;
-    }
-
-    type = (header & 0x7E) >> 1;
-    if (type == 0) {
-        pSubframe->subframeType = DRFLAC_SUBFRAME_CONSTANT;
-    } else if (type == 1) {
-        pSubframe->subframeType = DRFLAC_SUBFRAME_VERBATIM;
-    } else {
-        if ((type & 0x20) != 0) {
-            pSubframe->subframeType = DRFLAC_SUBFRAME_LPC;
-            pSubframe->lpcOrder = (drflac_uint8)(type & 0x1F) + 1;
-        } else if ((type & 0x08) != 0) {
-            pSubframe->subframeType = DRFLAC_SUBFRAME_FIXED;
-            pSubframe->lpcOrder = (drflac_uint8)(type & 0x07);
-            if (pSubframe->lpcOrder > 4) {
-                pSubframe->subframeType = DRFLAC_SUBFRAME_RESERVED;
-                pSubframe->lpcOrder = 0;
-            }
-        } else {
-            pSubframe->subframeType = DRFLAC_SUBFRAME_RESERVED;
-        }
-    }
-
-    if (pSubframe->subframeType == DRFLAC_SUBFRAME_RESERVED) {
-        return DRFLAC_FALSE;
-    }
-
-    /* Wasted bits per sample. */
-    pSubframe->wastedBitsPerSample = 0;
-    if ((header & 0x01) == 1) {
-        unsigned int wastedBitsPerSample;
-        if (!drflac__seek_past_next_set_bit(bs, &wastedBitsPerSample)) {
-            return DRFLAC_FALSE;
-        }
-        pSubframe->wastedBitsPerSample = (drflac_uint8)wastedBitsPerSample + 1;
-    }
-
-    return DRFLAC_TRUE;
-}
-
-static drflac_bool32 drflac__decode_subframe(drflac_bs* bs, drflac_frame* frame, int subframeIndex, drflac_int32* pDecodedSamplesOut)
-{
-    drflac_subframe* pSubframe;
-    drflac_uint32 subframeBitsPerSample;
-
-    DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(frame != NULL);
-
-    pSubframe = frame->subframes + subframeIndex;
-    if (!drflac__read_subframe_header(bs, pSubframe)) {
-        return DRFLAC_FALSE;
-    }
-
-    /* Side channels require an extra bit per sample. Took a while to figure that one out... */
-    subframeBitsPerSample = frame->header.bitsPerSample;
-    if ((frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE || frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE) && subframeIndex == 1) {
-        subframeBitsPerSample += 1;
-    } else if (frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE && subframeIndex == 0) {
-        subframeBitsPerSample += 1;
-    }
-
-    /* Need to handle wasted bits per sample. */
-    if (pSubframe->wastedBitsPerSample >= subframeBitsPerSample) {
-        return DRFLAC_FALSE;
-    }
-    subframeBitsPerSample -= pSubframe->wastedBitsPerSample;
-
-    pSubframe->pSamplesS32 = pDecodedSamplesOut;
-
-    switch (pSubframe->subframeType)
-    {
-        case DRFLAC_SUBFRAME_CONSTANT:
-        {
-            drflac__decode_samples__constant(bs, frame->header.blockSizeInPCMFrames, subframeBitsPerSample, pSubframe->pSamplesS32);
-        } break;
-
-        case DRFLAC_SUBFRAME_VERBATIM:
-        {
-            drflac__decode_samples__verbatim(bs, frame->header.blockSizeInPCMFrames, subframeBitsPerSample, pSubframe->pSamplesS32);
-        } break;
-
-        case DRFLAC_SUBFRAME_FIXED:
-        {
-            drflac__decode_samples__fixed(bs, frame->header.blockSizeInPCMFrames, subframeBitsPerSample, pSubframe->lpcOrder, pSubframe->pSamplesS32);
-        } break;
-
-        case DRFLAC_SUBFRAME_LPC:
-        {
-            drflac__decode_samples__lpc(bs, frame->header.blockSizeInPCMFrames, subframeBitsPerSample, pSubframe->lpcOrder, pSubframe->pSamplesS32);
-        } break;
-
-        default: return DRFLAC_FALSE;
-    }
-
-    return DRFLAC_TRUE;
-}
-
-static drflac_bool32 drflac__seek_subframe(drflac_bs* bs, drflac_frame* frame, int subframeIndex)
-{
-    drflac_subframe* pSubframe;
-    drflac_uint32 subframeBitsPerSample;
-
-    DRFLAC_ASSERT(bs != NULL);
-    DRFLAC_ASSERT(frame != NULL);
-
-    pSubframe = frame->subframes + subframeIndex;
-    if (!drflac__read_subframe_header(bs, pSubframe)) {
-        return DRFLAC_FALSE;
-    }
-
-    /* Side channels require an extra bit per sample. Took a while to figure that one out... */
-    subframeBitsPerSample = frame->header.bitsPerSample;
-    if ((frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE || frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE) && subframeIndex == 1) {
-        subframeBitsPerSample += 1;
-    } else if (frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE && subframeIndex == 0) {
-        subframeBitsPerSample += 1;
-    }
-
-    /* Need to handle wasted bits per sample. */
-    if (pSubframe->wastedBitsPerSample >= subframeBitsPerSample) {
-        return DRFLAC_FALSE;
-    }
-    subframeBitsPerSample -= pSubframe->wastedBitsPerSample;
-
-    pSubframe->pSamplesS32 = NULL;
-
-    switch (pSubframe->subframeType)
-    {
-        case DRFLAC_SUBFRAME_CONSTANT:
-        {
-            if (!drflac__seek_bits(bs, subframeBitsPerSample)) {
-                return DRFLAC_FALSE;
-            }
-        } break;
-
-        case DRFLAC_SUBFRAME_VERBATIM:
-        {
-            unsigned int bitsToSeek = frame->header.blockSizeInPCMFrames * subframeBitsPerSample;
-            if (!drflac__seek_bits(bs, bitsToSeek)) {
-                return DRFLAC_FALSE;
-            }
-        } break;
-
-        case DRFLAC_SUBFRAME_FIXED:
-        {
-            unsigned int bitsToSeek = pSubframe->lpcOrder * subframeBitsPerSample;
-            if (!drflac__seek_bits(bs, bitsToSeek)) {
-                return DRFLAC_FALSE;
-            }
-
-            if (!drflac__read_and_seek_residual(bs, frame->header.blockSizeInPCMFrames, pSubframe->lpcOrder)) {
-                return DRFLAC_FALSE;
-            }
-        } break;
-
-        case DRFLAC_SUBFRAME_LPC:
-        {
-            drflac_uint8 lpcPrecision;
-
-            unsigned int bitsToSeek = pSubframe->lpcOrder * subframeBitsPerSample;
-            if (!drflac__seek_bits(bs, bitsToSeek)) {
-                return DRFLAC_FALSE;
-            }
-
-            if (!drflac__read_uint8(bs, 4, &lpcPrecision)) {
-                return DRFLAC_FALSE;
-            }
-            if (lpcPrecision == 15) {
-                return DRFLAC_FALSE;    /* Invalid. */
-            }
-            lpcPrecision += 1;
-
-
-            bitsToSeek = (pSubframe->lpcOrder * lpcPrecision) + 5;    /* +5 for shift. */
-            if (!drflac__seek_bits(bs, bitsToSeek)) {
-                return DRFLAC_FALSE;
-            }
-
-            if (!drflac__read_and_seek_residual(bs, frame->header.blockSizeInPCMFrames, pSubframe->lpcOrder)) {
-                return DRFLAC_FALSE;
-            }
-        } break;
-
-        default: return DRFLAC_FALSE;
-    }
-
-    return DRFLAC_TRUE;
-}
-
-
-static DRFLAC_INLINE drflac_uint8 drflac__get_channel_count_from_channel_assignment(drflac_int8 channelAssignment)
-{
-    drflac_uint8 lookup[] = {1, 2, 3, 4, 5, 6, 7, 8, 2, 2, 2};
-
-    DRFLAC_ASSERT(channelAssignment <= 10);
-    return lookup[channelAssignment];
-}
-
-static drflac_result drflac__decode_flac_frame(drflac* pFlac)
-{
-    int channelCount;
-    int i;
-    drflac_uint8 paddingSizeInBits;
-    drflac_uint16 desiredCRC16;
-#ifndef DR_FLAC_NO_CRC
-    drflac_uint16 actualCRC16;
-#endif
-
-    /* This function should be called while the stream is sitting on the first byte after the frame header. */
-    DRFLAC_ZERO_MEMORY(pFlac->currentFLACFrame.subframes, sizeof(pFlac->currentFLACFrame.subframes));
-
-    /* The frame block size must never be larger than the maximum block size defined by the FLAC stream. */
-    if (pFlac->currentFLACFrame.header.blockSizeInPCMFrames > pFlac->maxBlockSizeInPCMFrames) {
-        return DRFLAC_ERROR;
-    }
-
-    /* The number of channels in the frame must match the channel count from the STREAMINFO block. */
-    channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFLACFrame.header.channelAssignment);
-    if (channelCount != (int)pFlac->channels) {
-        return DRFLAC_ERROR;
-    }
-
-    for (i = 0; i < channelCount; ++i) {
-        if (!drflac__decode_subframe(&pFlac->bs, &pFlac->currentFLACFrame, i, pFlac->pDecodedSamples + (pFlac->currentFLACFrame.header.blockSizeInPCMFrames * i))) {
-            return DRFLAC_ERROR;
-        }
-    }
-
-    paddingSizeInBits = (drflac_uint8)(DRFLAC_CACHE_L1_BITS_REMAINING(&pFlac->bs) & 7);
-    if (paddingSizeInBits > 0) {
-        drflac_uint8 padding = 0;
-        if (!drflac__read_uint8(&pFlac->bs, paddingSizeInBits, &padding)) {
-            return DRFLAC_AT_END;
-        }
-    }
-
-#ifndef DR_FLAC_NO_CRC
-    actualCRC16 = drflac__flush_crc16(&pFlac->bs);
-#endif
-    if (!drflac__read_uint16(&pFlac->bs, 16, &desiredCRC16)) {
-        return DRFLAC_AT_END;
-    }
-
-#ifndef DR_FLAC_NO_CRC
-    if (actualCRC16 != desiredCRC16) {
-        return DRFLAC_CRC_MISMATCH;    /* CRC mismatch. */
-    }
-#endif
-
-    pFlac->currentFLACFrame.pcmFramesRemaining = pFlac->currentFLACFrame.header.blockSizeInPCMFrames;
-
-    return DRFLAC_SUCCESS;
-}
-
-static drflac_result drflac__seek_flac_frame(drflac* pFlac)
-{
-    int channelCount;
-    int i;
-    drflac_uint16 desiredCRC16;
-#ifndef DR_FLAC_NO_CRC
-    drflac_uint16 actualCRC16;
-#endif
-
-    channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFLACFrame.header.channelAssignment);
-    for (i = 0; i < channelCount; ++i) {
-        if (!drflac__seek_subframe(&pFlac->bs, &pFlac->currentFLACFrame, i)) {
-            return DRFLAC_ERROR;
-        }
-    }
-
-    /* Padding. */
-    if (!drflac__seek_bits(&pFlac->bs, DRFLAC_CACHE_L1_BITS_REMAINING(&pFlac->bs) & 7)) {
-        return DRFLAC_ERROR;
-    }
-
-    /* CRC. */
-#ifndef DR_FLAC_NO_CRC
-    actualCRC16 = drflac__flush_crc16(&pFlac->bs);
-#endif
-    if (!drflac__read_uint16(&pFlac->bs, 16, &desiredCRC16)) {
-        return DRFLAC_AT_END;
-    }
-
-#ifndef DR_FLAC_NO_CRC
-    if (actualCRC16 != desiredCRC16) {
-        return DRFLAC_CRC_MISMATCH;    /* CRC mismatch. */
-    }
-#endif
-
-    return DRFLAC_SUCCESS;
-}
-
-static drflac_bool32 drflac__read_and_decode_next_flac_frame(drflac* pFlac)
-{
-    DRFLAC_ASSERT(pFlac != NULL);
-
-    for (;;) {
-        drflac_result result;
-
-        if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
-            return DRFLAC_FALSE;
-        }
-
-        result = drflac__decode_flac_frame(pFlac);
-        if (result != DRFLAC_SUCCESS) {
-            if (result == DRFLAC_CRC_MISMATCH) {
-                continue;   /* CRC mismatch. Skip to the next frame. */
-            } else {
-                return DRFLAC_FALSE;
-            }
-        }
-
-        return DRFLAC_TRUE;
-    }
-}
-
-static void drflac__get_pcm_frame_range_of_current_flac_frame(drflac* pFlac, drflac_uint64* pFirstPCMFrame, drflac_uint64* pLastPCMFrame)
-{
-    drflac_uint64 firstPCMFrame;
-    drflac_uint64 lastPCMFrame;
-
-    DRFLAC_ASSERT(pFlac != NULL);
-
-    firstPCMFrame = pFlac->currentFLACFrame.header.pcmFrameNumber;
-    if (firstPCMFrame == 0) {
-        firstPCMFrame = ((drflac_uint64)pFlac->currentFLACFrame.header.flacFrameNumber) * pFlac->maxBlockSizeInPCMFrames;
-    }
-
-    lastPCMFrame = firstPCMFrame + pFlac->currentFLACFrame.header.blockSizeInPCMFrames;
-    if (lastPCMFrame > 0) {
-        lastPCMFrame -= 1; /* Needs to be zero based. */
-    }
-
-    if (pFirstPCMFrame) {
-        *pFirstPCMFrame = firstPCMFrame;
-    }
-    if (pLastPCMFrame) {
-        *pLastPCMFrame = lastPCMFrame;
-    }
-}
-
-static drflac_bool32 drflac__seek_to_first_frame(drflac* pFlac)
-{
-    drflac_bool32 result;
-
-    DRFLAC_ASSERT(pFlac != NULL);
-
-    result = drflac__seek_to_byte(&pFlac->bs, pFlac->firstFLACFramePosInBytes);
-
-    DRFLAC_ZERO_MEMORY(&pFlac->currentFLACFrame, sizeof(pFlac->currentFLACFrame));
-    pFlac->currentPCMFrame = 0;
-
-    return result;
-}
-
-static DRFLAC_INLINE drflac_result drflac__seek_to_next_flac_frame(drflac* pFlac)
-{
-    /* This function should only ever be called while the decoder is sitting on the first byte past the FRAME_HEADER section. */
-    DRFLAC_ASSERT(pFlac != NULL);
-    return drflac__seek_flac_frame(pFlac);
-}
-
-
-static drflac_uint64 drflac__seek_forward_by_pcm_frames(drflac* pFlac, drflac_uint64 pcmFramesToSeek)
-{
-    drflac_uint64 pcmFramesRead = 0;
-    while (pcmFramesToSeek > 0) {
-        if (pFlac->currentFLACFrame.pcmFramesRemaining == 0) {
-            if (!drflac__read_and_decode_next_flac_frame(pFlac)) {
-                break;  /* Couldn't read the next frame, so just break from the loop and return. */
-            }
-        } else {
-            if (pFlac->currentFLACFrame.pcmFramesRemaining > pcmFramesToSeek) {
-                pcmFramesRead   += pcmFramesToSeek;
-                pFlac->currentFLACFrame.pcmFramesRemaining -= (drflac_uint32)pcmFramesToSeek;   /* <-- Safe cast. Will always be < currentFrame.pcmFramesRemaining < 65536. */
-                pcmFramesToSeek  = 0;
-            } else {
-                pcmFramesRead   += pFlac->currentFLACFrame.pcmFramesRemaining;
-                pcmFramesToSeek -= pFlac->currentFLACFrame.pcmFramesRemaining;
-                pFlac->currentFLACFrame.pcmFramesRemaining = 0;
-            }
-        }
-    }
-
-    pFlac->currentPCMFrame += pcmFramesRead;
-    return pcmFramesRead;
-}
-
-
-static drflac_bool32 drflac__seek_to_pcm_frame__brute_force(drflac* pFlac, drflac_uint64 pcmFrameIndex)
-{
-    drflac_bool32 isMidFrame = DRFLAC_FALSE;
-    drflac_uint64 runningPCMFrameCount;
-
-    DRFLAC_ASSERT(pFlac != NULL);
-
-    /* If we are seeking forward we start from the current position. Otherwise we need to start all the way from the start of the file. */
-    if (pcmFrameIndex >= pFlac->currentPCMFrame) {
-        /* Seeking forward. Need to seek from the current position. */
-        runningPCMFrameCount = pFlac->currentPCMFrame;
-
-        /* The frame header for the first frame may not yet have been read. We need to do that if necessary. */
-        if (pFlac->currentPCMFrame == 0 && pFlac->currentFLACFrame.pcmFramesRemaining == 0) {
-            if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
-                return DRFLAC_FALSE;
-            }
-        } else {
-            isMidFrame = DRFLAC_TRUE;
-        }
-    } else {
-        /* Seeking backwards. Need to seek from the start of the file. */
-        runningPCMFrameCount = 0;
-
-        /* Move back to the start. */
-        if (!drflac__seek_to_first_frame(pFlac)) {
-            return DRFLAC_FALSE;
-        }
-
-        /* Decode the first frame in preparation for sample-exact seeking below. */
-        if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
-            return DRFLAC_FALSE;
-        }
-    }
-
-    /*
-    We need to as quickly as possible find the frame that contains the target sample. To do this, we iterate over each frame and inspect its
-    header. If based on the header we can determine that the frame contains the sample, we do a full decode of that frame.
-    */
-    for (;;) {
-        drflac_uint64 pcmFrameCountInThisFLACFrame;
-        drflac_uint64 firstPCMFrameInFLACFrame = 0;
-        drflac_uint64 lastPCMFrameInFLACFrame = 0;
-
-        drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &firstPCMFrameInFLACFrame, &lastPCMFrameInFLACFrame);
-
-        pcmFrameCountInThisFLACFrame = (lastPCMFrameInFLACFrame - firstPCMFrameInFLACFrame) + 1;
-        if (pcmFrameIndex < (runningPCMFrameCount + pcmFrameCountInThisFLACFrame)) {
-            /*
-            The sample should be in this frame. We need to fully decode it, however if it's an invalid frame (a CRC mismatch), we need to pretend
-            it never existed and keep iterating.
-            */
-            drflac_uint64 pcmFramesToDecode = pcmFrameIndex - runningPCMFrameCount;
-
-            if (!isMidFrame) {
-                drflac_result result = drflac__decode_flac_frame(pFlac);
-                if (result == DRFLAC_SUCCESS) {
-                    /* The frame is valid. We just need to skip over some samples to ensure it's sample-exact. */
-                    return drflac__seek_forward_by_pcm_frames(pFlac, pcmFramesToDecode) == pcmFramesToDecode;  /* <-- If this fails, something bad has happened (it should never fail). */
-                } else {
-                    if (result == DRFLAC_CRC_MISMATCH) {
-                        goto next_iteration;   /* CRC mismatch. Pretend this frame never existed. */
-                    } else {
-                        return DRFLAC_FALSE;
-                    }
-                }
-            } else {
-                /* We started seeking mid-frame which means we need to skip the frame decoding part. */
-                return drflac__seek_forward_by_pcm_frames(pFlac, pcmFramesToDecode) == pcmFramesToDecode;
-            }
-        } else {
-            /*
-            It's not in this frame. We need to seek past the frame, but check if there was a CRC mismatch. If so, we pretend this
-            frame never existed and leave the running sample count untouched.
-            */
-            if (!isMidFrame) {
-                drflac_result result = drflac__seek_to_next_flac_frame(pFlac);
-                if (result == DRFLAC_SUCCESS) {
-                    runningPCMFrameCount += pcmFrameCountInThisFLACFrame;
-                } else {
-                    if (result == DRFLAC_CRC_MISMATCH) {
-                        goto next_iteration;   /* CRC mismatch. Pretend this frame never existed. */
-                    } else {
-                        return DRFLAC_FALSE;
-                    }
-                }
-            } else {
-                /*
-                We started seeking mid-frame which means we need to seek by reading to the end of the frame instead of with
-                drflac__seek_to_next_flac_frame() which only works if the decoder is sitting on the byte just after the frame header.
-                */
-                runningPCMFrameCount += pFlac->currentFLACFrame.pcmFramesRemaining;
-                pFlac->currentFLACFrame.pcmFramesRemaining = 0;
-                isMidFrame = DRFLAC_FALSE;
-            }
-
-            /* If we are seeking to the end of the file and we've just hit it, we're done. */
-            if (pcmFrameIndex == pFlac->totalPCMFrameCount && runningPCMFrameCount == pFlac->totalPCMFrameCount) {
-                return DRFLAC_TRUE;
-            }
-        }
-
-    next_iteration:
-        /* Grab the next frame in preparation for the next iteration. */
-        if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
-            return DRFLAC_FALSE;
-        }
-    }
-}
-
-
-#if !defined(DR_FLAC_NO_CRC)
-/*
-We use an average compression ratio to determine our approximate start location. FLAC files are generally about 50%-70% the size of their
-uncompressed counterparts so we'll use this as a basis. I'm going to split the middle and use a factor of 0.6 to determine the starting
-location.
-*/
-#define DRFLAC_BINARY_SEARCH_APPROX_COMPRESSION_RATIO 0.6f
-
-static drflac_bool32 drflac__seek_to_approximate_flac_frame_to_byte(drflac* pFlac, drflac_uint64 targetByte, drflac_uint64 rangeLo, drflac_uint64 rangeHi, drflac_uint64* pLastSuccessfulSeekOffset)
-{
-    DRFLAC_ASSERT(pFlac != NULL);
-    DRFLAC_ASSERT(pLastSuccessfulSeekOffset != NULL);
-    DRFLAC_ASSERT(targetByte >= rangeLo);
-    DRFLAC_ASSERT(targetByte <= rangeHi);
-
-    *pLastSuccessfulSeekOffset = pFlac->firstFLACFramePosInBytes;
-
-    for (;;) {
-        /* When seeking to a byte, failure probably means we've attempted to seek beyond the end of the stream. To counter this we just halve it each attempt. */
-        if (!drflac__seek_to_byte(&pFlac->bs, targetByte)) {
-            /* If we couldn't even seek to the first byte in the stream we have a problem. Just abandon the whole thing. */
-            if (targetByte == 0) {
-                drflac__seek_to_first_frame(pFlac); /* Try to recover. */
-                return DRFLAC_FALSE;
-            }
-
-            /* Halve the byte location and continue. */
-            targetByte = rangeLo + ((rangeHi - rangeLo)/2);
-            rangeHi = targetByte;
-        } else {
-            /* Getting here should mean that we have seeked to an appropriate byte. */
-
-            /* Clear the details of the FLAC frame so we don't misreport data. */
-            DRFLAC_ZERO_MEMORY(&pFlac->currentFLACFrame, sizeof(pFlac->currentFLACFrame));
-
-            /*
-            Now seek to the next FLAC frame. We need to decode the entire frame (not just the header) because it's possible for the header to incorrectly pass the
-            CRC check and return bad data. We need to decode the entire frame to be more certain. Although this seems unlikely, this has happened to me in testing
-            so it needs to stay this way for now.
-            */
-#if 1
-            if (!drflac__read_and_decode_next_flac_frame(pFlac)) {
-                /* Halve the byte location and continue. */
-                targetByte = rangeLo + ((rangeHi - rangeLo)/2);
-                rangeHi = targetByte;
-            } else {
-                break;
-            }
-#else
-            if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
-                /* Halve the byte location and continue. */
-                targetByte = rangeLo + ((rangeHi - rangeLo)/2);
-                rangeHi = targetByte;
-            } else {
-                break;
-            }
-#endif
-        }
-    }
-
-    /* The current PCM frame needs to be updated based on the frame we just seeked to. */
-    drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &pFlac->currentPCMFrame, NULL);
-
-    DRFLAC_ASSERT(targetByte <= rangeHi);
-
-    *pLastSuccessfulSeekOffset = targetByte;
-    return DRFLAC_TRUE;
-}
-
-static drflac_bool32 drflac__decode_flac_frame_and_seek_forward_by_pcm_frames(drflac* pFlac, drflac_uint64 offset)
-{
-    /* This section of code would be used if we were only decoding the FLAC frame header when calling drflac__seek_to_approximate_flac_frame_to_byte(). */
-#if 0
-    if (drflac__decode_flac_frame(pFlac) != DRFLAC_SUCCESS) {
-        /* We failed to decode this frame which may be due to it being corrupt. We'll just use the next valid FLAC frame. */
-        if (drflac__read_and_decode_next_flac_frame(pFlac) == DRFLAC_FALSE) {
-            return DRFLAC_FALSE;
-        }
-    }
-#endif
-
-    return drflac__seek_forward_by_pcm_frames(pFlac, offset) == offset;
-}
-
-
-static drflac_bool32 drflac__seek_to_pcm_frame__binary_search_internal(drflac* pFlac, drflac_uint64 pcmFrameIndex, drflac_uint64 byteRangeLo, drflac_uint64 byteRangeHi)
-{
-    /* This assumes pFlac->currentPCMFrame is sitting on byteRangeLo upon entry. */
-
-    drflac_uint64 targetByte;
-    drflac_uint64 pcmRangeLo = pFlac->totalPCMFrameCount;
-    drflac_uint64 pcmRangeHi = 0;
-    drflac_uint64 lastSuccessfulSeekOffset = (drflac_uint64)-1;
-    drflac_uint64 closestSeekOffsetBeforeTargetPCMFrame = byteRangeLo;
-    drflac_uint32 seekForwardThreshold = (pFlac->maxBlockSizeInPCMFrames != 0) ? pFlac->maxBlockSizeInPCMFrames*2 : 4096;
-
-    targetByte = byteRangeLo + (drflac_uint64)(((drflac_int64)((pcmFrameIndex - pFlac->currentPCMFrame) * pFlac->channels * pFlac->bitsPerSample)/8.0f) * DRFLAC_BINARY_SEARCH_APPROX_COMPRESSION_RATIO);
-    if (targetByte > byteRangeHi) {
-        targetByte = byteRangeHi;
-    }
-
-    for (;;) {
-        if (drflac__seek_to_approximate_flac_frame_to_byte(pFlac, targetByte, byteRangeLo, byteRangeHi, &lastSuccessfulSeekOffset)) {
-            /* We found a FLAC frame. We need to check if it contains the sample we're looking for. */
-            drflac_uint64 newPCMRangeLo;
-            drflac_uint64 newPCMRangeHi;
-            drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &newPCMRangeLo, &newPCMRangeHi);
-
-            /* If we selected the same frame, it means we should be pretty close. Just decode the rest. */
-            if (pcmRangeLo == newPCMRangeLo) {
-                if (!drflac__seek_to_approximate_flac_frame_to_byte(pFlac, closestSeekOffsetBeforeTargetPCMFrame, closestSeekOffsetBeforeTargetPCMFrame, byteRangeHi, &lastSuccessfulSeekOffset)) {
-                    break;  /* Failed to seek to closest frame. */
-                }
-
-                if (drflac__decode_flac_frame_and_seek_forward_by_pcm_frames(pFlac, pcmFrameIndex - pFlac->currentPCMFrame)) {
-                    return DRFLAC_TRUE;
-                } else {
-                    break;  /* Failed to seek forward. */
-                }
-            }
-
-            pcmRangeLo = newPCMRangeLo;
-            pcmRangeHi = newPCMRangeHi;
-
-            if (pcmRangeLo <= pcmFrameIndex && pcmRangeHi >= pcmFrameIndex) {
-                /* The target PCM frame is in this FLAC frame. */
-                if (drflac__decode_flac_frame_and_seek_forward_by_pcm_frames(pFlac, pcmFrameIndex - pFlac->currentPCMFrame) ) {
-                    return DRFLAC_TRUE;
-                } else {
-                    break;  /* Failed to seek to FLAC frame. */
-                }
-            } else {
-                const float approxCompressionRatio = (drflac_int64)(lastSuccessfulSeekOffset - pFlac->firstFLACFramePosInBytes) / ((drflac_int64)(pcmRangeLo * pFlac->channels * pFlac->bitsPerSample)/8.0f);
-
-                if (pcmRangeLo > pcmFrameIndex) {
-                    /* We seeked too far forward. We need to move our target byte backward and try again. */
-                    byteRangeHi = lastSuccessfulSeekOffset;
-                    if (byteRangeLo > byteRangeHi) {
-                        byteRangeLo = byteRangeHi;
-                    }
-
-                    targetByte = byteRangeLo + ((byteRangeHi - byteRangeLo) / 2);
-                    if (targetByte < byteRangeLo) {
-                        targetByte = byteRangeLo;
-                    }
-                } else /*if (pcmRangeHi < pcmFrameIndex)*/ {
-                    /* We didn't seek far enough. We need to move our target byte forward and try again. */
-
-                    /* If we're close enough we can just seek forward. */
-                    if ((pcmFrameIndex - pcmRangeLo) < seekForwardThreshold) {
-                        if (drflac__decode_flac_frame_and_seek_forward_by_pcm_frames(pFlac, pcmFrameIndex - pFlac->currentPCMFrame)) {
-                            return DRFLAC_TRUE;
-                        } else {
-                            break;  /* Failed to seek to FLAC frame. */
-                        }
-                    } else {
-                        byteRangeLo = lastSuccessfulSeekOffset;
-                        if (byteRangeHi < byteRangeLo) {
-                            byteRangeHi = byteRangeLo;
-                        }
-
-                        targetByte = lastSuccessfulSeekOffset + (drflac_uint64)(((drflac_int64)((pcmFrameIndex-pcmRangeLo) * pFlac->channels * pFlac->bitsPerSample)/8.0f) * approxCompressionRatio);
-                        if (targetByte > byteRangeHi) {
-                            targetByte = byteRangeHi;
-                        }
-
-                        if (closestSeekOffsetBeforeTargetPCMFrame < lastSuccessfulSeekOffset) {
-                            closestSeekOffsetBeforeTargetPCMFrame = lastSuccessfulSeekOffset;
-                        }
-                    }
-                }
-            }
-        } else {
-            /* Getting here is really bad. We just recover as best we can, but moving to the first frame in the stream, and then abort. */
-            break;
-        }
-    }
-
-    drflac__seek_to_first_frame(pFlac); /* <-- Try to recover. */
-    return DRFLAC_FALSE;
-}
-
-static drflac_bool32 drflac__seek_to_pcm_frame__binary_search(drflac* pFlac, drflac_uint64 pcmFrameIndex)
-{
-    drflac_uint64 byteRangeLo;
-    drflac_uint64 byteRangeHi;
-    drflac_uint32 seekForwardThreshold = (pFlac->maxBlockSizeInPCMFrames != 0) ? pFlac->maxBlockSizeInPCMFrames*2 : 4096;
-
-    /* Our algorithm currently assumes the FLAC stream is currently sitting at the start. */
-    if (drflac__seek_to_first_frame(pFlac) == DRFLAC_FALSE) {
-        return DRFLAC_FALSE;
-    }
-
-    /* If we're close enough to the start, just move to the start and seek forward. */
-    if (pcmFrameIndex < seekForwardThreshold) {
-        return drflac__seek_forward_by_pcm_frames(pFlac, pcmFrameIndex) == pcmFrameIndex;
-    }
-
-    /*
-    Our starting byte range is the byte position of the first FLAC frame and the approximate end of the file as if it were completely uncompressed. This ensures
-    the entire file is included, even though most of the time it'll exceed the end of the actual stream. This is OK as the frame searching logic will handle it.
-    */
-    byteRangeLo = pFlac->firstFLACFramePosInBytes;
-    byteRangeHi = pFlac->firstFLACFramePosInBytes + (drflac_uint64)((drflac_int64)(pFlac->totalPCMFrameCount * pFlac->channels * pFlac->bitsPerSample)/8.0f);
-
-    return drflac__seek_to_pcm_frame__binary_search_internal(pFlac, pcmFrameIndex, byteRangeLo, byteRangeHi);
-}
-#endif  /* !DR_FLAC_NO_CRC */
-
-static drflac_bool32 drflac__seek_to_pcm_frame__seek_table(drflac* pFlac, drflac_uint64 pcmFrameIndex)
-{
-    drflac_uint32 iClosestSeekpoint = 0;
-    drflac_bool32 isMidFrame = DRFLAC_FALSE;
-    drflac_uint64 runningPCMFrameCount;
-    drflac_uint32 iSeekpoint;
-
-
-    DRFLAC_ASSERT(pFlac != NULL);
-
-    if (pFlac->pSeekpoints == NULL || pFlac->seekpointCount == 0) {
-        return DRFLAC_FALSE;
-    }
-
-    for (iSeekpoint = 0; iSeekpoint < pFlac->seekpointCount; ++iSeekpoint) {
-        if (pFlac->pSeekpoints[iSeekpoint].firstPCMFrame >= pcmFrameIndex) {
-            break;
-        }
-
-        iClosestSeekpoint = iSeekpoint;
-    }
-
-    /* There's been cases where the seek table contains only zeros. We need to do some basic validation on the closest seekpoint. */
-    if (pFlac->pSeekpoints[iClosestSeekpoint].pcmFrameCount == 0 || pFlac->pSeekpoints[iClosestSeekpoint].pcmFrameCount > pFlac->maxBlockSizeInPCMFrames) {
-        return DRFLAC_FALSE;
-    }
-    if (pFlac->pSeekpoints[iClosestSeekpoint].firstPCMFrame > pFlac->totalPCMFrameCount && pFlac->totalPCMFrameCount > 0) {
-        return DRFLAC_FALSE;
-    }
-
-#if !defined(DR_FLAC_NO_CRC)
-    /* At this point we should know the closest seek point. We can use a binary search for this. We need to know the total sample count for this. */
-    if (pFlac->totalPCMFrameCount > 0) {
-        drflac_uint64 byteRangeLo;
-        drflac_uint64 byteRangeHi;
-
-        byteRangeHi = pFlac->firstFLACFramePosInBytes + (drflac_uint64)((drflac_int64)(pFlac->totalPCMFrameCount * pFlac->channels * pFlac->bitsPerSample)/8.0f);
-        byteRangeLo = pFlac->firstFLACFramePosInBytes + pFlac->pSeekpoints[iClosestSeekpoint].flacFrameOffset;
-
-        /*
-        If our closest seek point is not the last one, we only need to search between it and the next one. The section below calculates an appropriate starting
-        value for byteRangeHi which will clamp it appropriately.
-
-        Note that the next seekpoint must have an offset greater than the closest seekpoint because otherwise our binary search algorithm will break down. There
-        have been cases where a seektable consists of seek points where every byte offset is set to 0 which causes problems. If this happens we need to abort.
-        */
-        if (iClosestSeekpoint < pFlac->seekpointCount-1) {
-            drflac_uint32 iNextSeekpoint = iClosestSeekpoint + 1;
-
-            /* Basic validation on the seekpoints to ensure they're usable. */
-            if (pFlac->pSeekpoints[iClosestSeekpoint].flacFrameOffset >= pFlac->pSeekpoints[iNextSeekpoint].flacFrameOffset || pFlac->pSeekpoints[iNextSeekpoint].pcmFrameCount == 0) {
-                return DRFLAC_FALSE;    /* The next seekpoint doesn't look right. The seek table cannot be trusted from here. Abort. */
-            }
-
-            if (pFlac->pSeekpoints[iNextSeekpoint].firstPCMFrame != (((drflac_uint64)0xFFFFFFFF << 32) | 0xFFFFFFFF)) { /* Make sure it's not a placeholder seekpoint. */
-                byteRangeHi = pFlac->firstFLACFramePosInBytes + pFlac->pSeekpoints[iNextSeekpoint].flacFrameOffset - 1; /* byteRangeHi must be zero based. */
-            }
-        }
-
-        if (drflac__seek_to_byte(&pFlac->bs, pFlac->firstFLACFramePosInBytes + pFlac->pSeekpoints[iClosestSeekpoint].flacFrameOffset)) {
-            if (drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
-                drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &pFlac->currentPCMFrame, NULL);
-
-                if (drflac__seek_to_pcm_frame__binary_search_internal(pFlac, pcmFrameIndex, byteRangeLo, byteRangeHi)) {
-                    return DRFLAC_TRUE;
-                }
-            }
-        }
-    }
-#endif  /* !DR_FLAC_NO_CRC */
-
-    /* Getting here means we need to use a slower algorithm because the binary search method failed or cannot be used. */
-
-    /*
-    If we are seeking forward and the closest seekpoint is _before_ the current sample, we just seek forward from where we are. Otherwise we start seeking
-    from the seekpoint's first sample.
-    */
-    if (pcmFrameIndex >= pFlac->currentPCMFrame && pFlac->pSeekpoints[iClosestSeekpoint].firstPCMFrame <= pFlac->currentPCMFrame) {
-        /* Optimized case. Just seek forward from where we are. */
-        runningPCMFrameCount = pFlac->currentPCMFrame;
-
-        /* The frame header for the first frame may not yet have been read. We need to do that if necessary. */
-        if (pFlac->currentPCMFrame == 0 && pFlac->currentFLACFrame.pcmFramesRemaining == 0) {
-            if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
-                return DRFLAC_FALSE;
-            }
-        } else {
-            isMidFrame = DRFLAC_TRUE;
-        }
-    } else {
-        /* Slower case. Seek to the start of the seekpoint and then seek forward from there. */
-        runningPCMFrameCount = pFlac->pSeekpoints[iClosestSeekpoint].firstPCMFrame;
-
-        if (!drflac__seek_to_byte(&pFlac->bs, pFlac->firstFLACFramePosInBytes + pFlac->pSeekpoints[iClosestSeekpoint].flacFrameOffset)) {
-            return DRFLAC_FALSE;
-        }
-
-        /* Grab the frame the seekpoint is sitting on in preparation for the sample-exact seeking below. */
-        if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
-            return DRFLAC_FALSE;
-        }
-    }
-
-    for (;;) {
-        drflac_uint64 pcmFrameCountInThisFLACFrame;
-        drflac_uint64 firstPCMFrameInFLACFrame = 0;
-        drflac_uint64 lastPCMFrameInFLACFrame = 0;
-
-        drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &firstPCMFrameInFLACFrame, &lastPCMFrameInFLACFrame);
-
-        pcmFrameCountInThisFLACFrame = (lastPCMFrameInFLACFrame - firstPCMFrameInFLACFrame) + 1;
-        if (pcmFrameIndex < (runningPCMFrameCount + pcmFrameCountInThisFLACFrame)) {
-            /*
-            The sample should be in this frame. We need to fully decode it, but if it's an invalid frame (a CRC mismatch) we need to pretend
-            it never existed and keep iterating.
-            */
-            drflac_uint64 pcmFramesToDecode = pcmFrameIndex - runningPCMFrameCount;
-
-            if (!isMidFrame) {
-                drflac_result result = drflac__decode_flac_frame(pFlac);
-                if (result == DRFLAC_SUCCESS) {
-                    /* The frame is valid. We just need to skip over some samples to ensure it's sample-exact. */
-                    return drflac__seek_forward_by_pcm_frames(pFlac, pcmFramesToDecode) == pcmFramesToDecode;  /* <-- If this fails, something bad has happened (it should never fail). */
-                } else {
-                    if (result == DRFLAC_CRC_MISMATCH) {
-                        goto next_iteration;   /* CRC mismatch. Pretend this frame never existed. */
-                    } else {
-                        return DRFLAC_FALSE;
-                    }
-                }
-            } else {
-                /* We started seeking mid-frame which means we need to skip the frame decoding part. */
-                return drflac__seek_forward_by_pcm_frames(pFlac, pcmFramesToDecode) == pcmFramesToDecode;
-            }
-        } else {
-            /*
-            It's not in this frame. We need to seek past the frame, but check if there was a CRC mismatch. If so, we pretend this
-            frame never existed and leave the running sample count untouched.
-            */
-            if (!isMidFrame) {
-                drflac_result result = drflac__seek_to_next_flac_frame(pFlac);
-                if (result == DRFLAC_SUCCESS) {
-                    runningPCMFrameCount += pcmFrameCountInThisFLACFrame;
-                } else {
-                    if (result == DRFLAC_CRC_MISMATCH) {
-                        goto next_iteration;   /* CRC mismatch. Pretend this frame never existed. */
-                    } else {
-                        return DRFLAC_FALSE;
-                    }
-                }
-            } else {
-                /*
-                We started seeking mid-frame which means we need to seek by reading to the end of the frame instead of with
-                drflac__seek_to_next_flac_frame() which only works if the decoder is sitting on the byte just after the frame header.
-                */
-                runningPCMFrameCount += pFlac->currentFLACFrame.pcmFramesRemaining;
-                pFlac->currentFLACFrame.pcmFramesRemaining = 0;
-                isMidFrame = DRFLAC_FALSE;
-            }
-
-            /* If we are seeking to the end of the file and we've just hit it, we're done. */
-            if (pcmFrameIndex == pFlac->totalPCMFrameCount && runningPCMFrameCount == pFlac->totalPCMFrameCount) {
-                return DRFLAC_TRUE;
-            }
-        }
-
-    next_iteration:
-        /* Grab the next frame in preparation for the next iteration. */
-        if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
-            return DRFLAC_FALSE;
-        }
-    }
-}
-
-
-#ifndef DR_FLAC_NO_OGG
-typedef struct
-{
-    drflac_uint8 capturePattern[4];  /* Should be "OggS" */
-    drflac_uint8 structureVersion;   /* Always 0. */
-    drflac_uint8 headerType;
-    drflac_uint64 granulePosition;
-    drflac_uint32 serialNumber;
-    drflac_uint32 sequenceNumber;
-    drflac_uint32 checksum;
-    drflac_uint8 segmentCount;
-    drflac_uint8 segmentTable[255];
-} drflac_ogg_page_header;
-#endif
-
-typedef struct
-{
-    drflac_read_proc onRead;
-    drflac_seek_proc onSeek;
-    drflac_meta_proc onMeta;
-    drflac_container container;
-    void* pUserData;
-    void* pUserDataMD;
-    drflac_uint32 sampleRate;
-    drflac_uint8  channels;
-    drflac_uint8  bitsPerSample;
-    drflac_uint64 totalPCMFrameCount;
-    drflac_uint16 maxBlockSizeInPCMFrames;
-    drflac_uint64 runningFilePos;
-    drflac_bool32 hasStreamInfoBlock;
-    drflac_bool32 hasMetadataBlocks;
-    drflac_bs bs;                           /* <-- A bit streamer is required for loading data during initialization. */
-    drflac_frame_header firstFrameHeader;   /* <-- The header of the first frame that was read during relaxed initalization. Only set if there is no STREAMINFO block. */
-
-#ifndef DR_FLAC_NO_OGG
-    drflac_uint32 oggSerial;
-    drflac_uint64 oggFirstBytePos;
-    drflac_ogg_page_header oggBosHeader;
-#endif
-} drflac_init_info;
-
-static DRFLAC_INLINE void drflac__decode_block_header(drflac_uint32 blockHeader, drflac_uint8* isLastBlock, drflac_uint8* blockType, drflac_uint32* blockSize)
-{
-    blockHeader = drflac__be2host_32(blockHeader);
-    *isLastBlock = (drflac_uint8)((blockHeader & 0x80000000UL) >> 31);
-    *blockType   = (drflac_uint8)((blockHeader & 0x7F000000UL) >> 24);
-    *blockSize   =                (blockHeader & 0x00FFFFFFUL);
-}
-
-static DRFLAC_INLINE drflac_bool32 drflac__read_and_decode_block_header(drflac_read_proc onRead, void* pUserData, drflac_uint8* isLastBlock, drflac_uint8* blockType, drflac_uint32* blockSize)
-{
-    drflac_uint32 blockHeader;
-
-    *blockSize = 0;
-    if (onRead(pUserData, &blockHeader, 4) != 4) {
-        return DRFLAC_FALSE;
-    }
-
-    drflac__decode_block_header(blockHeader, isLastBlock, blockType, blockSize);
-    return DRFLAC_TRUE;
-}
-
-static drflac_bool32 drflac__read_streaminfo(drflac_read_proc onRead, void* pUserData, drflac_streaminfo* pStreamInfo)
-{
-    drflac_uint32 blockSizes;
-    drflac_uint64 frameSizes = 0;
-    drflac_uint64 importantProps;
-    drflac_uint8 md5[16];
-
-    /* min/max block size. */
-    if (onRead(pUserData, &blockSizes, 4) != 4) {
-        return DRFLAC_FALSE;
-    }
-
-    /* min/max frame size. */
-    if (onRead(pUserData, &frameSizes, 6) != 6) {
-        return DRFLAC_FALSE;
-    }
-
-    /* Sample rate, channels, bits per sample and total sample count. */
-    if (onRead(pUserData, &importantProps, 8) != 8) {
-        return DRFLAC_FALSE;
-    }
-
-    /* MD5 */
-    if (onRead(pUserData, md5, sizeof(md5)) != sizeof(md5)) {
-        return DRFLAC_FALSE;
-    }
-
-    blockSizes     = drflac__be2host_32(blockSizes);
-    frameSizes     = drflac__be2host_64(frameSizes);
-    importantProps = drflac__be2host_64(importantProps);
-
-    pStreamInfo->minBlockSizeInPCMFrames = (drflac_uint16)((blockSizes & 0xFFFF0000) >> 16);
-    pStreamInfo->maxBlockSizeInPCMFrames = (drflac_uint16) (blockSizes & 0x0000FFFF);
-    pStreamInfo->minFrameSizeInPCMFrames = (drflac_uint32)((frameSizes     &  (((drflac_uint64)0x00FFFFFF << 16) << 24)) >> 40);
-    pStreamInfo->maxFrameSizeInPCMFrames = (drflac_uint32)((frameSizes     &  (((drflac_uint64)0x00FFFFFF << 16) <<  0)) >> 16);
-    pStreamInfo->sampleRate              = (drflac_uint32)((importantProps &  (((drflac_uint64)0x000FFFFF << 16) << 28)) >> 44);
-    pStreamInfo->channels                = (drflac_uint8 )((importantProps &  (((drflac_uint64)0x0000000E << 16) << 24)) >> 41) + 1;
-    pStreamInfo->bitsPerSample           = (drflac_uint8 )((importantProps &  (((drflac_uint64)0x0000001F << 16) << 20)) >> 36) + 1;
-    pStreamInfo->totalPCMFrameCount      =                ((importantProps & ((((drflac_uint64)0x0000000F << 16) << 16) | 0xFFFFFFFF)));
-    DRFLAC_COPY_MEMORY(pStreamInfo->md5, md5, sizeof(md5));
-
-    return DRFLAC_TRUE;
-}
-
-
-static void* drflac__malloc_default(size_t sz, void* pUserData)
-{
-    (void)pUserData;
-    return DRFLAC_MALLOC(sz);
-}
-
-static void* drflac__realloc_default(void* p, size_t sz, void* pUserData)
-{
-    (void)pUserData;
-    return DRFLAC_REALLOC(p, sz);
-}
-
-static void drflac__free_default(void* p, void* pUserData)
-{
-    (void)pUserData;
-    DRFLAC_FREE(p);
-}
-
-
-static void* drflac__malloc_from_callbacks(size_t sz, const drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    if (pAllocationCallbacks == NULL) {
-        return NULL;
-    }
-
-    if (pAllocationCallbacks->onMalloc != NULL) {
-        return pAllocationCallbacks->onMalloc(sz, pAllocationCallbacks->pUserData);
-    }
-
-    /* Try using realloc(). */
-    if (pAllocationCallbacks->onRealloc != NULL) {
-        return pAllocationCallbacks->onRealloc(NULL, sz, pAllocationCallbacks->pUserData);
-    }
-
-    return NULL;
-}
-
-static void* drflac__realloc_from_callbacks(void* p, size_t szNew, size_t szOld, const drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    if (pAllocationCallbacks == NULL) {
-        return NULL;
-    }
-
-    if (pAllocationCallbacks->onRealloc != NULL) {
-        return pAllocationCallbacks->onRealloc(p, szNew, pAllocationCallbacks->pUserData);
-    }
-
-    /* Try emulating realloc() in terms of malloc()/free(). */
-    if (pAllocationCallbacks->onMalloc != NULL && pAllocationCallbacks->onFree != NULL) {
-        void* p2;
-
-        p2 = pAllocationCallbacks->onMalloc(szNew, pAllocationCallbacks->pUserData);
-        if (p2 == NULL) {
-            return NULL;
-        }
-
-        if (p != NULL) {
-            DRFLAC_COPY_MEMORY(p2, p, szOld);
-            pAllocationCallbacks->onFree(p, pAllocationCallbacks->pUserData);
-        }
-
-        return p2;
-    }
-
-    return NULL;
-}
-
-static void drflac__free_from_callbacks(void* p, const drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    if (p == NULL || pAllocationCallbacks == NULL) {
-        return;
-    }
-
-    if (pAllocationCallbacks->onFree != NULL) {
-        pAllocationCallbacks->onFree(p, pAllocationCallbacks->pUserData);
-    }
-}
-
-
-static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, void* pUserDataMD, drflac_uint64* pFirstFramePos, drflac_uint64* pSeektablePos, drflac_uint32* pSeektableSize, drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    /*
-    We want to keep track of the byte position in the stream of the seektable. At the time of calling this function we know that
-    we'll be sitting on byte 42.
-    */
-    drflac_uint64 runningFilePos = 42;
-    drflac_uint64 seektablePos   = 0;
-    drflac_uint32 seektableSize  = 0;
-
-    for (;;) {
-        drflac_metadata metadata;
-        drflac_uint8 isLastBlock = 0;
-        drflac_uint8 blockType;
-        drflac_uint32 blockSize;
-        if (drflac__read_and_decode_block_header(onRead, pUserData, &isLastBlock, &blockType, &blockSize) == DRFLAC_FALSE) {
-            return DRFLAC_FALSE;
-        }
-        runningFilePos += 4;
-
-        metadata.type = blockType;
-        metadata.pRawData = NULL;
-        metadata.rawDataSize = 0;
-
-        switch (blockType)
-        {
-            case DRFLAC_METADATA_BLOCK_TYPE_APPLICATION:
-            {
-                if (blockSize < 4) {
-                    return DRFLAC_FALSE;
-                }
-
-                if (onMeta) {
-                    void* pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks);
-                    if (pRawData == NULL) {
-                        return DRFLAC_FALSE;
-                    }
-
-                    if (onRead(pUserData, pRawData, blockSize) != blockSize) {
-                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
-                        return DRFLAC_FALSE;
-                    }
-
-                    metadata.pRawData = pRawData;
-                    metadata.rawDataSize = blockSize;
-                    metadata.data.application.id       = drflac__be2host_32(*(drflac_uint32*)pRawData);
-                    metadata.data.application.pData    = (const void*)((drflac_uint8*)pRawData + sizeof(drflac_uint32));
-                    metadata.data.application.dataSize = blockSize - sizeof(drflac_uint32);
-                    onMeta(pUserDataMD, &metadata);
-
-                    drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
-                }
-            } break;
-
-            case DRFLAC_METADATA_BLOCK_TYPE_SEEKTABLE:
-            {
-                seektablePos  = runningFilePos;
-                seektableSize = blockSize;
-
-                if (onMeta) {
-                    drflac_uint32 iSeekpoint;
-                    void* pRawData;
-
-                    pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks);
-                    if (pRawData == NULL) {
-                        return DRFLAC_FALSE;
-                    }
-
-                    if (onRead(pUserData, pRawData, blockSize) != blockSize) {
-                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
-                        return DRFLAC_FALSE;
-                    }
-
-                    metadata.pRawData = pRawData;
-                    metadata.rawDataSize = blockSize;
-                    metadata.data.seektable.seekpointCount = blockSize/sizeof(drflac_seekpoint);
-                    metadata.data.seektable.pSeekpoints = (const drflac_seekpoint*)pRawData;
-
-                    /* Endian swap. */
-                    for (iSeekpoint = 0; iSeekpoint < metadata.data.seektable.seekpointCount; ++iSeekpoint) {
-                        drflac_seekpoint* pSeekpoint = (drflac_seekpoint*)pRawData + iSeekpoint;
-                        pSeekpoint->firstPCMFrame   = drflac__be2host_64(pSeekpoint->firstPCMFrame);
-                        pSeekpoint->flacFrameOffset = drflac__be2host_64(pSeekpoint->flacFrameOffset);
-                        pSeekpoint->pcmFrameCount   = drflac__be2host_16(pSeekpoint->pcmFrameCount);
-                    }
-
-                    onMeta(pUserDataMD, &metadata);
-
-                    drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
-                }
-            } break;
-
-            case DRFLAC_METADATA_BLOCK_TYPE_VORBIS_COMMENT:
-            {
-                if (blockSize < 8) {
-                    return DRFLAC_FALSE;
-                }
-
-                if (onMeta) {
-                    void* pRawData;
-                    const char* pRunningData;
-                    const char* pRunningDataEnd;
-                    drflac_uint32 i;
-
-                    pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks);
-                    if (pRawData == NULL) {
-                        return DRFLAC_FALSE;
-                    }
-
-                    if (onRead(pUserData, pRawData, blockSize) != blockSize) {
-                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
-                        return DRFLAC_FALSE;
-                    }
-
-                    metadata.pRawData = pRawData;
-                    metadata.rawDataSize = blockSize;
-
-                    pRunningData    = (const char*)pRawData;
-                    pRunningDataEnd = (const char*)pRawData + blockSize;
-
-                    metadata.data.vorbis_comment.vendorLength = drflac__le2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
-
-                    /* Need space for the rest of the block */
-                    if ((pRunningDataEnd - pRunningData) - 4 < (drflac_int64)metadata.data.vorbis_comment.vendorLength) { /* <-- Note the order of operations to avoid overflow to a valid value */
-                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
-                        return DRFLAC_FALSE;
-                    }
-                    metadata.data.vorbis_comment.vendor       = pRunningData;                                            pRunningData += metadata.data.vorbis_comment.vendorLength;
-                    metadata.data.vorbis_comment.commentCount = drflac__le2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
-
-                    /* Need space for 'commentCount' comments after the block, which at minimum is a drflac_uint32 per comment */
-                    if ((pRunningDataEnd - pRunningData) / sizeof(drflac_uint32) < metadata.data.vorbis_comment.commentCount) { /* <-- Note the order of operations to avoid overflow to a valid value */
-                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
-                        return DRFLAC_FALSE;
-                    }
-                    metadata.data.vorbis_comment.pComments    = pRunningData;
-
-                    /* Check that the comments section is valid before passing it to the callback */
-                    for (i = 0; i < metadata.data.vorbis_comment.commentCount; ++i) {
-                        drflac_uint32 commentLength;
-
-                        if (pRunningDataEnd - pRunningData < 4) {
-                            drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
-                            return DRFLAC_FALSE;
-                        }
-
-                        commentLength = drflac__le2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
-                        if (pRunningDataEnd - pRunningData < (drflac_int64)commentLength) { /* <-- Note the order of operations to avoid overflow to a valid value */
-                            drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
-                            return DRFLAC_FALSE;
-                        }
-                        pRunningData += commentLength;
-                    }
-
-                    onMeta(pUserDataMD, &metadata);
-
-                    drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
-                }
-            } break;
-
-            case DRFLAC_METADATA_BLOCK_TYPE_CUESHEET:
-            {
-                if (blockSize < 396) {
-                    return DRFLAC_FALSE;
-                }
-
-                if (onMeta) {
-                    void* pRawData;
-                    const char* pRunningData;
-                    const char* pRunningDataEnd;
-                    drflac_uint8 iTrack;
-                    drflac_uint8 iIndex;
-
-                    pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks);
-                    if (pRawData == NULL) {
-                        return DRFLAC_FALSE;
-                    }
-
-                    if (onRead(pUserData, pRawData, blockSize) != blockSize) {
-                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
-                        return DRFLAC_FALSE;
-                    }
-
-                    metadata.pRawData = pRawData;
-                    metadata.rawDataSize = blockSize;
-
-                    pRunningData    = (const char*)pRawData;
-                    pRunningDataEnd = (const char*)pRawData + blockSize;
-
-                    DRFLAC_COPY_MEMORY(metadata.data.cuesheet.catalog, pRunningData, 128);                              pRunningData += 128;
-                    metadata.data.cuesheet.leadInSampleCount = drflac__be2host_64(*(const drflac_uint64*)pRunningData); pRunningData += 8;
-                    metadata.data.cuesheet.isCD              = (pRunningData[0] & 0x80) != 0;                           pRunningData += 259;
-                    metadata.data.cuesheet.trackCount        = pRunningData[0];                                         pRunningData += 1;
-                    metadata.data.cuesheet.pTrackData        = pRunningData;
-
-                    /* Check that the cuesheet tracks are valid before passing it to the callback */
-                    for (iTrack = 0; iTrack < metadata.data.cuesheet.trackCount; ++iTrack) {
-                        drflac_uint8 indexCount;
-                        drflac_uint32 indexPointSize;
-
-                        if (pRunningDataEnd - pRunningData < 36) {
-                            drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
-                            return DRFLAC_FALSE;
-                        }
-
-                        /* Skip to the index point count */
-                        pRunningData += 35;
-                        indexCount = pRunningData[0]; pRunningData += 1;
-                        indexPointSize = indexCount * sizeof(drflac_cuesheet_track_index);
-                        if (pRunningDataEnd - pRunningData < (drflac_int64)indexPointSize) {
-                            drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
-                            return DRFLAC_FALSE;
-                        }
-
-                        /* Endian swap. */
-                        for (iIndex = 0; iIndex < indexCount; ++iIndex) {
-                            drflac_cuesheet_track_index* pTrack = (drflac_cuesheet_track_index*)pRunningData;
-                            pRunningData += sizeof(drflac_cuesheet_track_index);
-                            pTrack->offset = drflac__be2host_64(pTrack->offset);
-                        }
-                    }
-
-                    onMeta(pUserDataMD, &metadata);
-
-                    drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
-                }
-            } break;
-
-            case DRFLAC_METADATA_BLOCK_TYPE_PICTURE:
-            {
-                if (blockSize < 32) {
-                    return DRFLAC_FALSE;
-                }
-
-                if (onMeta) {
-                    void* pRawData;
-                    const char* pRunningData;
-                    const char* pRunningDataEnd;
-
-                    pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks);
-                    if (pRawData == NULL) {
-                        return DRFLAC_FALSE;
-                    }
-
-                    if (onRead(pUserData, pRawData, blockSize) != blockSize) {
-                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
-                        return DRFLAC_FALSE;
-                    }
-
-                    metadata.pRawData = pRawData;
-                    metadata.rawDataSize = blockSize;
-
-                    pRunningData    = (const char*)pRawData;
-                    pRunningDataEnd = (const char*)pRawData + blockSize;
-
-                    metadata.data.picture.type       = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
-                    metadata.data.picture.mimeLength = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
-
-                    /* Need space for the rest of the block */
-                    if ((pRunningDataEnd - pRunningData) - 24 < (drflac_int64)metadata.data.picture.mimeLength) { /* <-- Note the order of operations to avoid overflow to a valid value */
-                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
-                        return DRFLAC_FALSE;
-                    }
-                    metadata.data.picture.mime              = pRunningData;                                            pRunningData += metadata.data.picture.mimeLength;
-                    metadata.data.picture.descriptionLength = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
-
-                    /* Need space for the rest of the block */
-                    if ((pRunningDataEnd - pRunningData) - 20 < (drflac_int64)metadata.data.picture.descriptionLength) { /* <-- Note the order of operations to avoid overflow to a valid value */
-                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
-                        return DRFLAC_FALSE;
-                    }
-                    metadata.data.picture.description     = pRunningData;                                            pRunningData += metadata.data.picture.descriptionLength;
-                    metadata.data.picture.width           = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
-                    metadata.data.picture.height          = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
-                    metadata.data.picture.colorDepth      = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
-                    metadata.data.picture.indexColorCount = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
-                    metadata.data.picture.pictureDataSize = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
-                    metadata.data.picture.pPictureData    = (const drflac_uint8*)pRunningData;
-
-                    /* Need space for the picture after the block */
-                    if (pRunningDataEnd - pRunningData < (drflac_int64)metadata.data.picture.pictureDataSize) { /* <-- Note the order of operations to avoid overflow to a valid value */
-                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
-                        return DRFLAC_FALSE;
-                    }
-
-                    onMeta(pUserDataMD, &metadata);
-
-                    drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
-                }
-            } break;
-
-            case DRFLAC_METADATA_BLOCK_TYPE_PADDING:
-            {
-                if (onMeta) {
-                    metadata.data.padding.unused = 0;
-
-                    /* Padding doesn't have anything meaningful in it, so just skip over it, but make sure the caller is aware of it by firing the callback. */
-                    if (!onSeek(pUserData, blockSize, drflac_seek_origin_current)) {
-                        isLastBlock = DRFLAC_TRUE;  /* An error occurred while seeking. Attempt to recover by treating this as the last block which will in turn terminate the loop. */
-                    } else {
-                        onMeta(pUserDataMD, &metadata);
-                    }
-                }
-            } break;
-
-            case DRFLAC_METADATA_BLOCK_TYPE_INVALID:
-            {
-                /* Invalid chunk. Just skip over this one. */
-                if (onMeta) {
-                    if (!onSeek(pUserData, blockSize, drflac_seek_origin_current)) {
-                        isLastBlock = DRFLAC_TRUE;  /* An error occurred while seeking. Attempt to recover by treating this as the last block which will in turn terminate the loop. */
-                    }
-                }
-            } break;
-
-            default:
-            {
-                /*
-                It's an unknown chunk, but not necessarily invalid. There's a chance more metadata blocks might be defined later on, so we
-                can at the very least report the chunk to the application and let it look at the raw data.
-                */
-                if (onMeta) {
-                    void* pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks);
-                    if (pRawData == NULL) {
-                        return DRFLAC_FALSE;
-                    }
-
-                    if (onRead(pUserData, pRawData, blockSize) != blockSize) {
-                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
-                        return DRFLAC_FALSE;
-                    }
-
-                    metadata.pRawData = pRawData;
-                    metadata.rawDataSize = blockSize;
-                    onMeta(pUserDataMD, &metadata);
-
-                    drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
-                }
-            } break;
-        }
-
-        /* If we're not handling metadata, just skip over the block. If we are, it will have been handled earlier in the switch statement above. */
-        if (onMeta == NULL && blockSize > 0) {
-            if (!onSeek(pUserData, blockSize, drflac_seek_origin_current)) {
-                isLastBlock = DRFLAC_TRUE;
-            }
-        }
-
-        runningFilePos += blockSize;
-        if (isLastBlock) {
-            break;
-        }
-    }
-
-    *pSeektablePos = seektablePos;
-    *pSeektableSize = seektableSize;
-    *pFirstFramePos = runningFilePos;
-
-    return DRFLAC_TRUE;
-}
-
-static drflac_bool32 drflac__init_private__native(drflac_init_info* pInit, drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, void* pUserDataMD, drflac_bool32 relaxed)
-{
-    /* Pre Condition: The bit stream should be sitting just past the 4-byte id header. */
-
-    drflac_uint8 isLastBlock;
-    drflac_uint8 blockType;
-    drflac_uint32 blockSize;
-
-    (void)onSeek;
-
-    pInit->container = drflac_container_native;
-
-    /* The first metadata block should be the STREAMINFO block. */
-    if (!drflac__read_and_decode_block_header(onRead, pUserData, &isLastBlock, &blockType, &blockSize)) {
-        return DRFLAC_FALSE;
-    }
-
-    if (blockType != DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO || blockSize != 34) {
-        if (!relaxed) {
-            /* We're opening in strict mode and the first block is not the STREAMINFO block. Error. */
-            return DRFLAC_FALSE;
-        } else {
-            /*
-            Relaxed mode. To open from here we need to just find the first frame and set the sample rate, etc. to whatever is defined
-            for that frame.
-            */
-            pInit->hasStreamInfoBlock = DRFLAC_FALSE;
-            pInit->hasMetadataBlocks  = DRFLAC_FALSE;
-
-            if (!drflac__read_next_flac_frame_header(&pInit->bs, 0, &pInit->firstFrameHeader)) {
-                return DRFLAC_FALSE;    /* Couldn't find a frame. */
-            }
-
-            if (pInit->firstFrameHeader.bitsPerSample == 0) {
-                return DRFLAC_FALSE;    /* Failed to initialize because the first frame depends on the STREAMINFO block, which does not exist. */
-            }
-
-            pInit->sampleRate              = pInit->firstFrameHeader.sampleRate;
-            pInit->channels                = drflac__get_channel_count_from_channel_assignment(pInit->firstFrameHeader.channelAssignment);
-            pInit->bitsPerSample           = pInit->firstFrameHeader.bitsPerSample;
-            pInit->maxBlockSizeInPCMFrames = 65535;   /* <-- See notes here: https://xiph.org/flac/format.html#metadata_block_streaminfo */
-            return DRFLAC_TRUE;
-        }
-    } else {
-        drflac_streaminfo streaminfo;
-        if (!drflac__read_streaminfo(onRead, pUserData, &streaminfo)) {
-            return DRFLAC_FALSE;
-        }
-
-        pInit->hasStreamInfoBlock      = DRFLAC_TRUE;
-        pInit->sampleRate              = streaminfo.sampleRate;
-        pInit->channels                = streaminfo.channels;
-        pInit->bitsPerSample           = streaminfo.bitsPerSample;
-        pInit->totalPCMFrameCount      = streaminfo.totalPCMFrameCount;
-        pInit->maxBlockSizeInPCMFrames = streaminfo.maxBlockSizeInPCMFrames;    /* Don't care about the min block size - only the max (used for determining the size of the memory allocation). */
-        pInit->hasMetadataBlocks       = !isLastBlock;
-
-        if (onMeta) {
-            drflac_metadata metadata;
-            metadata.type = DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO;
-            metadata.pRawData = NULL;
-            metadata.rawDataSize = 0;
-            metadata.data.streaminfo = streaminfo;
-            onMeta(pUserDataMD, &metadata);
-        }
-
-        return DRFLAC_TRUE;
-    }
-}
-
-#ifndef DR_FLAC_NO_OGG
-#define DRFLAC_OGG_MAX_PAGE_SIZE            65307
-#define DRFLAC_OGG_CAPTURE_PATTERN_CRC32    1605413199  /* CRC-32 of "OggS". */
-
-typedef enum
-{
-    drflac_ogg_recover_on_crc_mismatch,
-    drflac_ogg_fail_on_crc_mismatch
-} drflac_ogg_crc_mismatch_recovery;
-
-#ifndef DR_FLAC_NO_CRC
-static drflac_uint32 drflac__crc32_table[] = {
-    0x00000000L, 0x04C11DB7L, 0x09823B6EL, 0x0D4326D9L,
-    0x130476DCL, 0x17C56B6BL, 0x1A864DB2L, 0x1E475005L,
-    0x2608EDB8L, 0x22C9F00FL, 0x2F8AD6D6L, 0x2B4BCB61L,
-    0x350C9B64L, 0x31CD86D3L, 0x3C8EA00AL, 0x384FBDBDL,
-    0x4C11DB70L, 0x48D0C6C7L, 0x4593E01EL, 0x4152FDA9L,
-    0x5F15ADACL, 0x5BD4B01BL, 0x569796C2L, 0x52568B75L,
-    0x6A1936C8L, 0x6ED82B7FL, 0x639B0DA6L, 0x675A1011L,
-    0x791D4014L, 0x7DDC5DA3L, 0x709F7B7AL, 0x745E66CDL,
-    0x9823B6E0L, 0x9CE2AB57L, 0x91A18D8EL, 0x95609039L,
-    0x8B27C03CL, 0x8FE6DD8BL, 0x82A5FB52L, 0x8664E6E5L,
-    0xBE2B5B58L, 0xBAEA46EFL, 0xB7A96036L, 0xB3687D81L,
-    0xAD2F2D84L, 0xA9EE3033L, 0xA4AD16EAL, 0xA06C0B5DL,
-    0xD4326D90L, 0xD0F37027L, 0xDDB056FEL, 0xD9714B49L,
-    0xC7361B4CL, 0xC3F706FBL, 0xCEB42022L, 0xCA753D95L,
-    0xF23A8028L, 0xF6FB9D9FL, 0xFBB8BB46L, 0xFF79A6F1L,
-    0xE13EF6F4L, 0xE5FFEB43L, 0xE8BCCD9AL, 0xEC7DD02DL,
-    0x34867077L, 0x30476DC0L, 0x3D044B19L, 0x39C556AEL,
-    0x278206ABL, 0x23431B1CL, 0x2E003DC5L, 0x2AC12072L,
-    0x128E9DCFL, 0x164F8078L, 0x1B0CA6A1L, 0x1FCDBB16L,
-    0x018AEB13L, 0x054BF6A4L, 0x0808D07DL, 0x0CC9CDCAL,
-    0x7897AB07L, 0x7C56B6B0L, 0x71159069L, 0x75D48DDEL,
-    0x6B93DDDBL, 0x6F52C06CL, 0x6211E6B5L, 0x66D0FB02L,
-    0x5E9F46BFL, 0x5A5E5B08L, 0x571D7DD1L, 0x53DC6066L,
-    0x4D9B3063L, 0x495A2DD4L, 0x44190B0DL, 0x40D816BAL,
-    0xACA5C697L, 0xA864DB20L, 0xA527FDF9L, 0xA1E6E04EL,
-    0xBFA1B04BL, 0xBB60ADFCL, 0xB6238B25L, 0xB2E29692L,
-    0x8AAD2B2FL, 0x8E6C3698L, 0x832F1041L, 0x87EE0DF6L,
-    0x99A95DF3L, 0x9D684044L, 0x902B669DL, 0x94EA7B2AL,
-    0xE0B41DE7L, 0xE4750050L, 0xE9362689L, 0xEDF73B3EL,
-    0xF3B06B3BL, 0xF771768CL, 0xFA325055L, 0xFEF34DE2L,
-    0xC6BCF05FL, 0xC27DEDE8L, 0xCF3ECB31L, 0xCBFFD686L,
-    0xD5B88683L, 0xD1799B34L, 0xDC3ABDEDL, 0xD8FBA05AL,
-    0x690CE0EEL, 0x6DCDFD59L, 0x608EDB80L, 0x644FC637L,
-    0x7A089632L, 0x7EC98B85L, 0x738AAD5CL, 0x774BB0EBL,
-    0x4F040D56L, 0x4BC510E1L, 0x46863638L, 0x42472B8FL,
-    0x5C007B8AL, 0x58C1663DL, 0x558240E4L, 0x51435D53L,
-    0x251D3B9EL, 0x21DC2629L, 0x2C9F00F0L, 0x285E1D47L,
-    0x36194D42L, 0x32D850F5L, 0x3F9B762CL, 0x3B5A6B9BL,
-    0x0315D626L, 0x07D4CB91L, 0x0A97ED48L, 0x0E56F0FFL,
-    0x1011A0FAL, 0x14D0BD4DL, 0x19939B94L, 0x1D528623L,
-    0xF12F560EL, 0xF5EE4BB9L, 0xF8AD6D60L, 0xFC6C70D7L,
-    0xE22B20D2L, 0xE6EA3D65L, 0xEBA91BBCL, 0xEF68060BL,
-    0xD727BBB6L, 0xD3E6A601L, 0xDEA580D8L, 0xDA649D6FL,
-    0xC423CD6AL, 0xC0E2D0DDL, 0xCDA1F604L, 0xC960EBB3L,
-    0xBD3E8D7EL, 0xB9FF90C9L, 0xB4BCB610L, 0xB07DABA7L,
-    0xAE3AFBA2L, 0xAAFBE615L, 0xA7B8C0CCL, 0xA379DD7BL,
-    0x9B3660C6L, 0x9FF77D71L, 0x92B45BA8L, 0x9675461FL,
-    0x8832161AL, 0x8CF30BADL, 0x81B02D74L, 0x857130C3L,
-    0x5D8A9099L, 0x594B8D2EL, 0x5408ABF7L, 0x50C9B640L,
-    0x4E8EE645L, 0x4A4FFBF2L, 0x470CDD2BL, 0x43CDC09CL,
-    0x7B827D21L, 0x7F436096L, 0x7200464FL, 0x76C15BF8L,
-    0x68860BFDL, 0x6C47164AL, 0x61043093L, 0x65C52D24L,
-    0x119B4BE9L, 0x155A565EL, 0x18197087L, 0x1CD86D30L,
-    0x029F3D35L, 0x065E2082L, 0x0B1D065BL, 0x0FDC1BECL,
-    0x3793A651L, 0x3352BBE6L, 0x3E119D3FL, 0x3AD08088L,
-    0x2497D08DL, 0x2056CD3AL, 0x2D15EBE3L, 0x29D4F654L,
-    0xC5A92679L, 0xC1683BCEL, 0xCC2B1D17L, 0xC8EA00A0L,
-    0xD6AD50A5L, 0xD26C4D12L, 0xDF2F6BCBL, 0xDBEE767CL,
-    0xE3A1CBC1L, 0xE760D676L, 0xEA23F0AFL, 0xEEE2ED18L,
-    0xF0A5BD1DL, 0xF464A0AAL, 0xF9278673L, 0xFDE69BC4L,
-    0x89B8FD09L, 0x8D79E0BEL, 0x803AC667L, 0x84FBDBD0L,
-    0x9ABC8BD5L, 0x9E7D9662L, 0x933EB0BBL, 0x97FFAD0CL,
-    0xAFB010B1L, 0xAB710D06L, 0xA6322BDFL, 0xA2F33668L,
-    0xBCB4666DL, 0xB8757BDAL, 0xB5365D03L, 0xB1F740B4L
-};
-#endif
-
-static DRFLAC_INLINE drflac_uint32 drflac_crc32_byte(drflac_uint32 crc32, drflac_uint8 data)
-{
-#ifndef DR_FLAC_NO_CRC
-    return (crc32 << 8) ^ drflac__crc32_table[(drflac_uint8)((crc32 >> 24) & 0xFF) ^ data];
-#else
-    (void)data;
-    return crc32;
-#endif
-}
-
-#if 0
-static DRFLAC_INLINE drflac_uint32 drflac_crc32_uint32(drflac_uint32 crc32, drflac_uint32 data)
-{
-    crc32 = drflac_crc32_byte(crc32, (drflac_uint8)((data >> 24) & 0xFF));
-    crc32 = drflac_crc32_byte(crc32, (drflac_uint8)((data >> 16) & 0xFF));
-    crc32 = drflac_crc32_byte(crc32, (drflac_uint8)((data >>  8) & 0xFF));
-    crc32 = drflac_crc32_byte(crc32, (drflac_uint8)((data >>  0) & 0xFF));
-    return crc32;
-}
-
-static DRFLAC_INLINE drflac_uint32 drflac_crc32_uint64(drflac_uint32 crc32, drflac_uint64 data)
-{
-    crc32 = drflac_crc32_uint32(crc32, (drflac_uint32)((data >> 32) & 0xFFFFFFFF));
-    crc32 = drflac_crc32_uint32(crc32, (drflac_uint32)((data >>  0) & 0xFFFFFFFF));
-    return crc32;
-}
-#endif
-
-static DRFLAC_INLINE drflac_uint32 drflac_crc32_buffer(drflac_uint32 crc32, drflac_uint8* pData, drflac_uint32 dataSize)
-{
-    /* This can be optimized. */
-    drflac_uint32 i;
-    for (i = 0; i < dataSize; ++i) {
-        crc32 = drflac_crc32_byte(crc32, pData[i]);
-    }
-    return crc32;
-}
-
-
-static DRFLAC_INLINE drflac_bool32 drflac_ogg__is_capture_pattern(drflac_uint8 pattern[4])
-{
-    return pattern[0] == 'O' && pattern[1] == 'g' && pattern[2] == 'g' && pattern[3] == 'S';
-}
-
-static DRFLAC_INLINE drflac_uint32 drflac_ogg__get_page_header_size(drflac_ogg_page_header* pHeader)
-{
-    return 27 + pHeader->segmentCount;
-}
-
-static DRFLAC_INLINE drflac_uint32 drflac_ogg__get_page_body_size(drflac_ogg_page_header* pHeader)
-{
-    drflac_uint32 pageBodySize = 0;
-    int i;
-
-    for (i = 0; i < pHeader->segmentCount; ++i) {
-        pageBodySize += pHeader->segmentTable[i];
-    }
-
-    return pageBodySize;
-}
-
-static drflac_result drflac_ogg__read_page_header_after_capture_pattern(drflac_read_proc onRead, void* pUserData, drflac_ogg_page_header* pHeader, drflac_uint32* pBytesRead, drflac_uint32* pCRC32)
-{
-    drflac_uint8 data[23];
-    drflac_uint32 i;
-
-    DRFLAC_ASSERT(*pCRC32 == DRFLAC_OGG_CAPTURE_PATTERN_CRC32);
-
-    if (onRead(pUserData, data, 23) != 23) {
-        return DRFLAC_AT_END;
-    }
-    *pBytesRead += 23;
-
-    /*
-    It's not actually used, but set the capture pattern to 'OggS' for completeness. Not doing this will cause static analysers to complain about
-    us trying to access uninitialized data. We could alternatively just comment out this member of the drflac_ogg_page_header structure, but I
-    like to have it map to the structure of the underlying data.
-    */
-    pHeader->capturePattern[0] = 'O';
-    pHeader->capturePattern[1] = 'g';
-    pHeader->capturePattern[2] = 'g';
-    pHeader->capturePattern[3] = 'S';
-
-    pHeader->structureVersion = data[0];
-    pHeader->headerType       = data[1];
-    DRFLAC_COPY_MEMORY(&pHeader->granulePosition, &data[ 2], 8);
-    DRFLAC_COPY_MEMORY(&pHeader->serialNumber,    &data[10], 4);
-    DRFLAC_COPY_MEMORY(&pHeader->sequenceNumber,  &data[14], 4);
-    DRFLAC_COPY_MEMORY(&pHeader->checksum,        &data[18], 4);
-    pHeader->segmentCount     = data[22];
-
-    /* Calculate the CRC. Note that for the calculation the checksum part of the page needs to be set to 0. */
-    data[18] = 0;
-    data[19] = 0;
-    data[20] = 0;
-    data[21] = 0;
-
-    for (i = 0; i < 23; ++i) {
-        *pCRC32 = drflac_crc32_byte(*pCRC32, data[i]);
-    }
-
-
-    if (onRead(pUserData, pHeader->segmentTable, pHeader->segmentCount) != pHeader->segmentCount) {
-        return DRFLAC_AT_END;
-    }
-    *pBytesRead += pHeader->segmentCount;
-
-    for (i = 0; i < pHeader->segmentCount; ++i) {
-        *pCRC32 = drflac_crc32_byte(*pCRC32, pHeader->segmentTable[i]);
-    }
-
-    return DRFLAC_SUCCESS;
-}
-
-static drflac_result drflac_ogg__read_page_header(drflac_read_proc onRead, void* pUserData, drflac_ogg_page_header* pHeader, drflac_uint32* pBytesRead, drflac_uint32* pCRC32)
-{
-    drflac_uint8 id[4];
-
-    *pBytesRead = 0;
-
-    if (onRead(pUserData, id, 4) != 4) {
-        return DRFLAC_AT_END;
-    }
-    *pBytesRead += 4;
-
-    /* We need to read byte-by-byte until we find the OggS capture pattern. */
-    for (;;) {
-        if (drflac_ogg__is_capture_pattern(id)) {
-            drflac_result result;
-
-            *pCRC32 = DRFLAC_OGG_CAPTURE_PATTERN_CRC32;
-
-            result = drflac_ogg__read_page_header_after_capture_pattern(onRead, pUserData, pHeader, pBytesRead, pCRC32);
-            if (result == DRFLAC_SUCCESS) {
-                return DRFLAC_SUCCESS;
-            } else {
-                if (result == DRFLAC_CRC_MISMATCH) {
-                    continue;
-                } else {
-                    return result;
-                }
-            }
-        } else {
-            /* The first 4 bytes did not equal the capture pattern. Read the next byte and try again. */
-            id[0] = id[1];
-            id[1] = id[2];
-            id[2] = id[3];
-            if (onRead(pUserData, &id[3], 1) != 1) {
-                return DRFLAC_AT_END;
-            }
-            *pBytesRead += 1;
-        }
-    }
-}
-
-
-/*
-The main part of the Ogg encapsulation is the conversion from the physical Ogg bitstream to the native FLAC bitstream. It works
-in three general stages: Ogg Physical Bitstream -> Ogg/FLAC Logical Bitstream -> FLAC Native Bitstream. dr_flac is designed
-in such a way that the core sections assume everything is delivered in native format. Therefore, for each encapsulation type
-dr_flac is supporting there needs to be a layer sitting on top of the onRead and onSeek callbacks that ensures the bits read from
-the physical Ogg bitstream are converted and delivered in native FLAC format.
-*/
-typedef struct
-{
-    drflac_read_proc onRead;                /* The original onRead callback from drflac_open() and family. */
-    drflac_seek_proc onSeek;                /* The original onSeek callback from drflac_open() and family. */
-    void* pUserData;                        /* The user data passed on onRead and onSeek. This is the user data that was passed on drflac_open() and family. */
-    drflac_uint64 currentBytePos;           /* The position of the byte we are sitting on in the physical byte stream. Used for efficient seeking. */
-    drflac_uint64 firstBytePos;             /* The position of the first byte in the physical bitstream. Points to the start of the "OggS" identifier of the FLAC bos page. */
-    drflac_uint32 serialNumber;             /* The serial number of the FLAC audio pages. This is determined by the initial header page that was read during initialization. */
-    drflac_ogg_page_header bosPageHeader;   /* Used for seeking. */
-    drflac_ogg_page_header currentPageHeader;
-    drflac_uint32 bytesRemainingInPage;
-    drflac_uint32 pageDataSize;
-    drflac_uint8 pageData[DRFLAC_OGG_MAX_PAGE_SIZE];
-} drflac_oggbs; /* oggbs = Ogg Bitstream */
-
-static size_t drflac_oggbs__read_physical(drflac_oggbs* oggbs, void* bufferOut, size_t bytesToRead)
-{
-    size_t bytesActuallyRead = oggbs->onRead(oggbs->pUserData, bufferOut, bytesToRead);
-    oggbs->currentBytePos += bytesActuallyRead;
-
-    return bytesActuallyRead;
-}
-
-static drflac_bool32 drflac_oggbs__seek_physical(drflac_oggbs* oggbs, drflac_uint64 offset, drflac_seek_origin origin)
-{
-    if (origin == drflac_seek_origin_start) {
-        if (offset <= 0x7FFFFFFF) {
-            if (!oggbs->onSeek(oggbs->pUserData, (int)offset, drflac_seek_origin_start)) {
-                return DRFLAC_FALSE;
-            }
-            oggbs->currentBytePos = offset;
-
-            return DRFLAC_TRUE;
-        } else {
-            if (!oggbs->onSeek(oggbs->pUserData, 0x7FFFFFFF, drflac_seek_origin_start)) {
-                return DRFLAC_FALSE;
-            }
-            oggbs->currentBytePos = offset;
-
-            return drflac_oggbs__seek_physical(oggbs, offset - 0x7FFFFFFF, drflac_seek_origin_current);
-        }
-    } else {
-        while (offset > 0x7FFFFFFF) {
-            if (!oggbs->onSeek(oggbs->pUserData, 0x7FFFFFFF, drflac_seek_origin_current)) {
-                return DRFLAC_FALSE;
-            }
-            oggbs->currentBytePos += 0x7FFFFFFF;
-            offset -= 0x7FFFFFFF;
-        }
-
-        if (!oggbs->onSeek(oggbs->pUserData, (int)offset, drflac_seek_origin_current)) {    /* <-- Safe cast thanks to the loop above. */
-            return DRFLAC_FALSE;
-        }
-        oggbs->currentBytePos += offset;
-
-        return DRFLAC_TRUE;
-    }
-}
-
-static drflac_bool32 drflac_oggbs__goto_next_page(drflac_oggbs* oggbs, drflac_ogg_crc_mismatch_recovery recoveryMethod)
-{
-    drflac_ogg_page_header header;
-    for (;;) {
-        drflac_uint32 crc32 = 0;
-        drflac_uint32 bytesRead;
-        drflac_uint32 pageBodySize;
-#ifndef DR_FLAC_NO_CRC
-        drflac_uint32 actualCRC32;
-#endif
-
-        if (drflac_ogg__read_page_header(oggbs->onRead, oggbs->pUserData, &header, &bytesRead, &crc32) != DRFLAC_SUCCESS) {
-            return DRFLAC_FALSE;
-        }
-        oggbs->currentBytePos += bytesRead;
-
-        pageBodySize = drflac_ogg__get_page_body_size(&header);
-        if (pageBodySize > DRFLAC_OGG_MAX_PAGE_SIZE) {
-            continue;   /* Invalid page size. Assume it's corrupted and just move to the next page. */
-        }
-
-        if (header.serialNumber != oggbs->serialNumber) {
-            /* It's not a FLAC page. Skip it. */
-            if (pageBodySize > 0 && !drflac_oggbs__seek_physical(oggbs, pageBodySize, drflac_seek_origin_current)) {
-                return DRFLAC_FALSE;
-            }
-            continue;
-        }
-
-
-        /* We need to read the entire page and then do a CRC check on it. If there's a CRC mismatch we need to skip this page. */
-        if (drflac_oggbs__read_physical(oggbs, oggbs->pageData, pageBodySize) != pageBodySize) {
-            return DRFLAC_FALSE;
-        }
-        oggbs->pageDataSize = pageBodySize;
-
-#ifndef DR_FLAC_NO_CRC
-        actualCRC32 = drflac_crc32_buffer(crc32, oggbs->pageData, oggbs->pageDataSize);
-        if (actualCRC32 != header.checksum) {
-            if (recoveryMethod == drflac_ogg_recover_on_crc_mismatch) {
-                continue;   /* CRC mismatch. Skip this page. */
-            } else {
-                /*
-                Even though we are failing on a CRC mismatch, we still want our stream to be in a good state. Therefore we
-                go to the next valid page to ensure we're in a good state, but return false to let the caller know that the
-                seek did not fully complete.
-                */
-                drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch);
-                return DRFLAC_FALSE;
-            }
-        }
-#else
-        (void)recoveryMethod;   /* <-- Silence a warning. */
-#endif
-
-        oggbs->currentPageHeader = header;
-        oggbs->bytesRemainingInPage = pageBodySize;
-        return DRFLAC_TRUE;
-    }
-}
-
-/* Function below is unused at the moment, but I might be re-adding it later. */
-#if 0
-static drflac_uint8 drflac_oggbs__get_current_segment_index(drflac_oggbs* oggbs, drflac_uint8* pBytesRemainingInSeg)
-{
-    drflac_uint32 bytesConsumedInPage = drflac_ogg__get_page_body_size(&oggbs->currentPageHeader) - oggbs->bytesRemainingInPage;
-    drflac_uint8 iSeg = 0;
-    drflac_uint32 iByte = 0;
-    while (iByte < bytesConsumedInPage) {
-        drflac_uint8 segmentSize = oggbs->currentPageHeader.segmentTable[iSeg];
-        if (iByte + segmentSize > bytesConsumedInPage) {
-            break;
-        } else {
-            iSeg += 1;
-            iByte += segmentSize;
-        }
-    }
-
-    *pBytesRemainingInSeg = oggbs->currentPageHeader.segmentTable[iSeg] - (drflac_uint8)(bytesConsumedInPage - iByte);
-    return iSeg;
-}
-
-static drflac_bool32 drflac_oggbs__seek_to_next_packet(drflac_oggbs* oggbs)
-{
-    /* The current packet ends when we get to the segment with a lacing value of < 255 which is not at the end of a page. */
-    for (;;) {
-        drflac_bool32 atEndOfPage = DRFLAC_FALSE;
-
-        drflac_uint8 bytesRemainingInSeg;
-        drflac_uint8 iFirstSeg = drflac_oggbs__get_current_segment_index(oggbs, &bytesRemainingInSeg);
-
-        drflac_uint32 bytesToEndOfPacketOrPage = bytesRemainingInSeg;
-        for (drflac_uint8 iSeg = iFirstSeg; iSeg < oggbs->currentPageHeader.segmentCount; ++iSeg) {
-            drflac_uint8 segmentSize = oggbs->currentPageHeader.segmentTable[iSeg];
-            if (segmentSize < 255) {
-                if (iSeg == oggbs->currentPageHeader.segmentCount-1) {
-                    atEndOfPage = DRFLAC_TRUE;
-                }
-
-                break;
-            }
-
-            bytesToEndOfPacketOrPage += segmentSize;
-        }
-
-        /*
-        At this point we will have found either the packet or the end of the page. If were at the end of the page we'll
-        want to load the next page and keep searching for the end of the packet.
-        */
-        drflac_oggbs__seek_physical(oggbs, bytesToEndOfPacketOrPage, drflac_seek_origin_current);
-        oggbs->bytesRemainingInPage -= bytesToEndOfPacketOrPage;
-
-        if (atEndOfPage) {
-            /*
-            We're potentially at the next packet, but we need to check the next page first to be sure because the packet may
-            straddle pages.
-            */
-            if (!drflac_oggbs__goto_next_page(oggbs)) {
-                return DRFLAC_FALSE;
-            }
-
-            /* If it's a fresh packet it most likely means we're at the next packet. */
-            if ((oggbs->currentPageHeader.headerType & 0x01) == 0) {
-                return DRFLAC_TRUE;
-            }
-        } else {
-            /* We're at the next packet. */
-            return DRFLAC_TRUE;
-        }
-    }
-}
-
-static drflac_bool32 drflac_oggbs__seek_to_next_frame(drflac_oggbs* oggbs)
-{
-    /* The bitstream should be sitting on the first byte just after the header of the frame. */
-
-    /* What we're actually doing here is seeking to the start of the next packet. */
-    return drflac_oggbs__seek_to_next_packet(oggbs);
-}
-#endif
-
-static size_t drflac__on_read_ogg(void* pUserData, void* bufferOut, size_t bytesToRead)
-{
-    drflac_oggbs* oggbs = (drflac_oggbs*)pUserData;
-    drflac_uint8* pRunningBufferOut = (drflac_uint8*)bufferOut;
-    size_t bytesRead = 0;
-
-    DRFLAC_ASSERT(oggbs != NULL);
-    DRFLAC_ASSERT(pRunningBufferOut != NULL);
-
-    /* Reading is done page-by-page. If we've run out of bytes in the page we need to move to the next one. */
-    while (bytesRead < bytesToRead) {
-        size_t bytesRemainingToRead = bytesToRead - bytesRead;
-
-        if (oggbs->bytesRemainingInPage >= bytesRemainingToRead) {
-            DRFLAC_COPY_MEMORY(pRunningBufferOut, oggbs->pageData + (oggbs->pageDataSize - oggbs->bytesRemainingInPage), bytesRemainingToRead);
-            bytesRead += bytesRemainingToRead;
-            oggbs->bytesRemainingInPage -= (drflac_uint32)bytesRemainingToRead;
-            break;
-        }
-
-        /* If we get here it means some of the requested data is contained in the next pages. */
-        if (oggbs->bytesRemainingInPage > 0) {
-            DRFLAC_COPY_MEMORY(pRunningBufferOut, oggbs->pageData + (oggbs->pageDataSize - oggbs->bytesRemainingInPage), oggbs->bytesRemainingInPage);
-            bytesRead += oggbs->bytesRemainingInPage;
-            pRunningBufferOut += oggbs->bytesRemainingInPage;
-            oggbs->bytesRemainingInPage = 0;
-        }
-
-        DRFLAC_ASSERT(bytesRemainingToRead > 0);
-        if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch)) {
-            break;  /* Failed to go to the next page. Might have simply hit the end of the stream. */
-        }
-    }
-
-    return bytesRead;
-}
-
-static drflac_bool32 drflac__on_seek_ogg(void* pUserData, int offset, drflac_seek_origin origin)
-{
-    drflac_oggbs* oggbs = (drflac_oggbs*)pUserData;
-    int bytesSeeked = 0;
-
-    DRFLAC_ASSERT(oggbs != NULL);
-    DRFLAC_ASSERT(offset >= 0);  /* <-- Never seek backwards. */
-
-    /* Seeking is always forward which makes things a lot simpler. */
-    if (origin == drflac_seek_origin_start) {
-        if (!drflac_oggbs__seek_physical(oggbs, (int)oggbs->firstBytePos, drflac_seek_origin_start)) {
-            return DRFLAC_FALSE;
-        }
-
-        if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_fail_on_crc_mismatch)) {
-            return DRFLAC_FALSE;
-        }
-
-        return drflac__on_seek_ogg(pUserData, offset, drflac_seek_origin_current);
-    }
-
-    DRFLAC_ASSERT(origin == drflac_seek_origin_current);
-
-    while (bytesSeeked < offset) {
-        int bytesRemainingToSeek = offset - bytesSeeked;
-        DRFLAC_ASSERT(bytesRemainingToSeek >= 0);
-
-        if (oggbs->bytesRemainingInPage >= (size_t)bytesRemainingToSeek) {
-            bytesSeeked += bytesRemainingToSeek;
-            (void)bytesSeeked;  /* <-- Silence a dead store warning emitted by Clang Static Analyzer. */
-            oggbs->bytesRemainingInPage -= bytesRemainingToSeek;
-            break;
-        }
-
-        /* If we get here it means some of the requested data is contained in the next pages. */
-        if (oggbs->bytesRemainingInPage > 0) {
-            bytesSeeked += (int)oggbs->bytesRemainingInPage;
-            oggbs->bytesRemainingInPage = 0;
-        }
-
-        DRFLAC_ASSERT(bytesRemainingToSeek > 0);
-        if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_fail_on_crc_mismatch)) {
-            /* Failed to go to the next page. We either hit the end of the stream or had a CRC mismatch. */
-            return DRFLAC_FALSE;
-        }
-    }
-
-    return DRFLAC_TRUE;
-}
-
-
-static drflac_bool32 drflac_ogg__seek_to_pcm_frame(drflac* pFlac, drflac_uint64 pcmFrameIndex)
-{
-    drflac_oggbs* oggbs = (drflac_oggbs*)pFlac->_oggbs;
-    drflac_uint64 originalBytePos;
-    drflac_uint64 runningGranulePosition;
-    drflac_uint64 runningFrameBytePos;
-    drflac_uint64 runningPCMFrameCount;
-
-    DRFLAC_ASSERT(oggbs != NULL);
-
-    originalBytePos = oggbs->currentBytePos;   /* For recovery. Points to the OggS identifier. */
-
-    /* First seek to the first frame. */
-    if (!drflac__seek_to_byte(&pFlac->bs, pFlac->firstFLACFramePosInBytes)) {
-        return DRFLAC_FALSE;
-    }
-    oggbs->bytesRemainingInPage = 0;
-
-    runningGranulePosition = 0;
-    for (;;) {
-        if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch)) {
-            drflac_oggbs__seek_physical(oggbs, originalBytePos, drflac_seek_origin_start);
-            return DRFLAC_FALSE;   /* Never did find that sample... */
-        }
-
-        runningFrameBytePos = oggbs->currentBytePos - drflac_ogg__get_page_header_size(&oggbs->currentPageHeader) - oggbs->pageDataSize;
-        if (oggbs->currentPageHeader.granulePosition >= pcmFrameIndex) {
-            break; /* The sample is somewhere in the previous page. */
-        }
-
-        /*
-        At this point we know the sample is not in the previous page. It could possibly be in this page. For simplicity we
-        disregard any pages that do not begin a fresh packet.
-        */
-        if ((oggbs->currentPageHeader.headerType & 0x01) == 0) {    /* <-- Is it a fresh page? */
-            if (oggbs->currentPageHeader.segmentTable[0] >= 2) {
-                drflac_uint8 firstBytesInPage[2];
-                firstBytesInPage[0] = oggbs->pageData[0];
-                firstBytesInPage[1] = oggbs->pageData[1];
-
-                if ((firstBytesInPage[0] == 0xFF) && (firstBytesInPage[1] & 0xFC) == 0xF8) {    /* <-- Does the page begin with a frame's sync code? */
-                    runningGranulePosition = oggbs->currentPageHeader.granulePosition;
-                }
-
-                continue;
-            }
-        }
-    }
-
-    /*
-    We found the page that that is closest to the sample, so now we need to find it. The first thing to do is seek to the
-    start of that page. In the loop above we checked that it was a fresh page which means this page is also the start of
-    a new frame. This property means that after we've seeked to the page we can immediately start looping over frames until
-    we find the one containing the target sample.
-    */
-    if (!drflac_oggbs__seek_physical(oggbs, runningFrameBytePos, drflac_seek_origin_start)) {
-        return DRFLAC_FALSE;
-    }
-    if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch)) {
-        return DRFLAC_FALSE;
-    }
-
-    /*
-    At this point we'll be sitting on the first byte of the frame header of the first frame in the page. We just keep
-    looping over these frames until we find the one containing the sample we're after.
-    */
-    runningPCMFrameCount = runningGranulePosition;
-    for (;;) {
-        /*
-        There are two ways to find the sample and seek past irrelevant frames:
-          1) Use the native FLAC decoder.
-          2) Use Ogg's framing system.
-
-        Both of these options have their own pros and cons. Using the native FLAC decoder is slower because it needs to
-        do a full decode of the frame. Using Ogg's framing system is faster, but more complicated and involves some code
-        duplication for the decoding of frame headers.
-
-        Another thing to consider is that using the Ogg framing system will perform direct seeking of the physical Ogg
-        bitstream. This is important to consider because it means we cannot read data from the drflac_bs object using the
-        standard drflac__*() APIs because that will read in extra data for its own internal caching which in turn breaks
-        the positioning of the read pointer of the physical Ogg bitstream. Therefore, anything that would normally be read
-        using the native FLAC decoding APIs, such as drflac__read_next_flac_frame_header(), need to be re-implemented so as to
-        avoid the use of the drflac_bs object.
-
-        Considering these issues, I have decided to use the slower native FLAC decoding method for the following reasons:
-          1) Seeking is already partially accelerated using Ogg's paging system in the code block above.
-          2) Seeking in an Ogg encapsulated FLAC stream is probably quite uncommon.
-          3) Simplicity.
-        */
-        drflac_uint64 firstPCMFrameInFLACFrame = 0;
-        drflac_uint64 lastPCMFrameInFLACFrame = 0;
-        drflac_uint64 pcmFrameCountInThisFrame;
-
-        if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
-            return DRFLAC_FALSE;
-        }
-
-        drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &firstPCMFrameInFLACFrame, &lastPCMFrameInFLACFrame);
-
-        pcmFrameCountInThisFrame = (lastPCMFrameInFLACFrame - firstPCMFrameInFLACFrame) + 1;
-
-        /* If we are seeking to the end of the file and we've just hit it, we're done. */
-        if (pcmFrameIndex == pFlac->totalPCMFrameCount && (runningPCMFrameCount + pcmFrameCountInThisFrame) == pFlac->totalPCMFrameCount) {
-            drflac_result result = drflac__decode_flac_frame(pFlac);
-            if (result == DRFLAC_SUCCESS) {
-                pFlac->currentPCMFrame = pcmFrameIndex;
-                pFlac->currentFLACFrame.pcmFramesRemaining = 0;
-                return DRFLAC_TRUE;
-            } else {
-                return DRFLAC_FALSE;
-            }
-        }
-
-        if (pcmFrameIndex < (runningPCMFrameCount + pcmFrameCountInThisFrame)) {
-            /*
-            The sample should be in this FLAC frame. We need to fully decode it, however if it's an invalid frame (a CRC mismatch), we need to pretend
-            it never existed and keep iterating.
-            */
-            drflac_result result = drflac__decode_flac_frame(pFlac);
-            if (result == DRFLAC_SUCCESS) {
-                /* The frame is valid. We just need to skip over some samples to ensure it's sample-exact. */
-                drflac_uint64 pcmFramesToDecode = (size_t)(pcmFrameIndex - runningPCMFrameCount);    /* <-- Safe cast because the maximum number of samples in a frame is 65535. */
-                if (pcmFramesToDecode == 0) {
-                    return DRFLAC_TRUE;
-                }
-
-                pFlac->currentPCMFrame = runningPCMFrameCount;
-
-                return drflac__seek_forward_by_pcm_frames(pFlac, pcmFramesToDecode) == pcmFramesToDecode;  /* <-- If this fails, something bad has happened (it should never fail). */
-            } else {
-                if (result == DRFLAC_CRC_MISMATCH) {
-                    continue;   /* CRC mismatch. Pretend this frame never existed. */
-                } else {
-                    return DRFLAC_FALSE;
-                }
-            }
-        } else {
-            /*
-            It's not in this frame. We need to seek past the frame, but check if there was a CRC mismatch. If so, we pretend this
-            frame never existed and leave the running sample count untouched.
-            */
-            drflac_result result = drflac__seek_to_next_flac_frame(pFlac);
-            if (result == DRFLAC_SUCCESS) {
-                runningPCMFrameCount += pcmFrameCountInThisFrame;
-            } else {
-                if (result == DRFLAC_CRC_MISMATCH) {
-                    continue;   /* CRC mismatch. Pretend this frame never existed. */
-                } else {
-                    return DRFLAC_FALSE;
-                }
-            }
-        }
-    }
-}
-
-
-
-static drflac_bool32 drflac__init_private__ogg(drflac_init_info* pInit, drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, void* pUserDataMD, drflac_bool32 relaxed)
-{
-    drflac_ogg_page_header header;
-    drflac_uint32 crc32 = DRFLAC_OGG_CAPTURE_PATTERN_CRC32;
-    drflac_uint32 bytesRead = 0;
-
-    /* Pre Condition: The bit stream should be sitting just past the 4-byte OggS capture pattern. */
-    (void)relaxed;
-
-    pInit->container = drflac_container_ogg;
-    pInit->oggFirstBytePos = 0;
-
-    /*
-    We'll get here if the first 4 bytes of the stream were the OggS capture pattern, however it doesn't necessarily mean the
-    stream includes FLAC encoded audio. To check for this we need to scan the beginning-of-stream page markers and check if
-    any match the FLAC specification. Important to keep in mind that the stream may be multiplexed.
-    */
-    if (drflac_ogg__read_page_header_after_capture_pattern(onRead, pUserData, &header, &bytesRead, &crc32) != DRFLAC_SUCCESS) {
-        return DRFLAC_FALSE;
-    }
-    pInit->runningFilePos += bytesRead;
-
-    for (;;) {
-        int pageBodySize;
-
-        /* Break if we're past the beginning of stream page. */
-        if ((header.headerType & 0x02) == 0) {
-            return DRFLAC_FALSE;
-        }
-
-        /* Check if it's a FLAC header. */
-        pageBodySize = drflac_ogg__get_page_body_size(&header);
-        if (pageBodySize == 51) {   /* 51 = the lacing value of the FLAC header packet. */
-            /* It could be a FLAC page... */
-            drflac_uint32 bytesRemainingInPage = pageBodySize;
-            drflac_uint8 packetType;
-
-            if (onRead(pUserData, &packetType, 1) != 1) {
-                return DRFLAC_FALSE;
-            }
-
-            bytesRemainingInPage -= 1;
-            if (packetType == 0x7F) {
-                /* Increasingly more likely to be a FLAC page... */
-                drflac_uint8 sig[4];
-                if (onRead(pUserData, sig, 4) != 4) {
-                    return DRFLAC_FALSE;
-                }
-
-                bytesRemainingInPage -= 4;
-                if (sig[0] == 'F' && sig[1] == 'L' && sig[2] == 'A' && sig[3] == 'C') {
-                    /* Almost certainly a FLAC page... */
-                    drflac_uint8 mappingVersion[2];
-                    if (onRead(pUserData, mappingVersion, 2) != 2) {
-                        return DRFLAC_FALSE;
-                    }
-
-                    if (mappingVersion[0] != 1) {
-                        return DRFLAC_FALSE;   /* Only supporting version 1.x of the Ogg mapping. */
-                    }
-
-                    /*
-                    The next 2 bytes are the non-audio packets, not including this one. We don't care about this because we're going to
-                    be handling it in a generic way based on the serial number and packet types.
-                    */
-                    if (!onSeek(pUserData, 2, drflac_seek_origin_current)) {
-                        return DRFLAC_FALSE;
-                    }
-
-                    /* Expecting the native FLAC signature "fLaC". */
-                    if (onRead(pUserData, sig, 4) != 4) {
-                        return DRFLAC_FALSE;
-                    }
-
-                    if (sig[0] == 'f' && sig[1] == 'L' && sig[2] == 'a' && sig[3] == 'C') {
-                        /* The remaining data in the page should be the STREAMINFO block. */
-                        drflac_streaminfo streaminfo;
-                        drflac_uint8 isLastBlock;
-                        drflac_uint8 blockType;
-                        drflac_uint32 blockSize;
-                        if (!drflac__read_and_decode_block_header(onRead, pUserData, &isLastBlock, &blockType, &blockSize)) {
-                            return DRFLAC_FALSE;
-                        }
-
-                        if (blockType != DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO || blockSize != 34) {
-                            return DRFLAC_FALSE;    /* Invalid block type. First block must be the STREAMINFO block. */
-                        }
-
-                        if (drflac__read_streaminfo(onRead, pUserData, &streaminfo)) {
-                            /* Success! */
-                            pInit->hasStreamInfoBlock      = DRFLAC_TRUE;
-                            pInit->sampleRate              = streaminfo.sampleRate;
-                            pInit->channels                = streaminfo.channels;
-                            pInit->bitsPerSample           = streaminfo.bitsPerSample;
-                            pInit->totalPCMFrameCount      = streaminfo.totalPCMFrameCount;
-                            pInit->maxBlockSizeInPCMFrames = streaminfo.maxBlockSizeInPCMFrames;
-                            pInit->hasMetadataBlocks       = !isLastBlock;
-
-                            if (onMeta) {
-                                drflac_metadata metadata;
-                                metadata.type = DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO;
-                                metadata.pRawData = NULL;
-                                metadata.rawDataSize = 0;
-                                metadata.data.streaminfo = streaminfo;
-                                onMeta(pUserDataMD, &metadata);
-                            }
-
-                            pInit->runningFilePos  += pageBodySize;
-                            pInit->oggFirstBytePos  = pInit->runningFilePos - 79;   /* Subtracting 79 will place us right on top of the "OggS" identifier of the FLAC bos page. */
-                            pInit->oggSerial        = header.serialNumber;
-                            pInit->oggBosHeader     = header;
-                            break;
-                        } else {
-                            /* Failed to read STREAMINFO block. Aww, so close... */
-                            return DRFLAC_FALSE;
-                        }
-                    } else {
-                        /* Invalid file. */
-                        return DRFLAC_FALSE;
-                    }
-                } else {
-                    /* Not a FLAC header. Skip it. */
-                    if (!onSeek(pUserData, bytesRemainingInPage, drflac_seek_origin_current)) {
-                        return DRFLAC_FALSE;
-                    }
-                }
-            } else {
-                /* Not a FLAC header. Seek past the entire page and move on to the next. */
-                if (!onSeek(pUserData, bytesRemainingInPage, drflac_seek_origin_current)) {
-                    return DRFLAC_FALSE;
-                }
-            }
-        } else {
-            if (!onSeek(pUserData, pageBodySize, drflac_seek_origin_current)) {
-                return DRFLAC_FALSE;
-            }
-        }
-
-        pInit->runningFilePos += pageBodySize;
-
-
-        /* Read the header of the next page. */
-        if (drflac_ogg__read_page_header(onRead, pUserData, &header, &bytesRead, &crc32) != DRFLAC_SUCCESS) {
-            return DRFLAC_FALSE;
-        }
-        pInit->runningFilePos += bytesRead;
-    }
-
-    /*
-    If we get here it means we found a FLAC audio stream. We should be sitting on the first byte of the header of the next page. The next
-    packets in the FLAC logical stream contain the metadata. The only thing left to do in the initialization phase for Ogg is to create the
-    Ogg bistream object.
-    */
-    pInit->hasMetadataBlocks = DRFLAC_TRUE;    /* <-- Always have at least VORBIS_COMMENT metadata block. */
-    return DRFLAC_TRUE;
-}
-#endif
-
-static drflac_bool32 drflac__init_private(drflac_init_info* pInit, drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData, void* pUserDataMD)
-{
-    drflac_bool32 relaxed;
-    drflac_uint8 id[4];
-
-    if (pInit == NULL || onRead == NULL || onSeek == NULL) {
-        return DRFLAC_FALSE;
-    }
-
-    DRFLAC_ZERO_MEMORY(pInit, sizeof(*pInit));
-    pInit->onRead       = onRead;
-    pInit->onSeek       = onSeek;
-    pInit->onMeta       = onMeta;
-    pInit->container    = container;
-    pInit->pUserData    = pUserData;
-    pInit->pUserDataMD  = pUserDataMD;
-
-    pInit->bs.onRead    = onRead;
-    pInit->bs.onSeek    = onSeek;
-    pInit->bs.pUserData = pUserData;
-    drflac__reset_cache(&pInit->bs);
-
-
-    /* If the container is explicitly defined then we can try opening in relaxed mode. */
-    relaxed = container != drflac_container_unknown;
-
-    /* Skip over any ID3 tags. */
-    for (;;) {
-        if (onRead(pUserData, id, 4) != 4) {
-            return DRFLAC_FALSE;    /* Ran out of data. */
-        }
-        pInit->runningFilePos += 4;
-
-        if (id[0] == 'I' && id[1] == 'D' && id[2] == '3') {
-            drflac_uint8 header[6];
-            drflac_uint8 flags;
-            drflac_uint32 headerSize;
-
-            if (onRead(pUserData, header, 6) != 6) {
-                return DRFLAC_FALSE;    /* Ran out of data. */
-            }
-            pInit->runningFilePos += 6;
-
-            flags = header[1];
-
-            DRFLAC_COPY_MEMORY(&headerSize, header+2, 4);
-            headerSize = drflac__unsynchsafe_32(drflac__be2host_32(headerSize));
-            if (flags & 0x10) {
-                headerSize += 10;
-            }
-
-            if (!onSeek(pUserData, headerSize, drflac_seek_origin_current)) {
-                return DRFLAC_FALSE;    /* Failed to seek past the tag. */
-            }
-            pInit->runningFilePos += headerSize;
-        } else {
-            break;
-        }
-    }
-
-    if (id[0] == 'f' && id[1] == 'L' && id[2] == 'a' && id[3] == 'C') {
-        return drflac__init_private__native(pInit, onRead, onSeek, onMeta, pUserData, pUserDataMD, relaxed);
-    }
-#ifndef DR_FLAC_NO_OGG
-    if (id[0] == 'O' && id[1] == 'g' && id[2] == 'g' && id[3] == 'S') {
-        return drflac__init_private__ogg(pInit, onRead, onSeek, onMeta, pUserData, pUserDataMD, relaxed);
-    }
-#endif
-
-    /* If we get here it means we likely don't have a header. Try opening in relaxed mode, if applicable. */
-    if (relaxed) {
-        if (container == drflac_container_native) {
-            return drflac__init_private__native(pInit, onRead, onSeek, onMeta, pUserData, pUserDataMD, relaxed);
-        }
-#ifndef DR_FLAC_NO_OGG
-        if (container == drflac_container_ogg) {
-            return drflac__init_private__ogg(pInit, onRead, onSeek, onMeta, pUserData, pUserDataMD, relaxed);
-        }
-#endif
-    }
-
-    /* Unsupported container. */
-    return DRFLAC_FALSE;
-}
-
-static void drflac__init_from_info(drflac* pFlac, const drflac_init_info* pInit)
-{
-    DRFLAC_ASSERT(pFlac != NULL);
-    DRFLAC_ASSERT(pInit != NULL);
-
-    DRFLAC_ZERO_MEMORY(pFlac, sizeof(*pFlac));
-    pFlac->bs                      = pInit->bs;
-    pFlac->onMeta                  = pInit->onMeta;
-    pFlac->pUserDataMD             = pInit->pUserDataMD;
-    pFlac->maxBlockSizeInPCMFrames = pInit->maxBlockSizeInPCMFrames;
-    pFlac->sampleRate              = pInit->sampleRate;
-    pFlac->channels                = (drflac_uint8)pInit->channels;
-    pFlac->bitsPerSample           = (drflac_uint8)pInit->bitsPerSample;
-    pFlac->totalPCMFrameCount      = pInit->totalPCMFrameCount;
-    pFlac->container               = pInit->container;
-}
-
-
-static drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData, void* pUserDataMD, const drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    static drflac_init_info init;
-    drflac_uint32 allocationSize;
-    drflac_uint32 wholeSIMDVectorCountPerChannel;
-    drflac_uint32 decodedSamplesAllocationSize;
-#ifndef DR_FLAC_NO_OGG
-    static drflac_oggbs oggbs;
-#endif
-    drflac_uint64 firstFramePos;
-    drflac_uint64 seektablePos;
-    drflac_uint32 seektableSize;
-    drflac_allocation_callbacks allocationCallbacks;
-    drflac* pFlac;
-
-    /* CPU support first. */
-    drflac__init_cpu_caps();
-
-    if (!drflac__init_private(&init, onRead, onSeek, onMeta, container, pUserData, pUserDataMD)) {
-        return NULL;
-    }
-
-    if (pAllocationCallbacks != NULL) {
-        allocationCallbacks = *pAllocationCallbacks;
-        if (allocationCallbacks.onFree == NULL || (allocationCallbacks.onMalloc == NULL && allocationCallbacks.onRealloc == NULL)) {
-            return NULL;    /* Invalid allocation callbacks. */
-        }
-    } else {
-        allocationCallbacks.pUserData = NULL;
-        allocationCallbacks.onMalloc  = drflac__malloc_default;
-        allocationCallbacks.onRealloc = drflac__realloc_default;
-        allocationCallbacks.onFree    = drflac__free_default;
-    }
-
-
-    /*
-    The size of the allocation for the drflac object needs to be large enough to fit the following:
-      1) The main members of the drflac structure
-      2) A block of memory large enough to store the decoded samples of the largest frame in the stream
-      3) If the container is Ogg, a drflac_oggbs object
-
-    The complicated part of the allocation is making sure there's enough room the decoded samples, taking into consideration
-    the different SIMD instruction sets.
-    */
-    allocationSize = sizeof(drflac);
-
-    /*
-    The allocation size for decoded frames depends on the number of 32-bit integers that fit inside the largest SIMD vector
-    we are supporting.
-    */
-    if ((init.maxBlockSizeInPCMFrames % (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32))) == 0) {
-        wholeSIMDVectorCountPerChannel = (init.maxBlockSizeInPCMFrames / (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32)));
-    } else {
-        wholeSIMDVectorCountPerChannel = (init.maxBlockSizeInPCMFrames / (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32))) + 1;
-    }
-
-    decodedSamplesAllocationSize = wholeSIMDVectorCountPerChannel * DRFLAC_MAX_SIMD_VECTOR_SIZE * init.channels;
-
-    allocationSize += decodedSamplesAllocationSize;
-    allocationSize += DRFLAC_MAX_SIMD_VECTOR_SIZE;  /* Allocate extra bytes to ensure we have enough for alignment. */
-
-#ifndef DR_FLAC_NO_OGG
-    /* There's additional data required for Ogg streams. */
-    if (init.container == drflac_container_ogg) {
-        allocationSize += sizeof(drflac_oggbs);
-    }
-
-    DRFLAC_ZERO_MEMORY(&oggbs, sizeof(oggbs));
-    if (init.container == drflac_container_ogg) {
-        oggbs.onRead = onRead;
-        oggbs.onSeek = onSeek;
-        oggbs.pUserData = pUserData;
-        oggbs.currentBytePos = init.oggFirstBytePos;
-        oggbs.firstBytePos = init.oggFirstBytePos;
-        oggbs.serialNumber = init.oggSerial;
-        oggbs.bosPageHeader = init.oggBosHeader;
-        oggbs.bytesRemainingInPage = 0;
-    }
-#endif
-
-    /*
-    This part is a bit awkward. We need to load the seektable so that it can be referenced in-memory, but I want the drflac object to
-    consist of only a single heap allocation. To this, the size of the seek table needs to be known, which we determine when reading
-    and decoding the metadata.
-    */
-    firstFramePos = 42;   /* <-- We know we are at byte 42 at this point. */
-    seektablePos  = 0;
-    seektableSize = 0;
-    if (init.hasMetadataBlocks) {
-        drflac_read_proc onReadOverride = onRead;
-        drflac_seek_proc onSeekOverride = onSeek;
-        void* pUserDataOverride = pUserData;
-
-#ifndef DR_FLAC_NO_OGG
-        if (init.container == drflac_container_ogg) {
-            onReadOverride = drflac__on_read_ogg;
-            onSeekOverride = drflac__on_seek_ogg;
-            pUserDataOverride = (void*)&oggbs;
-        }
-#endif
-
-        if (!drflac__read_and_decode_metadata(onReadOverride, onSeekOverride, onMeta, pUserDataOverride, pUserDataMD, &firstFramePos, &seektablePos, &seektableSize, &allocationCallbacks)) {
-            return NULL;
-        }
-
-        allocationSize += seektableSize;
-    }
-
-
-    pFlac = (drflac*)drflac__malloc_from_callbacks(allocationSize, &allocationCallbacks);
-    if (pFlac == NULL) {
-        return NULL;
-    }
-
-    drflac__init_from_info(pFlac, &init);
-    pFlac->allocationCallbacks = allocationCallbacks;
-    pFlac->pDecodedSamples = (drflac_int32*)drflac_align((size_t)pFlac->pExtraData, DRFLAC_MAX_SIMD_VECTOR_SIZE);
-
-#ifndef DR_FLAC_NO_OGG
-    if (init.container == drflac_container_ogg) {
-        drflac_oggbs* pInternalOggbs = (drflac_oggbs*)((drflac_uint8*)pFlac->pDecodedSamples + decodedSamplesAllocationSize + seektableSize);
-        *pInternalOggbs = oggbs;
-
-        /* The Ogg bistream needs to be layered on top of the original bitstream. */
-        pFlac->bs.onRead = drflac__on_read_ogg;
-        pFlac->bs.onSeek = drflac__on_seek_ogg;
-        pFlac->bs.pUserData = (void*)pInternalOggbs;
-        pFlac->_oggbs = (void*)pInternalOggbs;
-    }
-#endif
-
-    pFlac->firstFLACFramePosInBytes = firstFramePos;
-
-    /* NOTE: Seektables are not currently compatible with Ogg encapsulation (Ogg has its own accelerated seeking system). I may change this later, so I'm leaving this here for now. */
-#ifndef DR_FLAC_NO_OGG
-    if (init.container == drflac_container_ogg)
-    {
-        pFlac->pSeekpoints = NULL;
-        pFlac->seekpointCount = 0;
-    }
-    else
-#endif
-    {
-        /* If we have a seektable we need to load it now, making sure we move back to where we were previously. */
-        if (seektablePos != 0) {
-            pFlac->seekpointCount = seektableSize / sizeof(*pFlac->pSeekpoints);
-            pFlac->pSeekpoints = (drflac_seekpoint*)((drflac_uint8*)pFlac->pDecodedSamples + decodedSamplesAllocationSize);
-
-            DRFLAC_ASSERT(pFlac->bs.onSeek != NULL);
-            DRFLAC_ASSERT(pFlac->bs.onRead != NULL);
-
-            /* Seek to the seektable, then just read directly into our seektable buffer. */
-            if (pFlac->bs.onSeek(pFlac->bs.pUserData, (int)seektablePos, drflac_seek_origin_start)) {
-                if (pFlac->bs.onRead(pFlac->bs.pUserData, pFlac->pSeekpoints, seektableSize) == seektableSize) {
-                    /* Endian swap. */
-                    drflac_uint32 iSeekpoint;
-                    for (iSeekpoint = 0; iSeekpoint < pFlac->seekpointCount; ++iSeekpoint) {
-                        pFlac->pSeekpoints[iSeekpoint].firstPCMFrame   = drflac__be2host_64(pFlac->pSeekpoints[iSeekpoint].firstPCMFrame);
-                        pFlac->pSeekpoints[iSeekpoint].flacFrameOffset = drflac__be2host_64(pFlac->pSeekpoints[iSeekpoint].flacFrameOffset);
-                        pFlac->pSeekpoints[iSeekpoint].pcmFrameCount   = drflac__be2host_16(pFlac->pSeekpoints[iSeekpoint].pcmFrameCount);
-                    }
-                } else {
-                    /* Failed to read the seektable. Pretend we don't have one. */
-                    pFlac->pSeekpoints = NULL;
-                    pFlac->seekpointCount = 0;
-                }
-
-                /* We need to seek back to where we were. If this fails it's a critical error. */
-                if (!pFlac->bs.onSeek(pFlac->bs.pUserData, (int)pFlac->firstFLACFramePosInBytes, drflac_seek_origin_start)) {
-                    drflac__free_from_callbacks(pFlac, &allocationCallbacks);
-                    return NULL;
-                }
-            } else {
-                /* Failed to seek to the seektable. Ominous sign, but for now we can just pretend we don't have one. */
-                pFlac->pSeekpoints = NULL;
-                pFlac->seekpointCount = 0;
-            }
-        }
-    }
-
-
-    /*
-    If we get here, but don't have a STREAMINFO block, it means we've opened the stream in relaxed mode and need to decode
-    the first frame.
-    */
-    if (!init.hasStreamInfoBlock) {
-        pFlac->currentFLACFrame.header = init.firstFrameHeader;
-        for (;;) {
-            drflac_result result = drflac__decode_flac_frame(pFlac);
-            if (result == DRFLAC_SUCCESS) {
-                break;
-            } else {
-                if (result == DRFLAC_CRC_MISMATCH) {
-                    if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
-                        drflac__free_from_callbacks(pFlac, &allocationCallbacks);
-                        return NULL;
-                    }
-                    continue;
-                } else {
-                    drflac__free_from_callbacks(pFlac, &allocationCallbacks);
-                    return NULL;
-                }
-            }
-        }
-    }
-
-    return pFlac;
-}
-
-
-
-#ifndef DR_FLAC_NO_STDIO
-#include <stdio.h>
-#include <wchar.h>      /* For wcslen(), wcsrtombs() */
-
-/* drflac_result_from_errno() is only used for fopen() and wfopen() so putting it inside DR_WAV_NO_STDIO for now. If something else needs this later we can move it out. */
-#include <errno.h>
-static drflac_result drflac_result_from_errno(int e)
-{
-    switch (e)
-    {
-        case 0: return DRFLAC_SUCCESS;
-    #ifdef EPERM
-        case EPERM: return DRFLAC_INVALID_OPERATION;
-    #endif
-    #ifdef ENOENT
-        case ENOENT: return DRFLAC_DOES_NOT_EXIST;
-    #endif
-    #ifdef ESRCH
-        case ESRCH: return DRFLAC_DOES_NOT_EXIST;
-    #endif
-    #ifdef EINTR
-        case EINTR: return DRFLAC_INTERRUPT;
-    #endif
-    #ifdef EIO
-        case EIO: return DRFLAC_IO_ERROR;
-    #endif
-    #ifdef ENXIO
-        case ENXIO: return DRFLAC_DOES_NOT_EXIST;
-    #endif
-    #ifdef E2BIG
-        case E2BIG: return DRFLAC_INVALID_ARGS;
-    #endif
-    #ifdef ENOEXEC
-        case ENOEXEC: return DRFLAC_INVALID_FILE;
-    #endif
-    #ifdef EBADF
-        case EBADF: return DRFLAC_INVALID_FILE;
-    #endif
-    #ifdef ECHILD
-        case ECHILD: return DRFLAC_ERROR;
-    #endif
-    #ifdef EAGAIN
-        case EAGAIN: return DRFLAC_UNAVAILABLE;
-    #endif
-    #ifdef ENOMEM
-        case ENOMEM: return DRFLAC_OUT_OF_MEMORY;
-    #endif
-    #ifdef EACCES
-        case EACCES: return DRFLAC_ACCESS_DENIED;
-    #endif
-    #ifdef EFAULT
-        case EFAULT: return DRFLAC_BAD_ADDRESS;
-    #endif
-    #ifdef ENOTBLK
-        case ENOTBLK: return DRFLAC_ERROR;
-    #endif
-    #ifdef EBUSY
-        case EBUSY: return DRFLAC_BUSY;
-    #endif
-    #ifdef EEXIST
-        case EEXIST: return DRFLAC_ALREADY_EXISTS;
-    #endif
-    #ifdef EXDEV
-        case EXDEV: return DRFLAC_ERROR;
-    #endif
-    #ifdef ENODEV
-        case ENODEV: return DRFLAC_DOES_NOT_EXIST;
-    #endif
-    #ifdef ENOTDIR
-        case ENOTDIR: return DRFLAC_NOT_DIRECTORY;
-    #endif
-    #ifdef EISDIR
-        case EISDIR: return DRFLAC_IS_DIRECTORY;
-    #endif
-    #ifdef EINVAL
-        case EINVAL: return DRFLAC_INVALID_ARGS;
-    #endif
-    #ifdef ENFILE
-        case ENFILE: return DRFLAC_TOO_MANY_OPEN_FILES;
-    #endif
-    #ifdef EMFILE
-        case EMFILE: return DRFLAC_TOO_MANY_OPEN_FILES;
-    #endif
-    #ifdef ENOTTY
-        case ENOTTY: return DRFLAC_INVALID_OPERATION;
-    #endif
-    #ifdef ETXTBSY
-        case ETXTBSY: return DRFLAC_BUSY;
-    #endif
-    #ifdef EFBIG
-        case EFBIG: return DRFLAC_TOO_BIG;
-    #endif
-    #ifdef ENOSPC
-        case ENOSPC: return DRFLAC_NO_SPACE;
-    #endif
-    #ifdef ESPIPE
-        case ESPIPE: return DRFLAC_BAD_SEEK;
-    #endif
-    #ifdef EROFS
-        case EROFS: return DRFLAC_ACCESS_DENIED;
-    #endif
-    #ifdef EMLINK
-        case EMLINK: return DRFLAC_TOO_MANY_LINKS;
-    #endif
-    #ifdef EPIPE
-        case EPIPE: return DRFLAC_BAD_PIPE;
-    #endif
-    #ifdef EDOM
-        case EDOM: return DRFLAC_OUT_OF_RANGE;
-    #endif
-    #ifdef ERANGE
-        case ERANGE: return DRFLAC_OUT_OF_RANGE;
-    #endif
-    #ifdef EDEADLK
-        case EDEADLK: return DRFLAC_DEADLOCK;
-    #endif
-    #ifdef ENAMETOOLONG
-        case ENAMETOOLONG: return DRFLAC_PATH_TOO_LONG;
-    #endif
-    #ifdef ENOLCK
-        case ENOLCK: return DRFLAC_ERROR;
-    #endif
-    #ifdef ENOSYS
-        case ENOSYS: return DRFLAC_NOT_IMPLEMENTED;
-    #endif
-    #ifdef ENOTEMPTY
-        case ENOTEMPTY: return DRFLAC_DIRECTORY_NOT_EMPTY;
-    #endif
-    #ifdef ELOOP
-        case ELOOP: return DRFLAC_TOO_MANY_LINKS;
-    #endif
-    #ifdef ENOMSG
-        case ENOMSG: return DRFLAC_NO_MESSAGE;
-    #endif
-    #ifdef EIDRM
-        case EIDRM: return DRFLAC_ERROR;
-    #endif
-    #ifdef ECHRNG
-        case ECHRNG: return DRFLAC_ERROR;
-    #endif
-    #ifdef EL2NSYNC
-        case EL2NSYNC: return DRFLAC_ERROR;
-    #endif
-    #ifdef EL3HLT
-        case EL3HLT: return DRFLAC_ERROR;
-    #endif
-    #ifdef EL3RST
-        case EL3RST: return DRFLAC_ERROR;
-    #endif
-    #ifdef ELNRNG
-        case ELNRNG: return DRFLAC_OUT_OF_RANGE;
-    #endif
-    #ifdef EUNATCH
-        case EUNATCH: return DRFLAC_ERROR;
-    #endif
-    #ifdef ENOCSI
-        case ENOCSI: return DRFLAC_ERROR;
-    #endif
-    #ifdef EL2HLT
-        case EL2HLT: return DRFLAC_ERROR;
-    #endif
-    #ifdef EBADE
-        case EBADE: return DRFLAC_ERROR;
-    #endif
-    #ifdef EBADR
-        case EBADR: return DRFLAC_ERROR;
-    #endif
-    #ifdef EXFULL
-        case EXFULL: return DRFLAC_ERROR;
-    #endif
-    #ifdef ENOANO
-        case ENOANO: return DRFLAC_ERROR;
-    #endif
-    #ifdef EBADRQC
-        case EBADRQC: return DRFLAC_ERROR;
-    #endif
-    #ifdef EBADSLT
-        case EBADSLT: return DRFLAC_ERROR;
-    #endif
-    #ifdef EBFONT
-        case EBFONT: return DRFLAC_INVALID_FILE;
-    #endif
-    #ifdef ENOSTR
-        case ENOSTR: return DRFLAC_ERROR;
-    #endif
-    #ifdef ENODATA
-        case ENODATA: return DRFLAC_NO_DATA_AVAILABLE;
-    #endif
-    #ifdef ETIME
-        case ETIME: return DRFLAC_TIMEOUT;
-    #endif
-    #ifdef ENOSR
-        case ENOSR: return DRFLAC_NO_DATA_AVAILABLE;
-    #endif
-    #ifdef ENONET
-        case ENONET: return DRFLAC_NO_NETWORK;
-    #endif
-    #ifdef ENOPKG
-        case ENOPKG: return DRFLAC_ERROR;
-    #endif
-    #ifdef EREMOTE
-        case EREMOTE: return DRFLAC_ERROR;
-    #endif
-    #ifdef ENOLINK
-        case ENOLINK: return DRFLAC_ERROR;
-    #endif
-    #ifdef EADV
-        case EADV: return DRFLAC_ERROR;
-    #endif
-    #ifdef ESRMNT
-        case ESRMNT: return DRFLAC_ERROR;
-    #endif
-    #ifdef ECOMM
-        case ECOMM: return DRFLAC_ERROR;
-    #endif
-    #ifdef EPROTO
-        case EPROTO: return DRFLAC_ERROR;
-    #endif
-    #ifdef EMULTIHOP
-        case EMULTIHOP: return DRFLAC_ERROR;
-    #endif
-    #ifdef EDOTDOT
-        case EDOTDOT: return DRFLAC_ERROR;
-    #endif
-    #ifdef EBADMSG
-        case EBADMSG: return DRFLAC_BAD_MESSAGE;
-    #endif
-    #ifdef EOVERFLOW
-        case EOVERFLOW: return DRFLAC_TOO_BIG;
-    #endif
-    #ifdef ENOTUNIQ
-        case ENOTUNIQ: return DRFLAC_NOT_UNIQUE;
-    #endif
-    #ifdef EBADFD
-        case EBADFD: return DRFLAC_ERROR;
-    #endif
-    #ifdef EREMCHG
-        case EREMCHG: return DRFLAC_ERROR;
-    #endif
-    #ifdef ELIBACC
-        case ELIBACC: return DRFLAC_ACCESS_DENIED;
-    #endif
-    #ifdef ELIBBAD
-        case ELIBBAD: return DRFLAC_INVALID_FILE;
-    #endif
-    #ifdef ELIBSCN
-        case ELIBSCN: return DRFLAC_INVALID_FILE;
-    #endif
-    #ifdef ELIBMAX
-        case ELIBMAX: return DRFLAC_ERROR;
-    #endif
-    #ifdef ELIBEXEC
-        case ELIBEXEC: return DRFLAC_ERROR;
-    #endif
-    #ifdef EILSEQ
-        case EILSEQ: return DRFLAC_INVALID_DATA;
-    #endif
-    #ifdef ERESTART
-        case ERESTART: return DRFLAC_ERROR;
-    #endif
-    #ifdef ESTRPIPE
-        case ESTRPIPE: return DRFLAC_ERROR;
-    #endif
-    #ifdef EUSERS
-        case EUSERS: return DRFLAC_ERROR;
-    #endif
-    #ifdef ENOTSOCK
-        case ENOTSOCK: return DRFLAC_NOT_SOCKET;
-    #endif
-    #ifdef EDESTADDRREQ
-        case EDESTADDRREQ: return DRFLAC_NO_ADDRESS;
-    #endif
-    #ifdef EMSGSIZE
-        case EMSGSIZE: return DRFLAC_TOO_BIG;
-    #endif
-    #ifdef EPROTOTYPE
-        case EPROTOTYPE: return DRFLAC_BAD_PROTOCOL;
-    #endif
-    #ifdef ENOPROTOOPT
-        case ENOPROTOOPT: return DRFLAC_PROTOCOL_UNAVAILABLE;
-    #endif
-    #ifdef EPROTONOSUPPORT
-        case EPROTONOSUPPORT: return DRFLAC_PROTOCOL_NOT_SUPPORTED;
-    #endif
-    #ifdef ESOCKTNOSUPPORT
-        case ESOCKTNOSUPPORT: return DRFLAC_SOCKET_NOT_SUPPORTED;
-    #endif
-    #ifdef EOPNOTSUPP
-        case EOPNOTSUPP: return DRFLAC_INVALID_OPERATION;
-    #endif
-    #ifdef EPFNOSUPPORT
-        case EPFNOSUPPORT: return DRFLAC_PROTOCOL_FAMILY_NOT_SUPPORTED;
-    #endif
-    #ifdef EAFNOSUPPORT
-        case EAFNOSUPPORT: return DRFLAC_ADDRESS_FAMILY_NOT_SUPPORTED;
-    #endif
-    #ifdef EADDRINUSE
-        case EADDRINUSE: return DRFLAC_ALREADY_IN_USE;
-    #endif
-    #ifdef EADDRNOTAVAIL
-        case EADDRNOTAVAIL: return DRFLAC_ERROR;
-    #endif
-    #ifdef ENETDOWN
-        case ENETDOWN: return DRFLAC_NO_NETWORK;
-    #endif
-    #ifdef ENETUNREACH
-        case ENETUNREACH: return DRFLAC_NO_NETWORK;
-    #endif
-    #ifdef ENETRESET
-        case ENETRESET: return DRFLAC_NO_NETWORK;
-    #endif
-    #ifdef ECONNABORTED
-        case ECONNABORTED: return DRFLAC_NO_NETWORK;
-    #endif
-    #ifdef ECONNRESET
-        case ECONNRESET: return DRFLAC_CONNECTION_RESET;
-    #endif
-    #ifdef ENOBUFS
-        case ENOBUFS: return DRFLAC_NO_SPACE;
-    #endif
-    #ifdef EISCONN
-        case EISCONN: return DRFLAC_ALREADY_CONNECTED;
-    #endif
-    #ifdef ENOTCONN
-        case ENOTCONN: return DRFLAC_NOT_CONNECTED;
-    #endif
-    #ifdef ESHUTDOWN
-        case ESHUTDOWN: return DRFLAC_ERROR;
-    #endif
-    #ifdef ETOOMANYREFS
-        case ETOOMANYREFS: return DRFLAC_ERROR;
-    #endif
-    #ifdef ETIMEDOUT
-        case ETIMEDOUT: return DRFLAC_TIMEOUT;
-    #endif
-    #ifdef ECONNREFUSED
-        case ECONNREFUSED: return DRFLAC_CONNECTION_REFUSED;
-    #endif
-    #ifdef EHOSTDOWN
-        case EHOSTDOWN: return DRFLAC_NO_HOST;
-    #endif
-    #ifdef EHOSTUNREACH
-        case EHOSTUNREACH: return DRFLAC_NO_HOST;
-    #endif
-    #ifdef EALREADY
-        case EALREADY: return DRFLAC_IN_PROGRESS;
-    #endif
-    #ifdef EINPROGRESS
-        case EINPROGRESS: return DRFLAC_IN_PROGRESS;
-    #endif
-    #ifdef ESTALE
-        case ESTALE: return DRFLAC_INVALID_FILE;
-    #endif
-    #ifdef EUCLEAN
-        case EUCLEAN: return DRFLAC_ERROR;
-    #endif
-    #ifdef ENOTNAM
-        case ENOTNAM: return DRFLAC_ERROR;
-    #endif
-    #ifdef ENAVAIL
-        case ENAVAIL: return DRFLAC_ERROR;
-    #endif
-    #ifdef EISNAM
-        case EISNAM: return DRFLAC_ERROR;
-    #endif
-    #ifdef EREMOTEIO
-        case EREMOTEIO: return DRFLAC_IO_ERROR;
-    #endif
-    #ifdef EDQUOT
-        case EDQUOT: return DRFLAC_NO_SPACE;
-    #endif
-    #ifdef ENOMEDIUM
-        case ENOMEDIUM: return DRFLAC_DOES_NOT_EXIST;
-    #endif
-    #ifdef EMEDIUMTYPE
-        case EMEDIUMTYPE: return DRFLAC_ERROR;
-    #endif
-    #ifdef ECANCELED
-        case ECANCELED: return DRFLAC_CANCELLED;
-    #endif
-    #ifdef ENOKEY
-        case ENOKEY: return DRFLAC_ERROR;
-    #endif
-    #ifdef EKEYEXPIRED
-        case EKEYEXPIRED: return DRFLAC_ERROR;
-    #endif
-    #ifdef EKEYREVOKED
-        case EKEYREVOKED: return DRFLAC_ERROR;
-    #endif
-    #ifdef EKEYREJECTED
-        case EKEYREJECTED: return DRFLAC_ERROR;
-    #endif
-    #ifdef EOWNERDEAD
-        case EOWNERDEAD: return DRFLAC_ERROR;
-    #endif
-    #ifdef ENOTRECOVERABLE
-        case ENOTRECOVERABLE: return DRFLAC_ERROR;
-    #endif
-    #ifdef ERFKILL
-        case ERFKILL: return DRFLAC_ERROR;
-    #endif
-    #ifdef EHWPOISON
-        case EHWPOISON: return DRFLAC_ERROR;
-    #endif
-        default: return DRFLAC_ERROR;
-    }
-}
-
-static drflac_result drflac_fopen(FILE** ppFile, const char* pFilePath, const char* pOpenMode)
-{
-#if _MSC_VER && _MSC_VER >= 1400
-    errno_t err;
-#endif
-
-    if (ppFile != NULL) {
-        *ppFile = NULL;  /* Safety. */
-    }
-
-    if (pFilePath == NULL || pOpenMode == NULL || ppFile == NULL) {
-        return DRFLAC_INVALID_ARGS;
-    }
-
-#if _MSC_VER && _MSC_VER >= 1400
-    err = fopen_s(ppFile, pFilePath, pOpenMode);
-    if (err != 0) {
-        return drflac_result_from_errno(err);
-    }
-#else
-#if defined(_WIN32) || defined(__APPLE__)
-    *ppFile = fopen(pFilePath, pOpenMode);
-#else
-    #if defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS == 64 && defined(_LARGEFILE64_SOURCE)
-        *ppFile = fopen64(pFilePath, pOpenMode);
-    #else
-        *ppFile = fopen(pFilePath, pOpenMode);
-    #endif
-#endif
-    if (*ppFile == NULL) {
-        drflac_result result = drflac_result_from_errno(errno);
-        if (result == DRFLAC_SUCCESS) {
-            result = DRFLAC_ERROR;   /* Just a safety check to make sure we never ever return success when pFile == NULL. */
-        }
-
-        return result;
-    }
-#endif
-
-    return DRFLAC_SUCCESS;
-}
-
-/*
-_wfopen() isn't always available in all compilation environments.
-
-    * Windows only.
-    * MSVC seems to support it universally as far back as VC6 from what I can tell (haven't checked further back).
-    * MinGW-64 (both 32- and 64-bit) seems to support it.
-    * MinGW wraps it in !defined(__STRICT_ANSI__).
-
-This can be reviewed as compatibility issues arise. The preference is to use _wfopen_s() and _wfopen() as opposed to the wcsrtombs()
-fallback, so if you notice your compiler not detecting this properly I'm happy to look at adding support.
-*/
-#if defined(_WIN32)
-    #if defined(_MSC_VER) || defined(__MINGW64__) || !defined(__STRICT_ANSI__)
-        #define DRFLAC_HAS_WFOPEN
-    #endif
-#endif
-
-static drflac_result drflac_wfopen(FILE** ppFile, const wchar_t* pFilePath, const wchar_t* pOpenMode, const drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    if (ppFile != NULL) {
-        *ppFile = NULL;  /* Safety. */
-    }
-
-    if (pFilePath == NULL || pOpenMode == NULL || ppFile == NULL) {
-        return DRFLAC_INVALID_ARGS;
-    }
-
-#if defined(DRFLAC_HAS_WFOPEN)
-    {
-        /* Use _wfopen() on Windows. */
-    #if defined(_MSC_VER) && _MSC_VER >= 1400
-        errno_t err = _wfopen_s(ppFile, pFilePath, pOpenMode);
-        if (err != 0) {
-            return drflac_result_from_errno(err);
-        }
-    #else
-        *ppFile = _wfopen(pFilePath, pOpenMode);
-        if (*ppFile == NULL) {
-            return drflac_result_from_errno(errno);
-        }
-    #endif
-        (void)pAllocationCallbacks;
-    }
-#else
-    /*
-    Use fopen() on anything other than Windows. Requires a conversion. This is annoying because fopen() is locale specific. The only real way I can
-    think of to do this is with wcsrtombs(). Note that wcstombs() is apparently not thread-safe because it uses a static global mbstate_t object for
-    maintaining state. I've checked this with -std=c89 and it works, but if somebody get's a compiler error I'll look into improving compatibility.
-    */
-    {
-        mbstate_t mbs;
-        size_t lenMB;
-        const wchar_t* pFilePathTemp = pFilePath;
-        char* pFilePathMB = NULL;
-        char pOpenModeMB[32] = {0};
-
-        /* Get the length first. */
-        DRFLAC_ZERO_OBJECT(&mbs);
-        lenMB = wcsrtombs(NULL, &pFilePathTemp, 0, &mbs);
-        if (lenMB == (size_t)-1) {
-            return drflac_result_from_errno(errno);
-        }
-
-        pFilePathMB = (char*)drflac__malloc_from_callbacks(lenMB + 1, pAllocationCallbacks);
-        if (pFilePathMB == NULL) {
-            return DRFLAC_OUT_OF_MEMORY;
-        }
-
-        pFilePathTemp = pFilePath;
-        DRFLAC_ZERO_OBJECT(&mbs);
-        wcsrtombs(pFilePathMB, &pFilePathTemp, lenMB + 1, &mbs);
-
-        /* The open mode should always consist of ASCII characters so we should be able to do a trivial conversion. */
-        {
-            size_t i = 0;
-            for (;;) {
-                if (pOpenMode[i] == 0) {
-                    pOpenModeMB[i] = '\0';
-                    break;
-                }
-
-                pOpenModeMB[i] = (char)pOpenMode[i];
-                i += 1;
-            }
-        }
-
-        *ppFile = fopen(pFilePathMB, pOpenModeMB);
-
-        drflac__free_from_callbacks(pFilePathMB, pAllocationCallbacks);
-    }
-
-    if (*ppFile == NULL) {
-        return DRFLAC_ERROR;
-    }
-#endif
-
-    return DRFLAC_SUCCESS;
-}
-
-static size_t drflac__on_read_stdio(void* pUserData, void* bufferOut, size_t bytesToRead)
-{
-    return fread(bufferOut, 1, bytesToRead, (FILE*)pUserData);
-}
-
-static drflac_bool32 drflac__on_seek_stdio(void* pUserData, int offset, drflac_seek_origin origin)
-{
-    DRFLAC_ASSERT(offset >= 0);  /* <-- Never seek backwards. */
-
-    return fseek((FILE*)pUserData, offset, (origin == drflac_seek_origin_current) ? SEEK_CUR : SEEK_SET) == 0;
-}
-
-
-DRFLAC_API drflac* drflac_open_file(const char* pFileName, const drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    drflac* pFlac;
-    FILE* pFile;
-
-    if (drflac_fopen(&pFile, pFileName, "rb") != DRFLAC_SUCCESS) {
-        return NULL;
-    }
-
-    pFlac = drflac_open(drflac__on_read_stdio, drflac__on_seek_stdio, (void*)pFile, pAllocationCallbacks);
-    if (pFlac == NULL) {
-        fclose(pFile);
-        return NULL;
-    }
-
-    return pFlac;
-}
-
-DRFLAC_API drflac* drflac_open_file_w(const wchar_t* pFileName, const drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    drflac* pFlac;
-    FILE* pFile;
-
-    if (drflac_wfopen(&pFile, pFileName, L"rb", pAllocationCallbacks) != DRFLAC_SUCCESS) {
-        return NULL;
-    }
-
-    pFlac = drflac_open(drflac__on_read_stdio, drflac__on_seek_stdio, (void*)pFile, pAllocationCallbacks);
-    if (pFlac == NULL) {
-        fclose(pFile);
-        return NULL;
-    }
-
-    return pFlac;
-}
-
-DRFLAC_API drflac* drflac_open_file_with_metadata(const char* pFileName, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    drflac* pFlac;
-    FILE* pFile;
-
-    if (drflac_fopen(&pFile, pFileName, "rb") != DRFLAC_SUCCESS) {
-        return NULL;
-    }
-
-    pFlac = drflac_open_with_metadata_private(drflac__on_read_stdio, drflac__on_seek_stdio, onMeta, drflac_container_unknown, (void*)pFile, pUserData, pAllocationCallbacks);
-    if (pFlac == NULL) {
-        fclose(pFile);
-        return pFlac;
-    }
-
-    return pFlac;
-}
-
-DRFLAC_API drflac* drflac_open_file_with_metadata_w(const wchar_t* pFileName, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    drflac* pFlac;
-    FILE* pFile;
-
-    if (drflac_wfopen(&pFile, pFileName, L"rb", pAllocationCallbacks) != DRFLAC_SUCCESS) {
-        return NULL;
-    }
-
-    pFlac = drflac_open_with_metadata_private(drflac__on_read_stdio, drflac__on_seek_stdio, onMeta, drflac_container_unknown, (void*)pFile, pUserData, pAllocationCallbacks);
-    if (pFlac == NULL) {
-        fclose(pFile);
-        return pFlac;
-    }
-
-    return pFlac;
-}
-#endif  /* DR_FLAC_NO_STDIO */
-
-static size_t drflac__on_read_memory(void* pUserData, void* bufferOut, size_t bytesToRead)
-{
-    drflac__memory_stream* memoryStream = (drflac__memory_stream*)pUserData;
-    size_t bytesRemaining;
-
-    DRFLAC_ASSERT(memoryStream != NULL);
-    DRFLAC_ASSERT(memoryStream->dataSize >= memoryStream->currentReadPos);
-
-    bytesRemaining = memoryStream->dataSize - memoryStream->currentReadPos;
-    if (bytesToRead > bytesRemaining) {
-        bytesToRead = bytesRemaining;
-    }
-
-    if (bytesToRead > 0) {
-        DRFLAC_COPY_MEMORY(bufferOut, memoryStream->data + memoryStream->currentReadPos, bytesToRead);
-        memoryStream->currentReadPos += bytesToRead;
-    }
-
-    return bytesToRead;
-}
-
-static drflac_bool32 drflac__on_seek_memory(void* pUserData, int offset, drflac_seek_origin origin)
-{
-    drflac__memory_stream* memoryStream = (drflac__memory_stream*)pUserData;
-
-    DRFLAC_ASSERT(memoryStream != NULL);
-    DRFLAC_ASSERT(offset >= 0); /* <-- Never seek backwards. */
-
-    if (offset > (drflac_int64)memoryStream->dataSize) {
-        return DRFLAC_FALSE;
-    }
-
-    if (origin == drflac_seek_origin_current) {
-        if (memoryStream->currentReadPos + offset <= memoryStream->dataSize) {
-            memoryStream->currentReadPos += offset;
-        } else {
-            return DRFLAC_FALSE;  /* Trying to seek too far forward. */
-        }
-    } else {
-        if ((drflac_uint32)offset <= memoryStream->dataSize) {
-            memoryStream->currentReadPos = offset;
-        } else {
-            return DRFLAC_FALSE;  /* Trying to seek too far forward. */
-        }
-    }
-
-    return DRFLAC_TRUE;
-}
-
-DRFLAC_API drflac* drflac_open_memory(const void* pData, size_t dataSize, const drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    drflac__memory_stream memoryStream;
-    drflac* pFlac;
-
-    memoryStream.data = (const drflac_uint8*)pData;
-    memoryStream.dataSize = dataSize;
-    memoryStream.currentReadPos = 0;
-    pFlac = drflac_open(drflac__on_read_memory, drflac__on_seek_memory, &memoryStream, pAllocationCallbacks);
-    if (pFlac == NULL) {
-        return NULL;
-    }
-
-    pFlac->memoryStream = memoryStream;
-
-    /* This is an awful hack... */
-#ifndef DR_FLAC_NO_OGG
-    if (pFlac->container == drflac_container_ogg)
-    {
-        drflac_oggbs* oggbs = (drflac_oggbs*)pFlac->_oggbs;
-        oggbs->pUserData = &pFlac->memoryStream;
-    }
-    else
-#endif
-    {
-        pFlac->bs.pUserData = &pFlac->memoryStream;
-    }
-
-    return pFlac;
-}
-
-DRFLAC_API drflac* drflac_open_memory_with_metadata(const void* pData, size_t dataSize, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    drflac__memory_stream memoryStream;
-    drflac* pFlac;
-
-    memoryStream.data = (const drflac_uint8*)pData;
-    memoryStream.dataSize = dataSize;
-    memoryStream.currentReadPos = 0;
-    pFlac = drflac_open_with_metadata_private(drflac__on_read_memory, drflac__on_seek_memory, onMeta, drflac_container_unknown, &memoryStream, pUserData, pAllocationCallbacks);
-    if (pFlac == NULL) {
-        return NULL;
-    }
-
-    pFlac->memoryStream = memoryStream;
-
-    /* This is an awful hack... */
-#ifndef DR_FLAC_NO_OGG
-    if (pFlac->container == drflac_container_ogg)
-    {
-        drflac_oggbs* oggbs = (drflac_oggbs*)pFlac->_oggbs;
-        oggbs->pUserData = &pFlac->memoryStream;
-    }
-    else
-#endif
-    {
-        pFlac->bs.pUserData = &pFlac->memoryStream;
-    }
-
-    return pFlac;
-}
-
-
-
-DRFLAC_API drflac* drflac_open(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    return drflac_open_with_metadata_private(onRead, onSeek, NULL, drflac_container_unknown, pUserData, pUserData, pAllocationCallbacks);
-}
-DRFLAC_API drflac* drflac_open_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_container container, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    return drflac_open_with_metadata_private(onRead, onSeek, NULL, container, pUserData, pUserData, pAllocationCallbacks);
-}
-
-DRFLAC_API drflac* drflac_open_with_metadata(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    return drflac_open_with_metadata_private(onRead, onSeek, onMeta, drflac_container_unknown, pUserData, pUserData, pAllocationCallbacks);
-}
-DRFLAC_API drflac* drflac_open_with_metadata_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    return drflac_open_with_metadata_private(onRead, onSeek, onMeta, container, pUserData, pUserData, pAllocationCallbacks);
-}
-
-DRFLAC_API void drflac_close(drflac* pFlac)
-{
-    if (pFlac == NULL) {
-        return;
-    }
-
-#ifndef DR_FLAC_NO_STDIO
-    /*
-    If we opened the file with drflac_open_file() we will want to close the file handle. We can know whether or not drflac_open_file()
-    was used by looking at the callbacks.
-    */
-    if (pFlac->bs.onRead == drflac__on_read_stdio) {
-        fclose((FILE*)pFlac->bs.pUserData);
-    }
-
-#ifndef DR_FLAC_NO_OGG
-    /* Need to clean up Ogg streams a bit differently due to the way the bit streaming is chained. */
-    if (pFlac->container == drflac_container_ogg) {
-        drflac_oggbs* oggbs = (drflac_oggbs*)pFlac->_oggbs;
-        DRFLAC_ASSERT(pFlac->bs.onRead == drflac__on_read_ogg);
-
-        if (oggbs->onRead == drflac__on_read_stdio) {
-            fclose((FILE*)oggbs->pUserData);
-        }
-    }
-#endif
-#endif
-
-    drflac__free_from_callbacks(pFlac, &pFlac->allocationCallbacks);
-}
-
-
-#if 0
-static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_left_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
-{
-    drflac_uint64 i;
-    for (i = 0; i < frameCount; ++i) {
-        drflac_uint32 left  = (drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
-        drflac_uint32 side  = (drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
-        drflac_uint32 right = left - side;
-
-        pOutputSamples[i*2+0] = (drflac_int32)left;
-        pOutputSamples[i*2+1] = (drflac_int32)right;
-    }
-}
-#endif
-
-static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_left_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-    for (i = 0; i < frameCount4; ++i) {
-        drflac_uint32 left0 = pInputSamples0U32[i*4+0] << shift0;
-        drflac_uint32 left1 = pInputSamples0U32[i*4+1] << shift0;
-        drflac_uint32 left2 = pInputSamples0U32[i*4+2] << shift0;
-        drflac_uint32 left3 = pInputSamples0U32[i*4+3] << shift0;
-
-        drflac_uint32 side0 = pInputSamples1U32[i*4+0] << shift1;
-        drflac_uint32 side1 = pInputSamples1U32[i*4+1] << shift1;
-        drflac_uint32 side2 = pInputSamples1U32[i*4+2] << shift1;
-        drflac_uint32 side3 = pInputSamples1U32[i*4+3] << shift1;
-
-        drflac_uint32 right0 = left0 - side0;
-        drflac_uint32 right1 = left1 - side1;
-        drflac_uint32 right2 = left2 - side2;
-        drflac_uint32 right3 = left3 - side3;
-
-        pOutputSamples[i*8+0] = (drflac_int32)left0;
-        pOutputSamples[i*8+1] = (drflac_int32)right0;
-        pOutputSamples[i*8+2] = (drflac_int32)left1;
-        pOutputSamples[i*8+3] = (drflac_int32)right1;
-        pOutputSamples[i*8+4] = (drflac_int32)left2;
-        pOutputSamples[i*8+5] = (drflac_int32)right2;
-        pOutputSamples[i*8+6] = (drflac_int32)left3;
-        pOutputSamples[i*8+7] = (drflac_int32)right3;
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
-        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
-        drflac_uint32 right = left - side;
-
-        pOutputSamples[i*2+0] = (drflac_int32)left;
-        pOutputSamples[i*2+1] = (drflac_int32)right;
-    }
-}
-
-#if defined(DRFLAC_SUPPORT_SSE2)
-static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_left_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
-
-    for (i = 0; i < frameCount4; ++i) {
-        __m128i left  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
-        __m128i side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
-        __m128i right = _mm_sub_epi32(left, side);
-
-        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right));
-        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right));
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
-        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
-        drflac_uint32 right = left - side;
-
-        pOutputSamples[i*2+0] = (drflac_int32)left;
-        pOutputSamples[i*2+1] = (drflac_int32)right;
-    }
-}
-#endif
-
-#if defined(DRFLAC_SUPPORT_NEON)
-static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_left_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-    int32x4_t shift0_4;
-    int32x4_t shift1_4;
-
-    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
-
-    shift0_4 = vdupq_n_s32(shift0);
-    shift1_4 = vdupq_n_s32(shift1);
-
-    for (i = 0; i < frameCount4; ++i) {
-        uint32x4_t left;
-        uint32x4_t side;
-        uint32x4_t right;
-
-        left  = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4);
-        side  = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4);
-        right = vsubq_u32(left, side);
-
-        drflac__vst2q_u32((drflac_uint32*)pOutputSamples + i*8, vzipq_u32(left, right));
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
-        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
-        drflac_uint32 right = left - side;
-
-        pOutputSamples[i*2+0] = (drflac_int32)left;
-        pOutputSamples[i*2+1] = (drflac_int32)right;
-    }
-}
-#endif
-
-static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_left_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
-{
-#if defined(DRFLAC_SUPPORT_SSE2)
-    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
-        drflac_read_pcm_frames_s32__decode_left_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-    } else
-#elif defined(DRFLAC_SUPPORT_NEON)
-    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
-        drflac_read_pcm_frames_s32__decode_left_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-    } else
-#endif
-    {
-        /* Scalar fallback. */
-#if 0
-        drflac_read_pcm_frames_s32__decode_left_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-#else
-        drflac_read_pcm_frames_s32__decode_left_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-#endif
-    }
-}
-
-
-#if 0
-static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_right_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
-{
-    drflac_uint64 i;
-    for (i = 0; i < frameCount; ++i) {
-        drflac_uint32 side  = (drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
-        drflac_uint32 right = (drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
-        drflac_uint32 left  = right + side;
-
-        pOutputSamples[i*2+0] = (drflac_int32)left;
-        pOutputSamples[i*2+1] = (drflac_int32)right;
-    }
-}
-#endif
-
-static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_right_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-    for (i = 0; i < frameCount4; ++i) {
-        drflac_uint32 side0  = pInputSamples0U32[i*4+0] << shift0;
-        drflac_uint32 side1  = pInputSamples0U32[i*4+1] << shift0;
-        drflac_uint32 side2  = pInputSamples0U32[i*4+2] << shift0;
-        drflac_uint32 side3  = pInputSamples0U32[i*4+3] << shift0;
-
-        drflac_uint32 right0 = pInputSamples1U32[i*4+0] << shift1;
-        drflac_uint32 right1 = pInputSamples1U32[i*4+1] << shift1;
-        drflac_uint32 right2 = pInputSamples1U32[i*4+2] << shift1;
-        drflac_uint32 right3 = pInputSamples1U32[i*4+3] << shift1;
-
-        drflac_uint32 left0 = right0 + side0;
-        drflac_uint32 left1 = right1 + side1;
-        drflac_uint32 left2 = right2 + side2;
-        drflac_uint32 left3 = right3 + side3;
-
-        pOutputSamples[i*8+0] = (drflac_int32)left0;
-        pOutputSamples[i*8+1] = (drflac_int32)right0;
-        pOutputSamples[i*8+2] = (drflac_int32)left1;
-        pOutputSamples[i*8+3] = (drflac_int32)right1;
-        pOutputSamples[i*8+4] = (drflac_int32)left2;
-        pOutputSamples[i*8+5] = (drflac_int32)right2;
-        pOutputSamples[i*8+6] = (drflac_int32)left3;
-        pOutputSamples[i*8+7] = (drflac_int32)right3;
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
-        drflac_uint32 right = pInputSamples1U32[i] << shift1;
-        drflac_uint32 left  = right + side;
-
-        pOutputSamples[i*2+0] = (drflac_int32)left;
-        pOutputSamples[i*2+1] = (drflac_int32)right;
-    }
-}
-
-#if defined(DRFLAC_SUPPORT_SSE2)
-static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_right_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
-
-    for (i = 0; i < frameCount4; ++i) {
-        __m128i side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
-        __m128i right = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
-        __m128i left  = _mm_add_epi32(right, side);
-
-        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right));
-        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right));
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
-        drflac_uint32 right = pInputSamples1U32[i] << shift1;
-        drflac_uint32 left  = right + side;
-
-        pOutputSamples[i*2+0] = (drflac_int32)left;
-        pOutputSamples[i*2+1] = (drflac_int32)right;
-    }
-}
-#endif
-
-#if defined(DRFLAC_SUPPORT_NEON)
-static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_right_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-    int32x4_t shift0_4;
-    int32x4_t shift1_4;
-
-    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
-
-    shift0_4 = vdupq_n_s32(shift0);
-    shift1_4 = vdupq_n_s32(shift1);
-
-    for (i = 0; i < frameCount4; ++i) {
-        uint32x4_t side;
-        uint32x4_t right;
-        uint32x4_t left;
-
-        side  = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4);
-        right = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4);
-        left  = vaddq_u32(right, side);
-
-        drflac__vst2q_u32((drflac_uint32*)pOutputSamples + i*8, vzipq_u32(left, right));
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
-        drflac_uint32 right = pInputSamples1U32[i] << shift1;
-        drflac_uint32 left  = right + side;
-
-        pOutputSamples[i*2+0] = (drflac_int32)left;
-        pOutputSamples[i*2+1] = (drflac_int32)right;
-    }
-}
-#endif
-
-static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_right_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
-{
-#if defined(DRFLAC_SUPPORT_SSE2)
-    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
-        drflac_read_pcm_frames_s32__decode_right_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-    } else
-#elif defined(DRFLAC_SUPPORT_NEON)
-    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
-        drflac_read_pcm_frames_s32__decode_right_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-    } else
-#endif
-    {
-        /* Scalar fallback. */
-#if 0
-        drflac_read_pcm_frames_s32__decode_right_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-#else
-        drflac_read_pcm_frames_s32__decode_right_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-#endif
-    }
-}
-
-
-#if 0
-static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_mid_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
-{
-    for (drflac_uint64 i = 0; i < frameCount; ++i) {
-        drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-        drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-        mid = (mid << 1) | (side & 0x01);
-
-        pOutputSamples[i*2+0] = (drflac_int32)((drflac_uint32)((drflac_int32)(mid + side) >> 1) << unusedBitsPerSample);
-        pOutputSamples[i*2+1] = (drflac_int32)((drflac_uint32)((drflac_int32)(mid - side) >> 1) << unusedBitsPerSample);
-    }
-}
-#endif
-
-static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_mid_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_int32 shift = unusedBitsPerSample;
-
-    if (shift > 0) {
-        shift -= 1;
-        for (i = 0; i < frameCount4; ++i) {
-            drflac_uint32 temp0L;
-            drflac_uint32 temp1L;
-            drflac_uint32 temp2L;
-            drflac_uint32 temp3L;
-            drflac_uint32 temp0R;
-            drflac_uint32 temp1R;
-            drflac_uint32 temp2R;
-            drflac_uint32 temp3R;
-
-            drflac_uint32 mid0  = pInputSamples0U32[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 mid1  = pInputSamples0U32[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 mid2  = pInputSamples0U32[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 mid3  = pInputSamples0U32[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-
-            drflac_uint32 side0 = pInputSamples1U32[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-            drflac_uint32 side1 = pInputSamples1U32[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-            drflac_uint32 side2 = pInputSamples1U32[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-            drflac_uint32 side3 = pInputSamples1U32[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-            mid0 = (mid0 << 1) | (side0 & 0x01);
-            mid1 = (mid1 << 1) | (side1 & 0x01);
-            mid2 = (mid2 << 1) | (side2 & 0x01);
-            mid3 = (mid3 << 1) | (side3 & 0x01);
-
-            temp0L = (mid0 + side0) << shift;
-            temp1L = (mid1 + side1) << shift;
-            temp2L = (mid2 + side2) << shift;
-            temp3L = (mid3 + side3) << shift;
-
-            temp0R = (mid0 - side0) << shift;
-            temp1R = (mid1 - side1) << shift;
-            temp2R = (mid2 - side2) << shift;
-            temp3R = (mid3 - side3) << shift;
-
-            pOutputSamples[i*8+0] = (drflac_int32)temp0L;
-            pOutputSamples[i*8+1] = (drflac_int32)temp0R;
-            pOutputSamples[i*8+2] = (drflac_int32)temp1L;
-            pOutputSamples[i*8+3] = (drflac_int32)temp1R;
-            pOutputSamples[i*8+4] = (drflac_int32)temp2L;
-            pOutputSamples[i*8+5] = (drflac_int32)temp2R;
-            pOutputSamples[i*8+6] = (drflac_int32)temp3L;
-            pOutputSamples[i*8+7] = (drflac_int32)temp3R;
-        }
-    } else {
-        for (i = 0; i < frameCount4; ++i) {
-            drflac_uint32 temp0L;
-            drflac_uint32 temp1L;
-            drflac_uint32 temp2L;
-            drflac_uint32 temp3L;
-            drflac_uint32 temp0R;
-            drflac_uint32 temp1R;
-            drflac_uint32 temp2R;
-            drflac_uint32 temp3R;
-
-            drflac_uint32 mid0  = pInputSamples0U32[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 mid1  = pInputSamples0U32[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 mid2  = pInputSamples0U32[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 mid3  = pInputSamples0U32[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-
-            drflac_uint32 side0 = pInputSamples1U32[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-            drflac_uint32 side1 = pInputSamples1U32[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-            drflac_uint32 side2 = pInputSamples1U32[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-            drflac_uint32 side3 = pInputSamples1U32[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-            mid0 = (mid0 << 1) | (side0 & 0x01);
-            mid1 = (mid1 << 1) | (side1 & 0x01);
-            mid2 = (mid2 << 1) | (side2 & 0x01);
-            mid3 = (mid3 << 1) | (side3 & 0x01);
-
-            temp0L = (drflac_uint32)((drflac_int32)(mid0 + side0) >> 1);
-            temp1L = (drflac_uint32)((drflac_int32)(mid1 + side1) >> 1);
-            temp2L = (drflac_uint32)((drflac_int32)(mid2 + side2) >> 1);
-            temp3L = (drflac_uint32)((drflac_int32)(mid3 + side3) >> 1);
-
-            temp0R = (drflac_uint32)((drflac_int32)(mid0 - side0) >> 1);
-            temp1R = (drflac_uint32)((drflac_int32)(mid1 - side1) >> 1);
-            temp2R = (drflac_uint32)((drflac_int32)(mid2 - side2) >> 1);
-            temp3R = (drflac_uint32)((drflac_int32)(mid3 - side3) >> 1);
-
-            pOutputSamples[i*8+0] = (drflac_int32)temp0L;
-            pOutputSamples[i*8+1] = (drflac_int32)temp0R;
-            pOutputSamples[i*8+2] = (drflac_int32)temp1L;
-            pOutputSamples[i*8+3] = (drflac_int32)temp1R;
-            pOutputSamples[i*8+4] = (drflac_int32)temp2L;
-            pOutputSamples[i*8+5] = (drflac_int32)temp2R;
-            pOutputSamples[i*8+6] = (drflac_int32)temp3L;
-            pOutputSamples[i*8+7] = (drflac_int32)temp3R;
-        }
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-        drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-        mid = (mid << 1) | (side & 0x01);
-
-        pOutputSamples[i*2+0] = (drflac_int32)((drflac_uint32)((drflac_int32)(mid + side) >> 1) << unusedBitsPerSample);
-        pOutputSamples[i*2+1] = (drflac_int32)((drflac_uint32)((drflac_int32)(mid - side) >> 1) << unusedBitsPerSample);
-    }
-}
-
-#if defined(DRFLAC_SUPPORT_SSE2)
-static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_mid_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_int32 shift = unusedBitsPerSample;
-
-    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
-
-    if (shift == 0) {
-        for (i = 0; i < frameCount4; ++i) {
-            __m128i mid;
-            __m128i side;
-            __m128i left;
-            __m128i right;
-
-            mid   = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
-            side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
-
-            mid   = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01)));
-
-            left  = _mm_srai_epi32(_mm_add_epi32(mid, side), 1);
-            right = _mm_srai_epi32(_mm_sub_epi32(mid, side), 1);
-
-            _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right));
-            _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right));
-        }
-
-        for (i = (frameCount4 << 2); i < frameCount; ++i) {
-            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-            mid = (mid << 1) | (side & 0x01);
-
-            pOutputSamples[i*2+0] = (drflac_int32)(mid + side) >> 1;
-            pOutputSamples[i*2+1] = (drflac_int32)(mid - side) >> 1;
-        }
-    } else {
-        shift -= 1;
-        for (i = 0; i < frameCount4; ++i) {
-            __m128i mid;
-            __m128i side;
-            __m128i left;
-            __m128i right;
-
-            mid   = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
-            side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
-
-            mid   = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01)));
-
-            left  = _mm_slli_epi32(_mm_add_epi32(mid, side), shift);
-            right = _mm_slli_epi32(_mm_sub_epi32(mid, side), shift);
-
-            _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right));
-            _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right));
-        }
-
-        for (i = (frameCount4 << 2); i < frameCount; ++i) {
-            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-            mid = (mid << 1) | (side & 0x01);
-
-            pOutputSamples[i*2+0] = (drflac_int32)((mid + side) << shift);
-            pOutputSamples[i*2+1] = (drflac_int32)((mid - side) << shift);
-        }
-    }
-}
-#endif
-
-#if defined(DRFLAC_SUPPORT_NEON)
-static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_mid_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_int32 shift = unusedBitsPerSample;
-    int32x4_t  wbpsShift0_4; /* wbps = Wasted Bits Per Sample */
-    int32x4_t  wbpsShift1_4; /* wbps = Wasted Bits Per Sample */
-    uint32x4_t one4;
-
-    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
-
-    wbpsShift0_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
-    wbpsShift1_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
-    one4         = vdupq_n_u32(1);
-
-    if (shift == 0) {
-        for (i = 0; i < frameCount4; ++i) {
-            uint32x4_t mid;
-            uint32x4_t side;
-            int32x4_t left;
-            int32x4_t right;
-
-            mid   = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbpsShift0_4);
-            side  = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbpsShift1_4);
-
-            mid   = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, one4));
-
-            left  = vshrq_n_s32(vreinterpretq_s32_u32(vaddq_u32(mid, side)), 1);
-            right = vshrq_n_s32(vreinterpretq_s32_u32(vsubq_u32(mid, side)), 1);
-
-            drflac__vst2q_s32(pOutputSamples + i*8, vzipq_s32(left, right));
-        }
-
-        for (i = (frameCount4 << 2); i < frameCount; ++i) {
-            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-            mid = (mid << 1) | (side & 0x01);
-
-            pOutputSamples[i*2+0] = (drflac_int32)(mid + side) >> 1;
-            pOutputSamples[i*2+1] = (drflac_int32)(mid - side) >> 1;
-        }
-    } else {
-        int32x4_t shift4;
-
-        shift -= 1;
-        shift4 = vdupq_n_s32(shift);
-
-        for (i = 0; i < frameCount4; ++i) {
-            uint32x4_t mid;
-            uint32x4_t side;
-            int32x4_t left;
-            int32x4_t right;
-
-            mid   = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbpsShift0_4);
-            side  = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbpsShift1_4);
-
-            mid   = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, one4));
-
-            left  = vreinterpretq_s32_u32(vshlq_u32(vaddq_u32(mid, side), shift4));
-            right = vreinterpretq_s32_u32(vshlq_u32(vsubq_u32(mid, side), shift4));
-
-            drflac__vst2q_s32(pOutputSamples + i*8, vzipq_s32(left, right));
-        }
-
-        for (i = (frameCount4 << 2); i < frameCount; ++i) {
-            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-            mid = (mid << 1) | (side & 0x01);
-
-            pOutputSamples[i*2+0] = (drflac_int32)((mid + side) << shift);
-            pOutputSamples[i*2+1] = (drflac_int32)((mid - side) << shift);
-        }
-    }
-}
-#endif
-
-static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_mid_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
-{
-#if defined(DRFLAC_SUPPORT_SSE2)
-    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
-        drflac_read_pcm_frames_s32__decode_mid_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-    } else
-#elif defined(DRFLAC_SUPPORT_NEON)
-    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
-        drflac_read_pcm_frames_s32__decode_mid_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-    } else
-#endif
-    {
-        /* Scalar fallback. */
-#if 0
-        drflac_read_pcm_frames_s32__decode_mid_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-#else
-        drflac_read_pcm_frames_s32__decode_mid_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-#endif
-    }
-}
-
-
-#if 0
-static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_independent_stereo__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
-{
-    for (drflac_uint64 i = 0; i < frameCount; ++i) {
-        pOutputSamples[i*2+0] = (drflac_int32)((drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample));
-        pOutputSamples[i*2+1] = (drflac_int32)((drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample));
-    }
-}
-#endif
-
-static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_independent_stereo__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-    for (i = 0; i < frameCount4; ++i) {
-        drflac_uint32 tempL0 = pInputSamples0U32[i*4+0] << shift0;
-        drflac_uint32 tempL1 = pInputSamples0U32[i*4+1] << shift0;
-        drflac_uint32 tempL2 = pInputSamples0U32[i*4+2] << shift0;
-        drflac_uint32 tempL3 = pInputSamples0U32[i*4+3] << shift0;
-
-        drflac_uint32 tempR0 = pInputSamples1U32[i*4+0] << shift1;
-        drflac_uint32 tempR1 = pInputSamples1U32[i*4+1] << shift1;
-        drflac_uint32 tempR2 = pInputSamples1U32[i*4+2] << shift1;
-        drflac_uint32 tempR3 = pInputSamples1U32[i*4+3] << shift1;
-
-        pOutputSamples[i*8+0] = (drflac_int32)tempL0;
-        pOutputSamples[i*8+1] = (drflac_int32)tempR0;
-        pOutputSamples[i*8+2] = (drflac_int32)tempL1;
-        pOutputSamples[i*8+3] = (drflac_int32)tempR1;
-        pOutputSamples[i*8+4] = (drflac_int32)tempL2;
-        pOutputSamples[i*8+5] = (drflac_int32)tempR2;
-        pOutputSamples[i*8+6] = (drflac_int32)tempL3;
-        pOutputSamples[i*8+7] = (drflac_int32)tempR3;
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        pOutputSamples[i*2+0] = (drflac_int32)(pInputSamples0U32[i] << shift0);
-        pOutputSamples[i*2+1] = (drflac_int32)(pInputSamples1U32[i] << shift1);
-    }
-}
-
-#if defined(DRFLAC_SUPPORT_SSE2)
-static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_independent_stereo__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-    for (i = 0; i < frameCount4; ++i) {
-        __m128i left  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
-        __m128i right = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
-
-        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right));
-        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right));
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        pOutputSamples[i*2+0] = (drflac_int32)(pInputSamples0U32[i] << shift0);
-        pOutputSamples[i*2+1] = (drflac_int32)(pInputSamples1U32[i] << shift1);
-    }
-}
-#endif
-
-#if defined(DRFLAC_SUPPORT_NEON)
-static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_independent_stereo__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-    int32x4_t shift4_0 = vdupq_n_s32(shift0);
-    int32x4_t shift4_1 = vdupq_n_s32(shift1);
-
-    for (i = 0; i < frameCount4; ++i) {
-        int32x4_t left;
-        int32x4_t right;
-
-        left  = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift4_0));
-        right = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift4_1));
-
-        drflac__vst2q_s32(pOutputSamples + i*8, vzipq_s32(left, right));
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        pOutputSamples[i*2+0] = (drflac_int32)(pInputSamples0U32[i] << shift0);
-        pOutputSamples[i*2+1] = (drflac_int32)(pInputSamples1U32[i] << shift1);
-    }
-}
-#endif
-
-static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_independent_stereo(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
-{
-#if defined(DRFLAC_SUPPORT_SSE2)
-    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
-        drflac_read_pcm_frames_s32__decode_independent_stereo__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-    } else
-#elif defined(DRFLAC_SUPPORT_NEON)
-    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
-        drflac_read_pcm_frames_s32__decode_independent_stereo__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-    } else
-#endif
-    {
-        /* Scalar fallback. */
-#if 0
-        drflac_read_pcm_frames_s32__decode_independent_stereo__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-#else
-        drflac_read_pcm_frames_s32__decode_independent_stereo__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-#endif
-    }
-}
-
-
-DRFLAC_API drflac_uint64 drflac_read_pcm_frames_s32(drflac* pFlac, drflac_uint64 framesToRead, drflac_int32* pBufferOut)
-{
-    drflac_uint64 framesRead;
-    drflac_uint32 unusedBitsPerSample;
-
-    if (pFlac == NULL || framesToRead == 0) {
-        return 0;
-    }
-
-    if (pBufferOut == NULL) {
-        return drflac__seek_forward_by_pcm_frames(pFlac, framesToRead);
-    }
-
-    DRFLAC_ASSERT(pFlac->bitsPerSample <= 32);
-    unusedBitsPerSample = 32 - pFlac->bitsPerSample;
-
-    framesRead = 0;
-    while (framesToRead > 0) {
-        /* If we've run out of samples in this frame, go to the next. */
-        if (pFlac->currentFLACFrame.pcmFramesRemaining == 0) {
-            if (!drflac__read_and_decode_next_flac_frame(pFlac)) {
-                break;  /* Couldn't read the next frame, so just break from the loop and return. */
-            }
-        } else {
-            unsigned int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFLACFrame.header.channelAssignment);
-            drflac_uint64 iFirstPCMFrame = pFlac->currentFLACFrame.header.blockSizeInPCMFrames - pFlac->currentFLACFrame.pcmFramesRemaining;
-            drflac_uint64 frameCountThisIteration = framesToRead;
-
-            if (frameCountThisIteration > pFlac->currentFLACFrame.pcmFramesRemaining) {
-                frameCountThisIteration = pFlac->currentFLACFrame.pcmFramesRemaining;
-            }
-
-            if (channelCount == 2) {
-                const drflac_int32* pDecodedSamples0 = pFlac->currentFLACFrame.subframes[0].pSamplesS32 + iFirstPCMFrame;
-                const drflac_int32* pDecodedSamples1 = pFlac->currentFLACFrame.subframes[1].pSamplesS32 + iFirstPCMFrame;
-
-                switch (pFlac->currentFLACFrame.header.channelAssignment)
-                {
-                    case DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE:
-                    {
-                        drflac_read_pcm_frames_s32__decode_left_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
-                    } break;
-
-                    case DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE:
-                    {
-                        drflac_read_pcm_frames_s32__decode_right_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
-                    } break;
-
-                    case DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE:
-                    {
-                        drflac_read_pcm_frames_s32__decode_mid_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
-                    } break;
-
-                    case DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT:
-                    default:
-                    {
-                        drflac_read_pcm_frames_s32__decode_independent_stereo(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
-                    } break;
-                }
-            } else {
-                /* Generic interleaving. */
-                drflac_uint64 i;
-                for (i = 0; i < frameCountThisIteration; ++i) {
-                    unsigned int j;
-                    for (j = 0; j < channelCount; ++j) {
-                        pBufferOut[(i*channelCount)+j] = (drflac_int32)((drflac_uint32)(pFlac->currentFLACFrame.subframes[j].pSamplesS32[iFirstPCMFrame + i]) << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[j].wastedBitsPerSample));
-                    }
-                }
-            }
-
-            framesRead                += frameCountThisIteration;
-            pBufferOut                += frameCountThisIteration * channelCount;
-            framesToRead              -= frameCountThisIteration;
-            pFlac->currentPCMFrame    += frameCountThisIteration;
-            pFlac->currentFLACFrame.pcmFramesRemaining -= (drflac_uint32)frameCountThisIteration;
-        }
-    }
-
-    return framesRead;
-}
-
-
-#if 0
-static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_left_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
-{
-    drflac_uint64 i;
-    for (i = 0; i < frameCount; ++i) {
-        drflac_uint32 left  = (drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
-        drflac_uint32 side  = (drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
-        drflac_uint32 right = left - side;
-
-        left  >>= 16;
-        right >>= 16;
-
-        pOutputSamples[i*2+0] = (drflac_int16)left;
-        pOutputSamples[i*2+1] = (drflac_int16)right;
-    }
-}
-#endif
-
-static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_left_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-    for (i = 0; i < frameCount4; ++i) {
-        drflac_uint32 left0 = pInputSamples0U32[i*4+0] << shift0;
-        drflac_uint32 left1 = pInputSamples0U32[i*4+1] << shift0;
-        drflac_uint32 left2 = pInputSamples0U32[i*4+2] << shift0;
-        drflac_uint32 left3 = pInputSamples0U32[i*4+3] << shift0;
-
-        drflac_uint32 side0 = pInputSamples1U32[i*4+0] << shift1;
-        drflac_uint32 side1 = pInputSamples1U32[i*4+1] << shift1;
-        drflac_uint32 side2 = pInputSamples1U32[i*4+2] << shift1;
-        drflac_uint32 side3 = pInputSamples1U32[i*4+3] << shift1;
-
-        drflac_uint32 right0 = left0 - side0;
-        drflac_uint32 right1 = left1 - side1;
-        drflac_uint32 right2 = left2 - side2;
-        drflac_uint32 right3 = left3 - side3;
-
-        left0  >>= 16;
-        left1  >>= 16;
-        left2  >>= 16;
-        left3  >>= 16;
-
-        right0 >>= 16;
-        right1 >>= 16;
-        right2 >>= 16;
-        right3 >>= 16;
-
-        pOutputSamples[i*8+0] = (drflac_int16)left0;
-        pOutputSamples[i*8+1] = (drflac_int16)right0;
-        pOutputSamples[i*8+2] = (drflac_int16)left1;
-        pOutputSamples[i*8+3] = (drflac_int16)right1;
-        pOutputSamples[i*8+4] = (drflac_int16)left2;
-        pOutputSamples[i*8+5] = (drflac_int16)right2;
-        pOutputSamples[i*8+6] = (drflac_int16)left3;
-        pOutputSamples[i*8+7] = (drflac_int16)right3;
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
-        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
-        drflac_uint32 right = left - side;
-
-        left  >>= 16;
-        right >>= 16;
-
-        pOutputSamples[i*2+0] = (drflac_int16)left;
-        pOutputSamples[i*2+1] = (drflac_int16)right;
-    }
-}
-
-#if defined(DRFLAC_SUPPORT_SSE2)
-static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_left_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
-
-    for (i = 0; i < frameCount4; ++i) {
-        __m128i left  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
-        __m128i side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
-        __m128i right = _mm_sub_epi32(left, side);
-
-        left  = _mm_srai_epi32(left,  16);
-        right = _mm_srai_epi32(right, 16);
-
-        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right));
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
-        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
-        drflac_uint32 right = left - side;
-
-        left  >>= 16;
-        right >>= 16;
-
-        pOutputSamples[i*2+0] = (drflac_int16)left;
-        pOutputSamples[i*2+1] = (drflac_int16)right;
-    }
-}
-#endif
-
-#if defined(DRFLAC_SUPPORT_NEON)
-static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_left_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-    int32x4_t shift0_4;
-    int32x4_t shift1_4;
-
-    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
-
-    shift0_4 = vdupq_n_s32(shift0);
-    shift1_4 = vdupq_n_s32(shift1);
-
-    for (i = 0; i < frameCount4; ++i) {
-        uint32x4_t left;
-        uint32x4_t side;
-        uint32x4_t right;
-
-        left  = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4);
-        side  = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4);
-        right = vsubq_u32(left, side);
-
-        left  = vshrq_n_u32(left,  16);
-        right = vshrq_n_u32(right, 16);
-
-        drflac__vst2q_u16((drflac_uint16*)pOutputSamples + i*8, vzip_u16(vmovn_u32(left), vmovn_u32(right)));
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
-        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
-        drflac_uint32 right = left - side;
-
-        left  >>= 16;
-        right >>= 16;
-
-        pOutputSamples[i*2+0] = (drflac_int16)left;
-        pOutputSamples[i*2+1] = (drflac_int16)right;
-    }
-}
-#endif
-
-static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_left_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
-{
-#if defined(DRFLAC_SUPPORT_SSE2)
-    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
-        drflac_read_pcm_frames_s16__decode_left_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-    } else
-#elif defined(DRFLAC_SUPPORT_NEON)
-    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
-        drflac_read_pcm_frames_s16__decode_left_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-    } else
-#endif
-    {
-        /* Scalar fallback. */
-#if 0
-        drflac_read_pcm_frames_s16__decode_left_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-#else
-        drflac_read_pcm_frames_s16__decode_left_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-#endif
-    }
-}
-
-
-#if 0
-static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_right_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
-{
-    drflac_uint64 i;
-    for (i = 0; i < frameCount; ++i) {
-        drflac_uint32 side  = (drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
-        drflac_uint32 right = (drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
-        drflac_uint32 left  = right + side;
-
-        left  >>= 16;
-        right >>= 16;
-
-        pOutputSamples[i*2+0] = (drflac_int16)left;
-        pOutputSamples[i*2+1] = (drflac_int16)right;
-    }
-}
-#endif
-
-static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_right_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-    for (i = 0; i < frameCount4; ++i) {
-        drflac_uint32 side0  = pInputSamples0U32[i*4+0] << shift0;
-        drflac_uint32 side1  = pInputSamples0U32[i*4+1] << shift0;
-        drflac_uint32 side2  = pInputSamples0U32[i*4+2] << shift0;
-        drflac_uint32 side3  = pInputSamples0U32[i*4+3] << shift0;
-
-        drflac_uint32 right0 = pInputSamples1U32[i*4+0] << shift1;
-        drflac_uint32 right1 = pInputSamples1U32[i*4+1] << shift1;
-        drflac_uint32 right2 = pInputSamples1U32[i*4+2] << shift1;
-        drflac_uint32 right3 = pInputSamples1U32[i*4+3] << shift1;
-
-        drflac_uint32 left0 = right0 + side0;
-        drflac_uint32 left1 = right1 + side1;
-        drflac_uint32 left2 = right2 + side2;
-        drflac_uint32 left3 = right3 + side3;
-
-        left0  >>= 16;
-        left1  >>= 16;
-        left2  >>= 16;
-        left3  >>= 16;
-
-        right0 >>= 16;
-        right1 >>= 16;
-        right2 >>= 16;
-        right3 >>= 16;
-
-        pOutputSamples[i*8+0] = (drflac_int16)left0;
-        pOutputSamples[i*8+1] = (drflac_int16)right0;
-        pOutputSamples[i*8+2] = (drflac_int16)left1;
-        pOutputSamples[i*8+3] = (drflac_int16)right1;
-        pOutputSamples[i*8+4] = (drflac_int16)left2;
-        pOutputSamples[i*8+5] = (drflac_int16)right2;
-        pOutputSamples[i*8+6] = (drflac_int16)left3;
-        pOutputSamples[i*8+7] = (drflac_int16)right3;
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
-        drflac_uint32 right = pInputSamples1U32[i] << shift1;
-        drflac_uint32 left  = right + side;
-
-        left  >>= 16;
-        right >>= 16;
-
-        pOutputSamples[i*2+0] = (drflac_int16)left;
-        pOutputSamples[i*2+1] = (drflac_int16)right;
-    }
-}
-
-#if defined(DRFLAC_SUPPORT_SSE2)
-static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_right_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
-
-    for (i = 0; i < frameCount4; ++i) {
-        __m128i side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
-        __m128i right = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
-        __m128i left  = _mm_add_epi32(right, side);
-
-        left  = _mm_srai_epi32(left,  16);
-        right = _mm_srai_epi32(right, 16);
-
-        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right));
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
-        drflac_uint32 right = pInputSamples1U32[i] << shift1;
-        drflac_uint32 left  = right + side;
-
-        left  >>= 16;
-        right >>= 16;
-
-        pOutputSamples[i*2+0] = (drflac_int16)left;
-        pOutputSamples[i*2+1] = (drflac_int16)right;
-    }
-}
-#endif
-
-#if defined(DRFLAC_SUPPORT_NEON)
-static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_right_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-    int32x4_t shift0_4;
-    int32x4_t shift1_4;
-
-    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
-
-    shift0_4 = vdupq_n_s32(shift0);
-    shift1_4 = vdupq_n_s32(shift1);
-
-    for (i = 0; i < frameCount4; ++i) {
-        uint32x4_t side;
-        uint32x4_t right;
-        uint32x4_t left;
-
-        side  = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4);
-        right = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4);
-        left  = vaddq_u32(right, side);
-
-        left  = vshrq_n_u32(left,  16);
-        right = vshrq_n_u32(right, 16);
-
-        drflac__vst2q_u16((drflac_uint16*)pOutputSamples + i*8, vzip_u16(vmovn_u32(left), vmovn_u32(right)));
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
-        drflac_uint32 right = pInputSamples1U32[i] << shift1;
-        drflac_uint32 left  = right + side;
-
-        left  >>= 16;
-        right >>= 16;
-
-        pOutputSamples[i*2+0] = (drflac_int16)left;
-        pOutputSamples[i*2+1] = (drflac_int16)right;
-    }
-}
-#endif
-
-static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_right_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
-{
-#if defined(DRFLAC_SUPPORT_SSE2)
-    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
-        drflac_read_pcm_frames_s16__decode_right_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-    } else
-#elif defined(DRFLAC_SUPPORT_NEON)
-    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
-        drflac_read_pcm_frames_s16__decode_right_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-    } else
-#endif
-    {
-        /* Scalar fallback. */
-#if 0
-        drflac_read_pcm_frames_s16__decode_right_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-#else
-        drflac_read_pcm_frames_s16__decode_right_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-#endif
-    }
-}
-
-
-#if 0
-static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_mid_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
-{
-    for (drflac_uint64 i = 0; i < frameCount; ++i) {
-        drflac_uint32 mid  = (drflac_uint32)pInputSamples0[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-        drflac_uint32 side = (drflac_uint32)pInputSamples1[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-        mid = (mid << 1) | (side & 0x01);
-
-        pOutputSamples[i*2+0] = (drflac_int16)(((drflac_uint32)((drflac_int32)(mid + side) >> 1) << unusedBitsPerSample) >> 16);
-        pOutputSamples[i*2+1] = (drflac_int16)(((drflac_uint32)((drflac_int32)(mid - side) >> 1) << unusedBitsPerSample) >> 16);
-    }
-}
-#endif
-
-static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_mid_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift = unusedBitsPerSample;
-
-    if (shift > 0) {
-        shift -= 1;
-        for (i = 0; i < frameCount4; ++i) {
-            drflac_uint32 temp0L;
-            drflac_uint32 temp1L;
-            drflac_uint32 temp2L;
-            drflac_uint32 temp3L;
-            drflac_uint32 temp0R;
-            drflac_uint32 temp1R;
-            drflac_uint32 temp2R;
-            drflac_uint32 temp3R;
-
-            drflac_uint32 mid0  = pInputSamples0U32[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 mid1  = pInputSamples0U32[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 mid2  = pInputSamples0U32[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 mid3  = pInputSamples0U32[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-
-            drflac_uint32 side0 = pInputSamples1U32[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-            drflac_uint32 side1 = pInputSamples1U32[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-            drflac_uint32 side2 = pInputSamples1U32[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-            drflac_uint32 side3 = pInputSamples1U32[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-            mid0 = (mid0 << 1) | (side0 & 0x01);
-            mid1 = (mid1 << 1) | (side1 & 0x01);
-            mid2 = (mid2 << 1) | (side2 & 0x01);
-            mid3 = (mid3 << 1) | (side3 & 0x01);
-
-            temp0L = (mid0 + side0) << shift;
-            temp1L = (mid1 + side1) << shift;
-            temp2L = (mid2 + side2) << shift;
-            temp3L = (mid3 + side3) << shift;
-
-            temp0R = (mid0 - side0) << shift;
-            temp1R = (mid1 - side1) << shift;
-            temp2R = (mid2 - side2) << shift;
-            temp3R = (mid3 - side3) << shift;
-
-            temp0L >>= 16;
-            temp1L >>= 16;
-            temp2L >>= 16;
-            temp3L >>= 16;
-
-            temp0R >>= 16;
-            temp1R >>= 16;
-            temp2R >>= 16;
-            temp3R >>= 16;
-
-            pOutputSamples[i*8+0] = (drflac_int16)temp0L;
-            pOutputSamples[i*8+1] = (drflac_int16)temp0R;
-            pOutputSamples[i*8+2] = (drflac_int16)temp1L;
-            pOutputSamples[i*8+3] = (drflac_int16)temp1R;
-            pOutputSamples[i*8+4] = (drflac_int16)temp2L;
-            pOutputSamples[i*8+5] = (drflac_int16)temp2R;
-            pOutputSamples[i*8+6] = (drflac_int16)temp3L;
-            pOutputSamples[i*8+7] = (drflac_int16)temp3R;
-        }
-    } else {
-        for (i = 0; i < frameCount4; ++i) {
-            drflac_uint32 temp0L;
-            drflac_uint32 temp1L;
-            drflac_uint32 temp2L;
-            drflac_uint32 temp3L;
-            drflac_uint32 temp0R;
-            drflac_uint32 temp1R;
-            drflac_uint32 temp2R;
-            drflac_uint32 temp3R;
-
-            drflac_uint32 mid0  = pInputSamples0U32[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 mid1  = pInputSamples0U32[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 mid2  = pInputSamples0U32[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 mid3  = pInputSamples0U32[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-
-            drflac_uint32 side0 = pInputSamples1U32[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-            drflac_uint32 side1 = pInputSamples1U32[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-            drflac_uint32 side2 = pInputSamples1U32[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-            drflac_uint32 side3 = pInputSamples1U32[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-            mid0 = (mid0 << 1) | (side0 & 0x01);
-            mid1 = (mid1 << 1) | (side1 & 0x01);
-            mid2 = (mid2 << 1) | (side2 & 0x01);
-            mid3 = (mid3 << 1) | (side3 & 0x01);
-
-            temp0L = ((drflac_int32)(mid0 + side0) >> 1);
-            temp1L = ((drflac_int32)(mid1 + side1) >> 1);
-            temp2L = ((drflac_int32)(mid2 + side2) >> 1);
-            temp3L = ((drflac_int32)(mid3 + side3) >> 1);
-
-            temp0R = ((drflac_int32)(mid0 - side0) >> 1);
-            temp1R = ((drflac_int32)(mid1 - side1) >> 1);
-            temp2R = ((drflac_int32)(mid2 - side2) >> 1);
-            temp3R = ((drflac_int32)(mid3 - side3) >> 1);
-
-            temp0L >>= 16;
-            temp1L >>= 16;
-            temp2L >>= 16;
-            temp3L >>= 16;
-
-            temp0R >>= 16;
-            temp1R >>= 16;
-            temp2R >>= 16;
-            temp3R >>= 16;
-
-            pOutputSamples[i*8+0] = (drflac_int16)temp0L;
-            pOutputSamples[i*8+1] = (drflac_int16)temp0R;
-            pOutputSamples[i*8+2] = (drflac_int16)temp1L;
-            pOutputSamples[i*8+3] = (drflac_int16)temp1R;
-            pOutputSamples[i*8+4] = (drflac_int16)temp2L;
-            pOutputSamples[i*8+5] = (drflac_int16)temp2R;
-            pOutputSamples[i*8+6] = (drflac_int16)temp3L;
-            pOutputSamples[i*8+7] = (drflac_int16)temp3R;
-        }
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-        drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-        mid = (mid << 1) | (side & 0x01);
-
-        pOutputSamples[i*2+0] = (drflac_int16)(((drflac_uint32)((drflac_int32)(mid + side) >> 1) << unusedBitsPerSample) >> 16);
-        pOutputSamples[i*2+1] = (drflac_int16)(((drflac_uint32)((drflac_int32)(mid - side) >> 1) << unusedBitsPerSample) >> 16);
-    }
-}
-
-#if defined(DRFLAC_SUPPORT_SSE2)
-static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_mid_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift = unusedBitsPerSample;
-
-    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
-
-    if (shift == 0) {
-        for (i = 0; i < frameCount4; ++i) {
-            __m128i mid;
-            __m128i side;
-            __m128i left;
-            __m128i right;
-
-            mid   = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
-            side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
-
-            mid   = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01)));
-
-            left  = _mm_srai_epi32(_mm_add_epi32(mid, side), 1);
-            right = _mm_srai_epi32(_mm_sub_epi32(mid, side), 1);
-
-            left  = _mm_srai_epi32(left,  16);
-            right = _mm_srai_epi32(right, 16);
-
-            _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right));
-        }
-
-        for (i = (frameCount4 << 2); i < frameCount; ++i) {
-            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-            mid = (mid << 1) | (side & 0x01);
-
-            pOutputSamples[i*2+0] = (drflac_int16)(((drflac_int32)(mid + side) >> 1) >> 16);
-            pOutputSamples[i*2+1] = (drflac_int16)(((drflac_int32)(mid - side) >> 1) >> 16);
-        }
-    } else {
-        shift -= 1;
-        for (i = 0; i < frameCount4; ++i) {
-            __m128i mid;
-            __m128i side;
-            __m128i left;
-            __m128i right;
-
-            mid   = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
-            side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
-
-            mid   = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01)));
-
-            left  = _mm_slli_epi32(_mm_add_epi32(mid, side), shift);
-            right = _mm_slli_epi32(_mm_sub_epi32(mid, side), shift);
-
-            left  = _mm_srai_epi32(left,  16);
-            right = _mm_srai_epi32(right, 16);
-
-            _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right));
-        }
-
-        for (i = (frameCount4 << 2); i < frameCount; ++i) {
-            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-            mid = (mid << 1) | (side & 0x01);
-
-            pOutputSamples[i*2+0] = (drflac_int16)(((mid + side) << shift) >> 16);
-            pOutputSamples[i*2+1] = (drflac_int16)(((mid - side) << shift) >> 16);
-        }
-    }
-}
-#endif
-
-#if defined(DRFLAC_SUPPORT_NEON)
-static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_mid_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift = unusedBitsPerSample;
-    int32x4_t wbpsShift0_4; /* wbps = Wasted Bits Per Sample */
-    int32x4_t wbpsShift1_4; /* wbps = Wasted Bits Per Sample */
-
-    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
-
-    wbpsShift0_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
-    wbpsShift1_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
-
-    if (shift == 0) {
-        for (i = 0; i < frameCount4; ++i) {
-            uint32x4_t mid;
-            uint32x4_t side;
-            int32x4_t left;
-            int32x4_t right;
-
-            mid   = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbpsShift0_4);
-            side  = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbpsShift1_4);
-
-            mid   = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, vdupq_n_u32(1)));
-
-            left  = vshrq_n_s32(vreinterpretq_s32_u32(vaddq_u32(mid, side)), 1);
-            right = vshrq_n_s32(vreinterpretq_s32_u32(vsubq_u32(mid, side)), 1);
-
-            left  = vshrq_n_s32(left,  16);
-            right = vshrq_n_s32(right, 16);
-
-            drflac__vst2q_s16(pOutputSamples + i*8, vzip_s16(vmovn_s32(left), vmovn_s32(right)));
-        }
-
-        for (i = (frameCount4 << 2); i < frameCount; ++i) {
-            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-            mid = (mid << 1) | (side & 0x01);
-
-            pOutputSamples[i*2+0] = (drflac_int16)(((drflac_int32)(mid + side) >> 1) >> 16);
-            pOutputSamples[i*2+1] = (drflac_int16)(((drflac_int32)(mid - side) >> 1) >> 16);
-        }
-    } else {
-        int32x4_t shift4;
-
-        shift -= 1;
-        shift4 = vdupq_n_s32(shift);
-
-        for (i = 0; i < frameCount4; ++i) {
-            uint32x4_t mid;
-            uint32x4_t side;
-            int32x4_t left;
-            int32x4_t right;
-
-            mid   = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbpsShift0_4);
-            side  = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbpsShift1_4);
-
-            mid   = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, vdupq_n_u32(1)));
-
-            left  = vreinterpretq_s32_u32(vshlq_u32(vaddq_u32(mid, side), shift4));
-            right = vreinterpretq_s32_u32(vshlq_u32(vsubq_u32(mid, side), shift4));
-
-            left  = vshrq_n_s32(left,  16);
-            right = vshrq_n_s32(right, 16);
-
-            drflac__vst2q_s16(pOutputSamples + i*8, vzip_s16(vmovn_s32(left), vmovn_s32(right)));
-        }
-
-        for (i = (frameCount4 << 2); i < frameCount; ++i) {
-            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-            mid = (mid << 1) | (side & 0x01);
-
-            pOutputSamples[i*2+0] = (drflac_int16)(((mid + side) << shift) >> 16);
-            pOutputSamples[i*2+1] = (drflac_int16)(((mid - side) << shift) >> 16);
-        }
-    }
-}
-#endif
-
-static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_mid_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
-{
-#if defined(DRFLAC_SUPPORT_SSE2)
-    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
-        drflac_read_pcm_frames_s16__decode_mid_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-    } else
-#elif defined(DRFLAC_SUPPORT_NEON)
-    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
-        drflac_read_pcm_frames_s16__decode_mid_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-    } else
-#endif
-    {
-        /* Scalar fallback. */
-#if 0
-        drflac_read_pcm_frames_s16__decode_mid_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-#else
-        drflac_read_pcm_frames_s16__decode_mid_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-#endif
-    }
-}
-
-
-#if 0
-static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_independent_stereo__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
-{
-    for (drflac_uint64 i = 0; i < frameCount; ++i) {
-        pOutputSamples[i*2+0] = (drflac_int16)((drflac_int32)((drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample)) >> 16);
-        pOutputSamples[i*2+1] = (drflac_int16)((drflac_int32)((drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample)) >> 16);
-    }
-}
-#endif
-
-static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_independent_stereo__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-    for (i = 0; i < frameCount4; ++i) {
-        drflac_uint32 tempL0 = pInputSamples0U32[i*4+0] << shift0;
-        drflac_uint32 tempL1 = pInputSamples0U32[i*4+1] << shift0;
-        drflac_uint32 tempL2 = pInputSamples0U32[i*4+2] << shift0;
-        drflac_uint32 tempL3 = pInputSamples0U32[i*4+3] << shift0;
-
-        drflac_uint32 tempR0 = pInputSamples1U32[i*4+0] << shift1;
-        drflac_uint32 tempR1 = pInputSamples1U32[i*4+1] << shift1;
-        drflac_uint32 tempR2 = pInputSamples1U32[i*4+2] << shift1;
-        drflac_uint32 tempR3 = pInputSamples1U32[i*4+3] << shift1;
-
-        tempL0 >>= 16;
-        tempL1 >>= 16;
-        tempL2 >>= 16;
-        tempL3 >>= 16;
-
-        tempR0 >>= 16;
-        tempR1 >>= 16;
-        tempR2 >>= 16;
-        tempR3 >>= 16;
-
-        pOutputSamples[i*8+0] = (drflac_int16)tempL0;
-        pOutputSamples[i*8+1] = (drflac_int16)tempR0;
-        pOutputSamples[i*8+2] = (drflac_int16)tempL1;
-        pOutputSamples[i*8+3] = (drflac_int16)tempR1;
-        pOutputSamples[i*8+4] = (drflac_int16)tempL2;
-        pOutputSamples[i*8+5] = (drflac_int16)tempR2;
-        pOutputSamples[i*8+6] = (drflac_int16)tempL3;
-        pOutputSamples[i*8+7] = (drflac_int16)tempR3;
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        pOutputSamples[i*2+0] = (drflac_int16)((pInputSamples0U32[i] << shift0) >> 16);
-        pOutputSamples[i*2+1] = (drflac_int16)((pInputSamples1U32[i] << shift1) >> 16);
-    }
-}
-
-#if defined(DRFLAC_SUPPORT_SSE2)
-static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_independent_stereo__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-    for (i = 0; i < frameCount4; ++i) {
-        __m128i left  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
-        __m128i right = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
-
-        left  = _mm_srai_epi32(left,  16);
-        right = _mm_srai_epi32(right, 16);
-
-        /* At this point we have results. We can now pack and interleave these into a single __m128i object and then store the in the output buffer. */
-        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right));
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        pOutputSamples[i*2+0] = (drflac_int16)((pInputSamples0U32[i] << shift0) >> 16);
-        pOutputSamples[i*2+1] = (drflac_int16)((pInputSamples1U32[i] << shift1) >> 16);
-    }
-}
-#endif
-
-#if defined(DRFLAC_SUPPORT_NEON)
-static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_independent_stereo__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-    int32x4_t shift0_4 = vdupq_n_s32(shift0);
-    int32x4_t shift1_4 = vdupq_n_s32(shift1);
-
-    for (i = 0; i < frameCount4; ++i) {
-        int32x4_t left;
-        int32x4_t right;
-
-        left  = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4));
-        right = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4));
-
-        left  = vshrq_n_s32(left,  16);
-        right = vshrq_n_s32(right, 16);
-
-        drflac__vst2q_s16(pOutputSamples + i*8, vzip_s16(vmovn_s32(left), vmovn_s32(right)));
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        pOutputSamples[i*2+0] = (drflac_int16)((pInputSamples0U32[i] << shift0) >> 16);
-        pOutputSamples[i*2+1] = (drflac_int16)((pInputSamples1U32[i] << shift1) >> 16);
-    }
-}
-#endif
-
-static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_independent_stereo(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
-{
-#if defined(DRFLAC_SUPPORT_SSE2)
-    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
-        drflac_read_pcm_frames_s16__decode_independent_stereo__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-    } else
-#elif defined(DRFLAC_SUPPORT_NEON)
-    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
-        drflac_read_pcm_frames_s16__decode_independent_stereo__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-    } else
-#endif
-    {
-        /* Scalar fallback. */
-#if 0
-        drflac_read_pcm_frames_s16__decode_independent_stereo__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-#else
-        drflac_read_pcm_frames_s16__decode_independent_stereo__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-#endif
-    }
-}
-
-DRFLAC_API drflac_uint64 drflac_read_pcm_frames_s16(drflac* pFlac, drflac_uint64 framesToRead, drflac_int16* pBufferOut)
-{
-    drflac_uint64 framesRead;
-    drflac_uint32 unusedBitsPerSample;
-
-    if (pFlac == NULL || framesToRead == 0) {
-        return 0;
-    }
-
-    if (pBufferOut == NULL) {
-        return drflac__seek_forward_by_pcm_frames(pFlac, framesToRead);
-    }
-
-    DRFLAC_ASSERT(pFlac->bitsPerSample <= 32);
-    unusedBitsPerSample = 32 - pFlac->bitsPerSample;
-
-    framesRead = 0;
-    while (framesToRead > 0) {
-        /* If we've run out of samples in this frame, go to the next. */
-        if (pFlac->currentFLACFrame.pcmFramesRemaining == 0) {
-            if (!drflac__read_and_decode_next_flac_frame(pFlac)) {
-                break;  /* Couldn't read the next frame, so just break from the loop and return. */
-            }
-        } else {
-            unsigned int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFLACFrame.header.channelAssignment);
-            drflac_uint64 iFirstPCMFrame = pFlac->currentFLACFrame.header.blockSizeInPCMFrames - pFlac->currentFLACFrame.pcmFramesRemaining;
-            drflac_uint64 frameCountThisIteration = framesToRead;
-
-            if (frameCountThisIteration > pFlac->currentFLACFrame.pcmFramesRemaining) {
-                frameCountThisIteration = pFlac->currentFLACFrame.pcmFramesRemaining;
-            }
-
-            if (channelCount == 2) {
-                const drflac_int32* pDecodedSamples0 = pFlac->currentFLACFrame.subframes[0].pSamplesS32 + iFirstPCMFrame;
-                const drflac_int32* pDecodedSamples1 = pFlac->currentFLACFrame.subframes[1].pSamplesS32 + iFirstPCMFrame;
-
-                switch (pFlac->currentFLACFrame.header.channelAssignment)
-                {
-                    case DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE:
-                    {
-                        drflac_read_pcm_frames_s16__decode_left_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
-                    } break;
-
-                    case DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE:
-                    {
-                        drflac_read_pcm_frames_s16__decode_right_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
-                    } break;
-
-                    case DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE:
-                    {
-                        drflac_read_pcm_frames_s16__decode_mid_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
-                    } break;
-
-                    case DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT:
-                    default:
-                    {
-                        drflac_read_pcm_frames_s16__decode_independent_stereo(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
-                    } break;
-                }
-            } else {
-                /* Generic interleaving. */
-                drflac_uint64 i;
-                for (i = 0; i < frameCountThisIteration; ++i) {
-                    unsigned int j;
-                    for (j = 0; j < channelCount; ++j) {
-                        drflac_int32 sampleS32 = (drflac_int32)((drflac_uint32)(pFlac->currentFLACFrame.subframes[j].pSamplesS32[iFirstPCMFrame + i]) << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[j].wastedBitsPerSample));
-                        pBufferOut[(i*channelCount)+j] = (drflac_int16)(sampleS32 >> 16);
-                    }
-                }
-            }
-
-            framesRead                += frameCountThisIteration;
-            pBufferOut                += frameCountThisIteration * channelCount;
-            framesToRead              -= frameCountThisIteration;
-            pFlac->currentPCMFrame    += frameCountThisIteration;
-            pFlac->currentFLACFrame.pcmFramesRemaining -= (drflac_uint32)frameCountThisIteration;
-        }
-    }
-
-    return framesRead;
-}
-
-
-#if 0
-static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
-{
-    drflac_uint64 i;
-    for (i = 0; i < frameCount; ++i) {
-        drflac_uint32 left  = (drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
-        drflac_uint32 side  = (drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
-        drflac_uint32 right = left - side;
-
-        pOutputSamples[i*2+0] = (float)((drflac_int32)left  / 2147483648.0);
-        pOutputSamples[i*2+1] = (float)((drflac_int32)right / 2147483648.0);
-    }
-}
-#endif
-
-static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-    float factor = 1 / 2147483648.0;
-
-    for (i = 0; i < frameCount4; ++i) {
-        drflac_uint32 left0 = pInputSamples0U32[i*4+0] << shift0;
-        drflac_uint32 left1 = pInputSamples0U32[i*4+1] << shift0;
-        drflac_uint32 left2 = pInputSamples0U32[i*4+2] << shift0;
-        drflac_uint32 left3 = pInputSamples0U32[i*4+3] << shift0;
-
-        drflac_uint32 side0 = pInputSamples1U32[i*4+0] << shift1;
-        drflac_uint32 side1 = pInputSamples1U32[i*4+1] << shift1;
-        drflac_uint32 side2 = pInputSamples1U32[i*4+2] << shift1;
-        drflac_uint32 side3 = pInputSamples1U32[i*4+3] << shift1;
-
-        drflac_uint32 right0 = left0 - side0;
-        drflac_uint32 right1 = left1 - side1;
-        drflac_uint32 right2 = left2 - side2;
-        drflac_uint32 right3 = left3 - side3;
-
-        pOutputSamples[i*8+0] = (drflac_int32)left0  * factor;
-        pOutputSamples[i*8+1] = (drflac_int32)right0 * factor;
-        pOutputSamples[i*8+2] = (drflac_int32)left1  * factor;
-        pOutputSamples[i*8+3] = (drflac_int32)right1 * factor;
-        pOutputSamples[i*8+4] = (drflac_int32)left2  * factor;
-        pOutputSamples[i*8+5] = (drflac_int32)right2 * factor;
-        pOutputSamples[i*8+6] = (drflac_int32)left3  * factor;
-        pOutputSamples[i*8+7] = (drflac_int32)right3 * factor;
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
-        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
-        drflac_uint32 right = left - side;
-
-        pOutputSamples[i*2+0] = (drflac_int32)left  * factor;
-        pOutputSamples[i*2+1] = (drflac_int32)right * factor;
-    }
-}
-
-#if defined(DRFLAC_SUPPORT_SSE2)
-static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8;
-    drflac_uint32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8;
-    __m128 factor;
-
-    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
-
-    factor = _mm_set1_ps(1.0f / 8388608.0f);
-
-    for (i = 0; i < frameCount4; ++i) {
-        __m128i left  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
-        __m128i side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
-        __m128i right = _mm_sub_epi32(left, side);
-        __m128 leftf  = _mm_mul_ps(_mm_cvtepi32_ps(left),  factor);
-        __m128 rightf = _mm_mul_ps(_mm_cvtepi32_ps(right), factor);
-
-        _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf));
-        _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf));
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
-        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
-        drflac_uint32 right = left - side;
-
-        pOutputSamples[i*2+0] = (drflac_int32)left  / 8388608.0f;
-        pOutputSamples[i*2+1] = (drflac_int32)right / 8388608.0f;
-    }
-}
-#endif
-
-#if defined(DRFLAC_SUPPORT_NEON)
-static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8;
-    drflac_uint32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8;
-    float32x4_t factor4;
-    int32x4_t shift0_4;
-    int32x4_t shift1_4;
-
-    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
-
-    factor4  = vdupq_n_f32(1.0f / 8388608.0f);
-    shift0_4 = vdupq_n_s32(shift0);
-    shift1_4 = vdupq_n_s32(shift1);
-
-    for (i = 0; i < frameCount4; ++i) {
-        uint32x4_t left;
-        uint32x4_t side;
-        uint32x4_t right;
-        float32x4_t leftf;
-        float32x4_t rightf;
-
-        left   = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4);
-        side   = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4);
-        right  = vsubq_u32(left, side);
-        leftf  = vmulq_f32(vcvtq_f32_s32(vreinterpretq_s32_u32(left)),  factor4);
-        rightf = vmulq_f32(vcvtq_f32_s32(vreinterpretq_s32_u32(right)), factor4);
-
-        drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf));
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
-        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
-        drflac_uint32 right = left - side;
-
-        pOutputSamples[i*2+0] = (drflac_int32)left  / 8388608.0f;
-        pOutputSamples[i*2+1] = (drflac_int32)right / 8388608.0f;
-    }
-}
-#endif
-
-static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
-{
-#if defined(DRFLAC_SUPPORT_SSE2)
-    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
-        drflac_read_pcm_frames_f32__decode_left_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-    } else
-#elif defined(DRFLAC_SUPPORT_NEON)
-    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
-        drflac_read_pcm_frames_f32__decode_left_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-    } else
-#endif
-    {
-        /* Scalar fallback. */
-#if 0
-        drflac_read_pcm_frames_f32__decode_left_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-#else
-        drflac_read_pcm_frames_f32__decode_left_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-#endif
-    }
-}
-
-
-#if 0
-static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
-{
-    drflac_uint64 i;
-    for (i = 0; i < frameCount; ++i) {
-        drflac_uint32 side  = (drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
-        drflac_uint32 right = (drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
-        drflac_uint32 left  = right + side;
-
-        pOutputSamples[i*2+0] = (float)((drflac_int32)left  / 2147483648.0);
-        pOutputSamples[i*2+1] = (float)((drflac_int32)right / 2147483648.0);
-    }
-}
-#endif
-
-static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-    float factor = 1 / 2147483648.0;
-
-    for (i = 0; i < frameCount4; ++i) {
-        drflac_uint32 side0  = pInputSamples0U32[i*4+0] << shift0;
-        drflac_uint32 side1  = pInputSamples0U32[i*4+1] << shift0;
-        drflac_uint32 side2  = pInputSamples0U32[i*4+2] << shift0;
-        drflac_uint32 side3  = pInputSamples0U32[i*4+3] << shift0;
-
-        drflac_uint32 right0 = pInputSamples1U32[i*4+0] << shift1;
-        drflac_uint32 right1 = pInputSamples1U32[i*4+1] << shift1;
-        drflac_uint32 right2 = pInputSamples1U32[i*4+2] << shift1;
-        drflac_uint32 right3 = pInputSamples1U32[i*4+3] << shift1;
-
-        drflac_uint32 left0 = right0 + side0;
-        drflac_uint32 left1 = right1 + side1;
-        drflac_uint32 left2 = right2 + side2;
-        drflac_uint32 left3 = right3 + side3;
-
-        pOutputSamples[i*8+0] = (drflac_int32)left0  * factor;
-        pOutputSamples[i*8+1] = (drflac_int32)right0 * factor;
-        pOutputSamples[i*8+2] = (drflac_int32)left1  * factor;
-        pOutputSamples[i*8+3] = (drflac_int32)right1 * factor;
-        pOutputSamples[i*8+4] = (drflac_int32)left2  * factor;
-        pOutputSamples[i*8+5] = (drflac_int32)right2 * factor;
-        pOutputSamples[i*8+6] = (drflac_int32)left3  * factor;
-        pOutputSamples[i*8+7] = (drflac_int32)right3 * factor;
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
-        drflac_uint32 right = pInputSamples1U32[i] << shift1;
-        drflac_uint32 left  = right + side;
-
-        pOutputSamples[i*2+0] = (drflac_int32)left  * factor;
-        pOutputSamples[i*2+1] = (drflac_int32)right * factor;
-    }
-}
-
-#if defined(DRFLAC_SUPPORT_SSE2)
-static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8;
-    drflac_uint32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8;
-    __m128 factor;
-
-    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
-
-    factor = _mm_set1_ps(1.0f / 8388608.0f);
-
-    for (i = 0; i < frameCount4; ++i) {
-        __m128i side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
-        __m128i right = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
-        __m128i left  = _mm_add_epi32(right, side);
-        __m128 leftf  = _mm_mul_ps(_mm_cvtepi32_ps(left),  factor);
-        __m128 rightf = _mm_mul_ps(_mm_cvtepi32_ps(right), factor);
-
-        _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf));
-        _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf));
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
-        drflac_uint32 right = pInputSamples1U32[i] << shift1;
-        drflac_uint32 left  = right + side;
-
-        pOutputSamples[i*2+0] = (drflac_int32)left  / 8388608.0f;
-        pOutputSamples[i*2+1] = (drflac_int32)right / 8388608.0f;
-    }
-}
-#endif
-
-#if defined(DRFLAC_SUPPORT_NEON)
-static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8;
-    drflac_uint32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8;
-    float32x4_t factor4;
-    int32x4_t shift0_4;
-    int32x4_t shift1_4;
-
-    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
-
-    factor4  = vdupq_n_f32(1.0f / 8388608.0f);
-    shift0_4 = vdupq_n_s32(shift0);
-    shift1_4 = vdupq_n_s32(shift1);
-
-    for (i = 0; i < frameCount4; ++i) {
-        uint32x4_t side;
-        uint32x4_t right;
-        uint32x4_t left;
-        float32x4_t leftf;
-        float32x4_t rightf;
-
-        side   = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4);
-        right  = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4);
-        left   = vaddq_u32(right, side);
-        leftf  = vmulq_f32(vcvtq_f32_s32(vreinterpretq_s32_u32(left)),  factor4);
-        rightf = vmulq_f32(vcvtq_f32_s32(vreinterpretq_s32_u32(right)), factor4);
-
-        drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf));
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
-        drflac_uint32 right = pInputSamples1U32[i] << shift1;
-        drflac_uint32 left  = right + side;
-
-        pOutputSamples[i*2+0] = (drflac_int32)left  / 8388608.0f;
-        pOutputSamples[i*2+1] = (drflac_int32)right / 8388608.0f;
-    }
-}
-#endif
-
-static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
-{
-#if defined(DRFLAC_SUPPORT_SSE2)
-    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
-        drflac_read_pcm_frames_f32__decode_right_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-    } else
-#elif defined(DRFLAC_SUPPORT_NEON)
-    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
-        drflac_read_pcm_frames_f32__decode_right_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-    } else
-#endif
-    {
-        /* Scalar fallback. */
-#if 0
-        drflac_read_pcm_frames_f32__decode_right_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-#else
-        drflac_read_pcm_frames_f32__decode_right_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-#endif
-    }
-}
-
-
-#if 0
-static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
-{
-    for (drflac_uint64 i = 0; i < frameCount; ++i) {
-        drflac_uint32 mid  = (drflac_uint32)pInputSamples0[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-        drflac_uint32 side = (drflac_uint32)pInputSamples1[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-        mid = (mid << 1) | (side & 0x01);
-
-        pOutputSamples[i*2+0] = (float)((((drflac_int32)(mid + side) >> 1) << (unusedBitsPerSample)) / 2147483648.0);
-        pOutputSamples[i*2+1] = (float)((((drflac_int32)(mid - side) >> 1) << (unusedBitsPerSample)) / 2147483648.0);
-    }
-}
-#endif
-
-static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift = unusedBitsPerSample;
-    float factor = 1 / 2147483648.0;
-
-    if (shift > 0) {
-        shift -= 1;
-        for (i = 0; i < frameCount4; ++i) {
-            drflac_uint32 temp0L;
-            drflac_uint32 temp1L;
-            drflac_uint32 temp2L;
-            drflac_uint32 temp3L;
-            drflac_uint32 temp0R;
-            drflac_uint32 temp1R;
-            drflac_uint32 temp2R;
-            drflac_uint32 temp3R;
-
-            drflac_uint32 mid0  = pInputSamples0U32[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 mid1  = pInputSamples0U32[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 mid2  = pInputSamples0U32[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 mid3  = pInputSamples0U32[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-
-            drflac_uint32 side0 = pInputSamples1U32[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-            drflac_uint32 side1 = pInputSamples1U32[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-            drflac_uint32 side2 = pInputSamples1U32[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-            drflac_uint32 side3 = pInputSamples1U32[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-            mid0 = (mid0 << 1) | (side0 & 0x01);
-            mid1 = (mid1 << 1) | (side1 & 0x01);
-            mid2 = (mid2 << 1) | (side2 & 0x01);
-            mid3 = (mid3 << 1) | (side3 & 0x01);
-
-            temp0L = (mid0 + side0) << shift;
-            temp1L = (mid1 + side1) << shift;
-            temp2L = (mid2 + side2) << shift;
-            temp3L = (mid3 + side3) << shift;
-
-            temp0R = (mid0 - side0) << shift;
-            temp1R = (mid1 - side1) << shift;
-            temp2R = (mid2 - side2) << shift;
-            temp3R = (mid3 - side3) << shift;
-
-            pOutputSamples[i*8+0] = (drflac_int32)temp0L * factor;
-            pOutputSamples[i*8+1] = (drflac_int32)temp0R * factor;
-            pOutputSamples[i*8+2] = (drflac_int32)temp1L * factor;
-            pOutputSamples[i*8+3] = (drflac_int32)temp1R * factor;
-            pOutputSamples[i*8+4] = (drflac_int32)temp2L * factor;
-            pOutputSamples[i*8+5] = (drflac_int32)temp2R * factor;
-            pOutputSamples[i*8+6] = (drflac_int32)temp3L * factor;
-            pOutputSamples[i*8+7] = (drflac_int32)temp3R * factor;
-        }
-    } else {
-        for (i = 0; i < frameCount4; ++i) {
-            drflac_uint32 temp0L;
-            drflac_uint32 temp1L;
-            drflac_uint32 temp2L;
-            drflac_uint32 temp3L;
-            drflac_uint32 temp0R;
-            drflac_uint32 temp1R;
-            drflac_uint32 temp2R;
-            drflac_uint32 temp3R;
-
-            drflac_uint32 mid0  = pInputSamples0U32[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 mid1  = pInputSamples0U32[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 mid2  = pInputSamples0U32[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 mid3  = pInputSamples0U32[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-
-            drflac_uint32 side0 = pInputSamples1U32[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-            drflac_uint32 side1 = pInputSamples1U32[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-            drflac_uint32 side2 = pInputSamples1U32[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-            drflac_uint32 side3 = pInputSamples1U32[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-            mid0 = (mid0 << 1) | (side0 & 0x01);
-            mid1 = (mid1 << 1) | (side1 & 0x01);
-            mid2 = (mid2 << 1) | (side2 & 0x01);
-            mid3 = (mid3 << 1) | (side3 & 0x01);
-
-            temp0L = (drflac_uint32)((drflac_int32)(mid0 + side0) >> 1);
-            temp1L = (drflac_uint32)((drflac_int32)(mid1 + side1) >> 1);
-            temp2L = (drflac_uint32)((drflac_int32)(mid2 + side2) >> 1);
-            temp3L = (drflac_uint32)((drflac_int32)(mid3 + side3) >> 1);
-
-            temp0R = (drflac_uint32)((drflac_int32)(mid0 - side0) >> 1);
-            temp1R = (drflac_uint32)((drflac_int32)(mid1 - side1) >> 1);
-            temp2R = (drflac_uint32)((drflac_int32)(mid2 - side2) >> 1);
-            temp3R = (drflac_uint32)((drflac_int32)(mid3 - side3) >> 1);
-
-            pOutputSamples[i*8+0] = (drflac_int32)temp0L * factor;
-            pOutputSamples[i*8+1] = (drflac_int32)temp0R * factor;
-            pOutputSamples[i*8+2] = (drflac_int32)temp1L * factor;
-            pOutputSamples[i*8+3] = (drflac_int32)temp1R * factor;
-            pOutputSamples[i*8+4] = (drflac_int32)temp2L * factor;
-            pOutputSamples[i*8+5] = (drflac_int32)temp2R * factor;
-            pOutputSamples[i*8+6] = (drflac_int32)temp3L * factor;
-            pOutputSamples[i*8+7] = (drflac_int32)temp3R * factor;
-        }
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-        drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-        mid = (mid << 1) | (side & 0x01);
-
-        pOutputSamples[i*2+0] = (drflac_int32)((drflac_uint32)((drflac_int32)(mid + side) >> 1) << unusedBitsPerSample) * factor;
-        pOutputSamples[i*2+1] = (drflac_int32)((drflac_uint32)((drflac_int32)(mid - side) >> 1) << unusedBitsPerSample) * factor;
-    }
-}
-
-#if defined(DRFLAC_SUPPORT_SSE2)
-static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift = unusedBitsPerSample - 8;
-    float factor;
-    __m128 factor128;
-
-    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
-
-    factor = 1.0f / 8388608.0f;
-    factor128 = _mm_set1_ps(factor);
-
-    if (shift == 0) {
-        for (i = 0; i < frameCount4; ++i) {
-            __m128i mid;
-            __m128i side;
-            __m128i tempL;
-            __m128i tempR;
-            __m128  leftf;
-            __m128  rightf;
-
-            mid    = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
-            side   = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
-
-            mid    = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01)));
-
-            tempL  = _mm_srai_epi32(_mm_add_epi32(mid, side), 1);
-            tempR  = _mm_srai_epi32(_mm_sub_epi32(mid, side), 1);
-
-            leftf  = _mm_mul_ps(_mm_cvtepi32_ps(tempL), factor128);
-            rightf = _mm_mul_ps(_mm_cvtepi32_ps(tempR), factor128);
-
-            _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf));
-            _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf));
-        }
-
-        for (i = (frameCount4 << 2); i < frameCount; ++i) {
-            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-            mid = (mid << 1) | (side & 0x01);
-
-            pOutputSamples[i*2+0] = ((drflac_int32)(mid + side) >> 1) * factor;
-            pOutputSamples[i*2+1] = ((drflac_int32)(mid - side) >> 1) * factor;
-        }
-    } else {
-        shift -= 1;
-        for (i = 0; i < frameCount4; ++i) {
-            __m128i mid;
-            __m128i side;
-            __m128i tempL;
-            __m128i tempR;
-            __m128 leftf;
-            __m128 rightf;
-
-            mid    = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
-            side   = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
-
-            mid    = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01)));
-
-            tempL  = _mm_slli_epi32(_mm_add_epi32(mid, side), shift);
-            tempR  = _mm_slli_epi32(_mm_sub_epi32(mid, side), shift);
-
-            leftf  = _mm_mul_ps(_mm_cvtepi32_ps(tempL), factor128);
-            rightf = _mm_mul_ps(_mm_cvtepi32_ps(tempR), factor128);
-
-            _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf));
-            _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf));
-        }
-
-        for (i = (frameCount4 << 2); i < frameCount; ++i) {
-            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-            mid = (mid << 1) | (side & 0x01);
-
-            pOutputSamples[i*2+0] = (drflac_int32)((mid + side) << shift) * factor;
-            pOutputSamples[i*2+1] = (drflac_int32)((mid - side) << shift) * factor;
-        }
-    }
-}
-#endif
-
-#if defined(DRFLAC_SUPPORT_NEON)
-static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift = unusedBitsPerSample - 8;
-    float factor;
-    float32x4_t factor4;
-    int32x4_t shift4;
-    int32x4_t wbps0_4;  /* Wasted Bits Per Sample */
-    int32x4_t wbps1_4;  /* Wasted Bits Per Sample */
-
-    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
-
-    factor  = 1.0f / 8388608.0f;
-    factor4 = vdupq_n_f32(factor);
-    wbps0_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
-    wbps1_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
-
-    if (shift == 0) {
-        for (i = 0; i < frameCount4; ++i) {
-            int32x4_t lefti;
-            int32x4_t righti;
-            float32x4_t leftf;
-            float32x4_t rightf;
-
-            uint32x4_t mid  = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbps0_4);
-            uint32x4_t side = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbps1_4);
-
-            mid    = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, vdupq_n_u32(1)));
-
-            lefti  = vshrq_n_s32(vreinterpretq_s32_u32(vaddq_u32(mid, side)), 1);
-            righti = vshrq_n_s32(vreinterpretq_s32_u32(vsubq_u32(mid, side)), 1);
-
-            leftf  = vmulq_f32(vcvtq_f32_s32(lefti),  factor4);
-            rightf = vmulq_f32(vcvtq_f32_s32(righti), factor4);
-
-            drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf));
-        }
-
-        for (i = (frameCount4 << 2); i < frameCount; ++i) {
-            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-            mid = (mid << 1) | (side & 0x01);
-
-            pOutputSamples[i*2+0] = ((drflac_int32)(mid + side) >> 1) * factor;
-            pOutputSamples[i*2+1] = ((drflac_int32)(mid - side) >> 1) * factor;
-        }
-    } else {
-        shift -= 1;
-        shift4 = vdupq_n_s32(shift);
-        for (i = 0; i < frameCount4; ++i) {
-            uint32x4_t mid;
-            uint32x4_t side;
-            int32x4_t lefti;
-            int32x4_t righti;
-            float32x4_t leftf;
-            float32x4_t rightf;
-
-            mid    = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbps0_4);
-            side   = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbps1_4);
-
-            mid    = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, vdupq_n_u32(1)));
-
-            lefti  = vreinterpretq_s32_u32(vshlq_u32(vaddq_u32(mid, side), shift4));
-            righti = vreinterpretq_s32_u32(vshlq_u32(vsubq_u32(mid, side), shift4));
-
-            leftf  = vmulq_f32(vcvtq_f32_s32(lefti),  factor4);
-            rightf = vmulq_f32(vcvtq_f32_s32(righti), factor4);
-
-            drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf));
-        }
-
-        for (i = (frameCount4 << 2); i < frameCount; ++i) {
-            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-
-            mid = (mid << 1) | (side & 0x01);
-
-            pOutputSamples[i*2+0] = (drflac_int32)((mid + side) << shift) * factor;
-            pOutputSamples[i*2+1] = (drflac_int32)((mid - side) << shift) * factor;
-        }
-    }
-}
-#endif
-
-static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
-{
-#if defined(DRFLAC_SUPPORT_SSE2)
-    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
-        drflac_read_pcm_frames_f32__decode_mid_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-    } else
-#elif defined(DRFLAC_SUPPORT_NEON)
-    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
-        drflac_read_pcm_frames_f32__decode_mid_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-    } else
-#endif
-    {
-        /* Scalar fallback. */
-#if 0
-        drflac_read_pcm_frames_f32__decode_mid_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-#else
-        drflac_read_pcm_frames_f32__decode_mid_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-#endif
-    }
-}
-
-#if 0
-static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
-{
-    for (drflac_uint64 i = 0; i < frameCount; ++i) {
-        pOutputSamples[i*2+0] = (float)((drflac_int32)((drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample)) / 2147483648.0);
-        pOutputSamples[i*2+1] = (float)((drflac_int32)((drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample)) / 2147483648.0);
-    }
-}
-#endif
-
-static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
-    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
-    float factor = 1 / 2147483648.0;
-
-    for (i = 0; i < frameCount4; ++i) {
-        drflac_uint32 tempL0 = pInputSamples0U32[i*4+0] << shift0;
-        drflac_uint32 tempL1 = pInputSamples0U32[i*4+1] << shift0;
-        drflac_uint32 tempL2 = pInputSamples0U32[i*4+2] << shift0;
-        drflac_uint32 tempL3 = pInputSamples0U32[i*4+3] << shift0;
-
-        drflac_uint32 tempR0 = pInputSamples1U32[i*4+0] << shift1;
-        drflac_uint32 tempR1 = pInputSamples1U32[i*4+1] << shift1;
-        drflac_uint32 tempR2 = pInputSamples1U32[i*4+2] << shift1;
-        drflac_uint32 tempR3 = pInputSamples1U32[i*4+3] << shift1;
-
-        pOutputSamples[i*8+0] = (drflac_int32)tempL0 * factor;
-        pOutputSamples[i*8+1] = (drflac_int32)tempR0 * factor;
-        pOutputSamples[i*8+2] = (drflac_int32)tempL1 * factor;
-        pOutputSamples[i*8+3] = (drflac_int32)tempR1 * factor;
-        pOutputSamples[i*8+4] = (drflac_int32)tempL2 * factor;
-        pOutputSamples[i*8+5] = (drflac_int32)tempR2 * factor;
-        pOutputSamples[i*8+6] = (drflac_int32)tempL3 * factor;
-        pOutputSamples[i*8+7] = (drflac_int32)tempR3 * factor;
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        pOutputSamples[i*2+0] = (drflac_int32)(pInputSamples0U32[i] << shift0) * factor;
-        pOutputSamples[i*2+1] = (drflac_int32)(pInputSamples1U32[i] << shift1) * factor;
-    }
-}
-
-#if defined(DRFLAC_SUPPORT_SSE2)
-static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8;
-    drflac_uint32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8;
-
-    float factor = 1.0f / 8388608.0f;
-    __m128 factor128 = _mm_set1_ps(factor);
-
-    for (i = 0; i < frameCount4; ++i) {
-        __m128i lefti;
-        __m128i righti;
-        __m128 leftf;
-        __m128 rightf;
-
-        lefti  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
-        righti = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
-
-        leftf  = _mm_mul_ps(_mm_cvtepi32_ps(lefti),  factor128);
-        rightf = _mm_mul_ps(_mm_cvtepi32_ps(righti), factor128);
-
-        _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf));
-        _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf));
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        pOutputSamples[i*2+0] = (drflac_int32)(pInputSamples0U32[i] << shift0) * factor;
-        pOutputSamples[i*2+1] = (drflac_int32)(pInputSamples1U32[i] << shift1) * factor;
-    }
-}
-#endif
-
-#if defined(DRFLAC_SUPPORT_NEON)
-static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
-{
-    drflac_uint64 i;
-    drflac_uint64 frameCount4 = frameCount >> 2;
-    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
-    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
-    drflac_uint32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8;
-    drflac_uint32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8;
-
-    float factor = 1.0f / 8388608.0f;
-    float32x4_t factor4 = vdupq_n_f32(factor);
-    int32x4_t shift0_4  = vdupq_n_s32(shift0);
-    int32x4_t shift1_4  = vdupq_n_s32(shift1);
-
-    for (i = 0; i < frameCount4; ++i) {
-        int32x4_t lefti;
-        int32x4_t righti;
-        float32x4_t leftf;
-        float32x4_t rightf;
-
-        lefti  = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4));
-        righti = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4));
-
-        leftf  = vmulq_f32(vcvtq_f32_s32(lefti),  factor4);
-        rightf = vmulq_f32(vcvtq_f32_s32(righti), factor4);
-
-        drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf));
-    }
-
-    for (i = (frameCount4 << 2); i < frameCount; ++i) {
-        pOutputSamples[i*2+0] = (drflac_int32)(pInputSamples0U32[i] << shift0) * factor;
-        pOutputSamples[i*2+1] = (drflac_int32)(pInputSamples1U32[i] << shift1) * factor;
-    }
-}
-#endif
-
-static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
-{
-#if defined(DRFLAC_SUPPORT_SSE2)
-    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
-        drflac_read_pcm_frames_f32__decode_independent_stereo__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-    } else
-#elif defined(DRFLAC_SUPPORT_NEON)
-    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
-        drflac_read_pcm_frames_f32__decode_independent_stereo__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-    } else
-#endif
-    {
-        /* Scalar fallback. */
-#if 0
-        drflac_read_pcm_frames_f32__decode_independent_stereo__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-#else
-        drflac_read_pcm_frames_f32__decode_independent_stereo__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
-#endif
-    }
-}
-
-DRFLAC_API drflac_uint64 drflac_read_pcm_frames_f32(drflac* pFlac, drflac_uint64 framesToRead, float* pBufferOut)
-{
-    drflac_uint64 framesRead;
-    drflac_uint32 unusedBitsPerSample;
-
-    if (pFlac == NULL || framesToRead == 0) {
-        return 0;
-    }
-
-    if (pBufferOut == NULL) {
-        return drflac__seek_forward_by_pcm_frames(pFlac, framesToRead);
-    }
-
-    DRFLAC_ASSERT(pFlac->bitsPerSample <= 32);
-    unusedBitsPerSample = 32 - pFlac->bitsPerSample;
-
-    framesRead = 0;
-    while (framesToRead > 0) {
-        /* If we've run out of samples in this frame, go to the next. */
-        if (pFlac->currentFLACFrame.pcmFramesRemaining == 0) {
-            if (!drflac__read_and_decode_next_flac_frame(pFlac)) {
-                break;  /* Couldn't read the next frame, so just break from the loop and return. */
-            }
-        } else {
-            unsigned int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFLACFrame.header.channelAssignment);
-            drflac_uint64 iFirstPCMFrame = pFlac->currentFLACFrame.header.blockSizeInPCMFrames - pFlac->currentFLACFrame.pcmFramesRemaining;
-            drflac_uint64 frameCountThisIteration = framesToRead;
-
-            if (frameCountThisIteration > pFlac->currentFLACFrame.pcmFramesRemaining) {
-                frameCountThisIteration = pFlac->currentFLACFrame.pcmFramesRemaining;
-            }
-
-            if (channelCount == 2) {
-                const drflac_int32* pDecodedSamples0 = pFlac->currentFLACFrame.subframes[0].pSamplesS32 + iFirstPCMFrame;
-                const drflac_int32* pDecodedSamples1 = pFlac->currentFLACFrame.subframes[1].pSamplesS32 + iFirstPCMFrame;
-
-                switch (pFlac->currentFLACFrame.header.channelAssignment)
-                {
-                    case DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE:
-                    {
-                        drflac_read_pcm_frames_f32__decode_left_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
-                    } break;
-
-                    case DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE:
-                    {
-                        drflac_read_pcm_frames_f32__decode_right_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
-                    } break;
-
-                    case DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE:
-                    {
-                        drflac_read_pcm_frames_f32__decode_mid_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
-                    } break;
-
-                    case DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT:
-                    default:
-                    {
-                        drflac_read_pcm_frames_f32__decode_independent_stereo(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
-                    } break;
-                }
-            } else {
-                /* Generic interleaving. */
-                drflac_uint64 i;
-                for (i = 0; i < frameCountThisIteration; ++i) {
-                    unsigned int j;
-                    for (j = 0; j < channelCount; ++j) {
-                        drflac_int32 sampleS32 = (drflac_int32)((drflac_uint32)(pFlac->currentFLACFrame.subframes[j].pSamplesS32[iFirstPCMFrame + i]) << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[j].wastedBitsPerSample));
-                        pBufferOut[(i*channelCount)+j] = (float)(sampleS32 / 2147483648.0);
-                    }
-                }
-            }
-
-            framesRead                += frameCountThisIteration;
-            pBufferOut                += frameCountThisIteration * channelCount;
-            framesToRead              -= frameCountThisIteration;
-            pFlac->currentPCMFrame    += frameCountThisIteration;
-            pFlac->currentFLACFrame.pcmFramesRemaining -= (unsigned int)frameCountThisIteration;
-        }
-    }
-
-    return framesRead;
-}
-
-
-DRFLAC_API drflac_bool32 drflac_seek_to_pcm_frame(drflac* pFlac, drflac_uint64 pcmFrameIndex)
-{
-    if (pFlac == NULL) {
-        return DRFLAC_FALSE;
-    }
-
-    /* Don't do anything if we're already on the seek point. */
-    if (pFlac->currentPCMFrame == pcmFrameIndex) {
-        return DRFLAC_TRUE;
-    }
-
-    /*
-    If we don't know where the first frame begins then we can't seek. This will happen when the STREAMINFO block was not present
-    when the decoder was opened.
-    */
-    if (pFlac->firstFLACFramePosInBytes == 0) {
-        return DRFLAC_FALSE;
-    }
-
-    if (pcmFrameIndex == 0) {
-        pFlac->currentPCMFrame = 0;
-        return drflac__seek_to_first_frame(pFlac);
-    } else {
-        drflac_bool32 wasSuccessful = DRFLAC_FALSE;
-
-        /* Clamp the sample to the end. */
-        if (pcmFrameIndex > pFlac->totalPCMFrameCount) {
-            pcmFrameIndex = pFlac->totalPCMFrameCount;
-        }
-
-        /* If the target sample and the current sample are in the same frame we just move the position forward. */
-        if (pcmFrameIndex > pFlac->currentPCMFrame) {
-            /* Forward. */
-            drflac_uint32 offset = (drflac_uint32)(pcmFrameIndex - pFlac->currentPCMFrame);
-            if (pFlac->currentFLACFrame.pcmFramesRemaining >  offset) {
-                pFlac->currentFLACFrame.pcmFramesRemaining -= offset;
-                pFlac->currentPCMFrame = pcmFrameIndex;
-                return DRFLAC_TRUE;
-            }
-        } else {
-            /* Backward. */
-            drflac_uint32 offsetAbs = (drflac_uint32)(pFlac->currentPCMFrame - pcmFrameIndex);
-            drflac_uint32 currentFLACFramePCMFrameCount = pFlac->currentFLACFrame.header.blockSizeInPCMFrames;
-            drflac_uint32 currentFLACFramePCMFramesConsumed = currentFLACFramePCMFrameCount - pFlac->currentFLACFrame.pcmFramesRemaining;
-            if (currentFLACFramePCMFramesConsumed > offsetAbs) {
-                pFlac->currentFLACFrame.pcmFramesRemaining += offsetAbs;
-                pFlac->currentPCMFrame = pcmFrameIndex;
-                return DRFLAC_TRUE;
-            }
-        }
-
-        /*
-        Different techniques depending on encapsulation. Using the native FLAC seektable with Ogg encapsulation is a bit awkward so
-        we'll instead use Ogg's natural seeking facility.
-        */
-#ifndef DR_FLAC_NO_OGG
-        if (pFlac->container == drflac_container_ogg)
-        {
-            wasSuccessful = drflac_ogg__seek_to_pcm_frame(pFlac, pcmFrameIndex);
-        }
-        else
-#endif
-        {
-            /* First try seeking via the seek table. If this fails, fall back to a brute force seek which is much slower. */
-            if (/*!wasSuccessful && */!pFlac->_noSeekTableSeek) {
-                wasSuccessful = drflac__seek_to_pcm_frame__seek_table(pFlac, pcmFrameIndex);
-            }
-
-#if !defined(DR_FLAC_NO_CRC)
-            /* Fall back to binary search if seek table seeking fails. This requires the length of the stream to be known. */
-            if (!wasSuccessful && !pFlac->_noBinarySearchSeek && pFlac->totalPCMFrameCount > 0) {
-                wasSuccessful = drflac__seek_to_pcm_frame__binary_search(pFlac, pcmFrameIndex);
-            }
-#endif
-
-            /* Fall back to brute force if all else fails. */
-            if (!wasSuccessful && !pFlac->_noBruteForceSeek) {
-                wasSuccessful = drflac__seek_to_pcm_frame__brute_force(pFlac, pcmFrameIndex);
-            }
-        }
-
-        pFlac->currentPCMFrame = pcmFrameIndex;
-        return wasSuccessful;
-    }
-}
-
-
-
-/* High Level APIs */
-
-#if defined(SIZE_MAX)
-    #define DRFLAC_SIZE_MAX  SIZE_MAX
-#else
-    #if defined(DRFLAC_64BIT)
-        #define DRFLAC_SIZE_MAX  ((drflac_uint64)0xFFFFFFFFFFFFFFFF)
-    #else
-        #define DRFLAC_SIZE_MAX  0xFFFFFFFF
-    #endif
-#endif
-
-
-/* Using a macro as the definition of the drflac__full_decode_and_close_*() API family. Sue me. */
-#define DRFLAC_DEFINE_FULL_READ_AND_CLOSE(extension, type) \
-static type* drflac__full_read_and_close_ ## extension (drflac* pFlac, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut)\
-{                                                                                                                                                                   \
-    type* pSampleData = NULL;                                                                                                                                       \
-    drflac_uint64 totalPCMFrameCount;                                                                                                                               \
-                                                                                                                                                                    \
-    DRFLAC_ASSERT(pFlac != NULL);                                                                                                                                   \
-                                                                                                                                                                    \
-    totalPCMFrameCount = pFlac->totalPCMFrameCount;                                                                                                                 \
-                                                                                                                                                                    \
-    if (totalPCMFrameCount == 0) {                                                                                                                                  \
-        type buffer[4096];                                                                                                                                          \
-        drflac_uint64 pcmFramesRead;                                                                                                                                \
-        size_t sampleDataBufferSize = sizeof(buffer);                                                                                                               \
-                                                                                                                                                                    \
-        pSampleData = (type*)drflac__malloc_from_callbacks(sampleDataBufferSize, &pFlac->allocationCallbacks);                                                      \
-        if (pSampleData == NULL) {                                                                                                                                  \
-            goto on_error;                                                                                                                                          \
-        }                                                                                                                                                           \
-                                                                                                                                                                    \
-        while ((pcmFramesRead = (drflac_uint64)drflac_read_pcm_frames_##extension(pFlac, sizeof(buffer)/sizeof(buffer[0])/pFlac->channels, buffer)) > 0) {          \
-            if (((totalPCMFrameCount + pcmFramesRead) * pFlac->channels * sizeof(type)) > sampleDataBufferSize) {                                                   \
-                type* pNewSampleData;                                                                                                                               \
-                size_t newSampleDataBufferSize;                                                                                                                     \
-                                                                                                                                                                    \
-                newSampleDataBufferSize = sampleDataBufferSize * 2;                                                                                                 \
-                pNewSampleData = (type*)drflac__realloc_from_callbacks(pSampleData, newSampleDataBufferSize, sampleDataBufferSize, &pFlac->allocationCallbacks);    \
-                if (pNewSampleData == NULL) {                                                                                                                       \
-                    drflac__free_from_callbacks(pSampleData, &pFlac->allocationCallbacks);                                                                          \
-                    goto on_error;                                                                                                                                  \
-                }                                                                                                                                                   \
-                                                                                                                                                                    \
-                sampleDataBufferSize = newSampleDataBufferSize;                                                                                                     \
-                pSampleData = pNewSampleData;                                                                                                                       \
-            }                                                                                                                                                       \
-                                                                                                                                                                    \
-            DRFLAC_COPY_MEMORY(pSampleData + (totalPCMFrameCount*pFlac->channels), buffer, (size_t)(pcmFramesRead*pFlac->channels*sizeof(type)));                   \
-            totalPCMFrameCount += pcmFramesRead;                                                                                                                    \
-        }                                                                                                                                                           \
-                                                                                                                                                                    \
-        /* At this point everything should be decoded, but we just want to fill the unused part buffer with silence - need to                                       \
-           protect those ears from random noise! */                                                                                                                 \
-        DRFLAC_ZERO_MEMORY(pSampleData + (totalPCMFrameCount*pFlac->channels), (size_t)(sampleDataBufferSize - totalPCMFrameCount*pFlac->channels*sizeof(type)));   \
-    } else {                                                                                                                                                        \
-        drflac_uint64 dataSize = totalPCMFrameCount*pFlac->channels*sizeof(type);                                                                                   \
-        if (dataSize > DRFLAC_SIZE_MAX) {                                                                                                                           \
-            goto on_error;  /* The decoded data is too big. */                                                                                                      \
-        }                                                                                                                                                           \
-                                                                                                                                                                    \
-        pSampleData = (type*)drflac__malloc_from_callbacks((size_t)dataSize, &pFlac->allocationCallbacks);    /* <-- Safe cast as per the check above. */           \
-        if (pSampleData == NULL) {                                                                                                                                  \
-            goto on_error;                                                                                                                                          \
-        }                                                                                                                                                           \
-                                                                                                                                                                    \
-        totalPCMFrameCount = drflac_read_pcm_frames_##extension(pFlac, pFlac->totalPCMFrameCount, pSampleData);                                                     \
-    }                                                                                                                                                               \
-                                                                                                                                                                    \
-    if (sampleRateOut) *sampleRateOut = pFlac->sampleRate;                                                                                                          \
-    if (channelsOut) *channelsOut = pFlac->channels;                                                                                                                \
-    if (totalPCMFrameCountOut) *totalPCMFrameCountOut = totalPCMFrameCount;                                                                                         \
-                                                                                                                                                                    \
-    drflac_close(pFlac);                                                                                                                                            \
-    return pSampleData;                                                                                                                                             \
-                                                                                                                                                                    \
-on_error:                                                                                                                                                           \
-    drflac_close(pFlac);                                                                                                                                            \
-    return NULL;                                                                                                                                                    \
-}
-
-DRFLAC_DEFINE_FULL_READ_AND_CLOSE(s32, drflac_int32)
-DRFLAC_DEFINE_FULL_READ_AND_CLOSE(s16, drflac_int16)
-DRFLAC_DEFINE_FULL_READ_AND_CLOSE(f32, float)
-
-DRFLAC_API drflac_int32* drflac_open_and_read_pcm_frames_s32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut, const drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    drflac* pFlac;
-
-    if (channelsOut) {
-        *channelsOut = 0;
-    }
-    if (sampleRateOut) {
-        *sampleRateOut = 0;
-    }
-    if (totalPCMFrameCountOut) {
-        *totalPCMFrameCountOut = 0;
-    }
-
-    pFlac = drflac_open(onRead, onSeek, pUserData, pAllocationCallbacks);
-    if (pFlac == NULL) {
-        return NULL;
-    }
-
-    return drflac__full_read_and_close_s32(pFlac, channelsOut, sampleRateOut, totalPCMFrameCountOut);
-}
-
-DRFLAC_API drflac_int16* drflac_open_and_read_pcm_frames_s16(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut, const drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    drflac* pFlac;
-
-    if (channelsOut) {
-        *channelsOut = 0;
-    }
-    if (sampleRateOut) {
-        *sampleRateOut = 0;
-    }
-    if (totalPCMFrameCountOut) {
-        *totalPCMFrameCountOut = 0;
-    }
-
-    pFlac = drflac_open(onRead, onSeek, pUserData, pAllocationCallbacks);
-    if (pFlac == NULL) {
-        return NULL;
-    }
-
-    return drflac__full_read_and_close_s16(pFlac, channelsOut, sampleRateOut, totalPCMFrameCountOut);
-}
-
-DRFLAC_API float* drflac_open_and_read_pcm_frames_f32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut, const drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    drflac* pFlac;
-
-    if (channelsOut) {
-        *channelsOut = 0;
-    }
-    if (sampleRateOut) {
-        *sampleRateOut = 0;
-    }
-    if (totalPCMFrameCountOut) {
-        *totalPCMFrameCountOut = 0;
-    }
-
-    pFlac = drflac_open(onRead, onSeek, pUserData, pAllocationCallbacks);
-    if (pFlac == NULL) {
-        return NULL;
-    }
-
-    return drflac__full_read_and_close_f32(pFlac, channelsOut, sampleRateOut, totalPCMFrameCountOut);
-}
-
-#ifndef DR_FLAC_NO_STDIO
-DRFLAC_API drflac_int32* drflac_open_file_and_read_pcm_frames_s32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    drflac* pFlac;
-
-    if (sampleRate) {
-        *sampleRate = 0;
-    }
-    if (channels) {
-        *channels = 0;
-    }
-    if (totalPCMFrameCount) {
-        *totalPCMFrameCount = 0;
-    }
-
-    pFlac = drflac_open_file(filename, pAllocationCallbacks);
-    if (pFlac == NULL) {
-        return NULL;
-    }
-
-    return drflac__full_read_and_close_s32(pFlac, channels, sampleRate, totalPCMFrameCount);
-}
-
-DRFLAC_API drflac_int16* drflac_open_file_and_read_pcm_frames_s16(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    drflac* pFlac;
-
-    if (sampleRate) {
-        *sampleRate = 0;
-    }
-    if (channels) {
-        *channels = 0;
-    }
-    if (totalPCMFrameCount) {
-        *totalPCMFrameCount = 0;
-    }
-
-    pFlac = drflac_open_file(filename, pAllocationCallbacks);
-    if (pFlac == NULL) {
-        return NULL;
-    }
-
-    return drflac__full_read_and_close_s16(pFlac, channels, sampleRate, totalPCMFrameCount);
-}
-
-DRFLAC_API float* drflac_open_file_and_read_pcm_frames_f32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    drflac* pFlac;
-
-    if (sampleRate) {
-        *sampleRate = 0;
-    }
-    if (channels) {
-        *channels = 0;
-    }
-    if (totalPCMFrameCount) {
-        *totalPCMFrameCount = 0;
-    }
-
-    pFlac = drflac_open_file(filename, pAllocationCallbacks);
-    if (pFlac == NULL) {
-        return NULL;
-    }
-
-    return drflac__full_read_and_close_f32(pFlac, channels, sampleRate, totalPCMFrameCount);
-}
-#endif
-
-DRFLAC_API drflac_int32* drflac_open_memory_and_read_pcm_frames_s32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    drflac* pFlac;
-
-    if (sampleRate) {
-        *sampleRate = 0;
-    }
-    if (channels) {
-        *channels = 0;
-    }
-    if (totalPCMFrameCount) {
-        *totalPCMFrameCount = 0;
-    }
-
-    pFlac = drflac_open_memory(data, dataSize, pAllocationCallbacks);
-    if (pFlac == NULL) {
-        return NULL;
-    }
-
-    return drflac__full_read_and_close_s32(pFlac, channels, sampleRate, totalPCMFrameCount);
-}
-
-DRFLAC_API drflac_int16* drflac_open_memory_and_read_pcm_frames_s16(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    drflac* pFlac;
-
-    if (sampleRate) {
-        *sampleRate = 0;
-    }
-    if (channels) {
-        *channels = 0;
-    }
-    if (totalPCMFrameCount) {
-        *totalPCMFrameCount = 0;
-    }
-
-    pFlac = drflac_open_memory(data, dataSize, pAllocationCallbacks);
-    if (pFlac == NULL) {
-        return NULL;
-    }
-
-    return drflac__full_read_and_close_s16(pFlac, channels, sampleRate, totalPCMFrameCount);
-}
-
-DRFLAC_API float* drflac_open_memory_and_read_pcm_frames_f32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    drflac* pFlac;
-
-    if (sampleRate) {
-        *sampleRate = 0;
-    }
-    if (channels) {
-        *channels = 0;
-    }
-    if (totalPCMFrameCount) {
-        *totalPCMFrameCount = 0;
-    }
-
-    pFlac = drflac_open_memory(data, dataSize, pAllocationCallbacks);
-    if (pFlac == NULL) {
-        return NULL;
-    }
-
-    return drflac__full_read_and_close_f32(pFlac, channels, sampleRate, totalPCMFrameCount);
-}
-
-
-DRFLAC_API void drflac_free(void* p, const drflac_allocation_callbacks* pAllocationCallbacks)
-{
-    if (pAllocationCallbacks != NULL) {
-        drflac__free_from_callbacks(p, pAllocationCallbacks);
-    } else {
-        drflac__free_default(p, NULL);
-    }
-}
-
-
-
-
-DRFLAC_API void drflac_init_vorbis_comment_iterator(drflac_vorbis_comment_iterator* pIter, drflac_uint32 commentCount, const void* pComments)
-{
-    if (pIter == NULL) {
-        return;
-    }
-
-    pIter->countRemaining = commentCount;
-    pIter->pRunningData   = (const char*)pComments;
-}
-
-DRFLAC_API const char* drflac_next_vorbis_comment(drflac_vorbis_comment_iterator* pIter, drflac_uint32* pCommentLengthOut)
-{
-    drflac_int32 length;
-    const char* pComment;
-
-    /* Safety. */
-    if (pCommentLengthOut) {
-        *pCommentLengthOut = 0;
-    }
-
-    if (pIter == NULL || pIter->countRemaining == 0 || pIter->pRunningData == NULL) {
-        return NULL;
-    }
-
-    length = drflac__le2host_32(*(const drflac_uint32*)pIter->pRunningData);
-    pIter->pRunningData += 4;
-
-    pComment = pIter->pRunningData;
-    pIter->pRunningData += length;
-    pIter->countRemaining -= 1;
-
-    if (pCommentLengthOut) {
-        *pCommentLengthOut = length;
-    }
-
-    return pComment;
-}
-
-
-
-
-DRFLAC_API void drflac_init_cuesheet_track_iterator(drflac_cuesheet_track_iterator* pIter, drflac_uint32 trackCount, const void* pTrackData)
-{
-    if (pIter == NULL) {
-        return;
-    }
-
-    pIter->countRemaining = trackCount;
-    pIter->pRunningData   = (const char*)pTrackData;
-}
-
-DRFLAC_API drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterator* pIter, drflac_cuesheet_track* pCuesheetTrack)
-{
-    drflac_cuesheet_track cuesheetTrack;
-    const char* pRunningData;
-    drflac_uint64 offsetHi;
-    drflac_uint64 offsetLo;
-
-    if (pIter == NULL || pIter->countRemaining == 0 || pIter->pRunningData == NULL) {
-        return DRFLAC_FALSE;
-    }
-
-    pRunningData = pIter->pRunningData;
-
-    offsetHi                   = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
-    offsetLo                   = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
-    cuesheetTrack.offset       = offsetLo | (offsetHi << 32);
-    cuesheetTrack.trackNumber  = pRunningData[0];                                         pRunningData += 1;
-    DRFLAC_COPY_MEMORY(cuesheetTrack.ISRC, pRunningData, sizeof(cuesheetTrack.ISRC));     pRunningData += 12;
-    cuesheetTrack.isAudio      = (pRunningData[0] & 0x80) != 0;
-    cuesheetTrack.preEmphasis  = (pRunningData[0] & 0x40) != 0;                           pRunningData += 14;
-    cuesheetTrack.indexCount   = pRunningData[0];                                         pRunningData += 1;
-    cuesheetTrack.pIndexPoints = (const drflac_cuesheet_track_index*)pRunningData;        pRunningData += cuesheetTrack.indexCount * sizeof(drflac_cuesheet_track_index);
-
-    pIter->pRunningData = pRunningData;
-    pIter->countRemaining -= 1;
-
-    if (pCuesheetTrack) {
-        *pCuesheetTrack = cuesheetTrack;
-    }
-
-    return DRFLAC_TRUE;
-}
-
-#if defined(__GNUC__)
-    #pragma GCC diagnostic pop
-#endif
-#endif  /* DR_FLAC_IMPLEMENTATION */
-
-
-/*
-REVISION HISTORY
-================
-v0.12.13 - 2020-05-16
-  - Add compile-time and run-time version querying.
-    - DRFLAC_VERSION_MINOR
-    - DRFLAC_VERSION_MAJOR
-    - DRFLAC_VERSION_REVISION
-    - DRFLAC_VERSION_STRING
-    - drflac_version()
-    - drflac_version_string()
-
-v0.12.12 - 2020-04-30
-  - Fix compilation errors with VC6.
-
-v0.12.11 - 2020-04-19
-  - Fix some pedantic warnings.
-  - Fix some undefined behaviour warnings.
-
-v0.12.10 - 2020-04-10
-  - Fix some bugs when trying to seek with an invalid seek table.
-
-v0.12.9 - 2020-04-05
-  - Fix warnings.
-
-v0.12.8 - 2020-04-04
-  - Add drflac_open_file_w() and drflac_open_file_with_metadata_w().
-  - Fix some static analysis warnings.
-  - Minor documentation updates.
-
-v0.12.7 - 2020-03-14
-  - Fix compilation errors with VC6.
-
-v0.12.6 - 2020-03-07
-  - Fix compilation error with Visual Studio .NET 2003.
-
-v0.12.5 - 2020-01-30
-  - Silence some static analysis warnings.
-
-v0.12.4 - 2020-01-29
-  - Silence some static analysis warnings.
-
-v0.12.3 - 2019-12-02
-  - Fix some warnings when compiling with GCC and the -Og flag.
-  - Fix a crash in out-of-memory situations.
-  - Fix potential integer overflow bug.
-  - Fix some static analysis warnings.
-  - Fix a possible crash when using custom memory allocators without a custom realloc() implementation.
-  - Fix a bug with binary search seeking where the bits per sample is not a multiple of 8.
-
-v0.12.2 - 2019-10-07
-  - Internal code clean up.
-
-v0.12.1 - 2019-09-29
-  - Fix some Clang Static Analyzer warnings.
-  - Fix an unused variable warning.
-
-v0.12.0 - 2019-09-23
-  - API CHANGE: Add support for user defined memory allocation routines. This system allows the program to specify their own memory allocation
-    routines with a user data pointer for client-specific contextual data. This adds an extra parameter to the end of the following APIs:
-    - drflac_open()
-    - drflac_open_relaxed()
-    - drflac_open_with_metadata()
-    - drflac_open_with_metadata_relaxed()
-    - drflac_open_file()
-    - drflac_open_file_with_metadata()
-    - drflac_open_memory()
-    - drflac_open_memory_with_metadata()
-    - drflac_open_and_read_pcm_frames_s32()
-    - drflac_open_and_read_pcm_frames_s16()
-    - drflac_open_and_read_pcm_frames_f32()
-    - drflac_open_file_and_read_pcm_frames_s32()
-    - drflac_open_file_and_read_pcm_frames_s16()
-    - drflac_open_file_and_read_pcm_frames_f32()
-    - drflac_open_memory_and_read_pcm_frames_s32()
-    - drflac_open_memory_and_read_pcm_frames_s16()
-    - drflac_open_memory_and_read_pcm_frames_f32()
-    Set this extra parameter to NULL to use defaults which is the same as the previous behaviour. Setting this NULL will use
-    DRFLAC_MALLOC, DRFLAC_REALLOC and DRFLAC_FREE.
-  - Remove deprecated APIs:
-    - drflac_read_s32()
-    - drflac_read_s16()
-    - drflac_read_f32()
-    - drflac_seek_to_sample()
-    - drflac_open_and_decode_s32()
-    - drflac_open_and_decode_s16()
-    - drflac_open_and_decode_f32()
-    - drflac_open_and_decode_file_s32()
-    - drflac_open_and_decode_file_s16()
-    - drflac_open_and_decode_file_f32()
-    - drflac_open_and_decode_memory_s32()
-    - drflac_open_and_decode_memory_s16()
-    - drflac_open_and_decode_memory_f32()
-  - Remove drflac.totalSampleCount which is now replaced with drflac.totalPCMFrameCount. You can emulate drflac.totalSampleCount
-    by doing pFlac->totalPCMFrameCount*pFlac->channels.
-  - Rename drflac.currentFrame to drflac.currentFLACFrame to remove ambiguity with PCM frames.
-  - Fix errors when seeking to the end of a stream.
-  - Optimizations to seeking.
-  - SSE improvements and optimizations.
-  - ARM NEON optimizations.
-  - Optimizations to drflac_read_pcm_frames_s16().
-  - Optimizations to drflac_read_pcm_frames_s32().
-
-v0.11.10 - 2019-06-26
-  - Fix a compiler error.
-
-v0.11.9 - 2019-06-16
-  - Silence some ThreadSanitizer warnings.
-
-v0.11.8 - 2019-05-21
-  - Fix warnings.
-
-v0.11.7 - 2019-05-06
-  - C89 fixes.
-
-v0.11.6 - 2019-05-05
-  - Add support for C89.
-  - Fix a compiler warning when CRC is disabled.
-  - Change license to choice of public domain or MIT-0.
-
-v0.11.5 - 2019-04-19
-  - Fix a compiler error with GCC.
-
-v0.11.4 - 2019-04-17
-  - Fix some warnings with GCC when compiling with -std=c99.
-
-v0.11.3 - 2019-04-07
-  - Silence warnings with GCC.
-
-v0.11.2 - 2019-03-10
-  - Fix a warning.
-
-v0.11.1 - 2019-02-17
-  - Fix a potential bug with seeking.
-
-v0.11.0 - 2018-12-16
-  - API CHANGE: Deprecated drflac_read_s32(), drflac_read_s16() and drflac_read_f32() and replaced them with
-    drflac_read_pcm_frames_s32(), drflac_read_pcm_frames_s16() and drflac_read_pcm_frames_f32(). The new APIs take
-    and return PCM frame counts instead of sample counts. To upgrade you will need to change the input count by
-    dividing it by the channel count, and then do the same with the return value.
-  - API_CHANGE: Deprecated drflac_seek_to_sample() and replaced with drflac_seek_to_pcm_frame(). Same rules as
-    the changes to drflac_read_*() apply.
-  - API CHANGE: Deprecated drflac_open_and_decode_*() and replaced with drflac_open_*_and_read_*(). Same rules as
-    the changes to drflac_read_*() apply.
-  - Optimizations.
-
-v0.10.0 - 2018-09-11
-  - Remove the DR_FLAC_NO_WIN32_IO option and the Win32 file IO functionality. If you need to use Win32 file IO you
-    need to do it yourself via the callback API.
-  - Fix the clang build.
-  - Fix undefined behavior.
-  - Fix errors with CUESHEET metdata blocks.
-  - Add an API for iterating over each cuesheet track in the CUESHEET metadata block. This works the same way as the
-    Vorbis comment API.
-  - Other miscellaneous bug fixes, mostly relating to invalid FLAC streams.
-  - Minor optimizations.
-
-v0.9.11 - 2018-08-29
-  - Fix a bug with sample reconstruction.
-
-v0.9.10 - 2018-08-07
-  - Improve 64-bit detection.
-
-v0.9.9 - 2018-08-05
-  - Fix C++ build on older versions of GCC.
-
-v0.9.8 - 2018-07-24
-  - Fix compilation errors.
-
-v0.9.7 - 2018-07-05
-  - Fix a warning.
-
-v0.9.6 - 2018-06-29
-  - Fix some typos.
-
-v0.9.5 - 2018-06-23
-  - Fix some warnings.
-
-v0.9.4 - 2018-06-14
-  - Optimizations to seeking.
-  - Clean up.
-
-v0.9.3 - 2018-05-22
-  - Bug fix.
-
-v0.9.2 - 2018-05-12
-  - Fix a compilation error due to a missing break statement.
-
-v0.9.1 - 2018-04-29
-  - Fix compilation error with Clang.
-
-v0.9 - 2018-04-24
-  - Fix Clang build.
-  - Start using major.minor.revision versioning.
-
-v0.8g - 2018-04-19
-  - Fix build on non-x86/x64 architectures.
-
-v0.8f - 2018-02-02
-  - Stop pretending to support changing rate/channels mid stream.
-
-v0.8e - 2018-02-01
-  - Fix a crash when the block size of a frame is larger than the maximum block size defined by the FLAC stream.
-  - Fix a crash the the Rice partition order is invalid.
-
-v0.8d - 2017-09-22
-  - Add support for decoding streams with ID3 tags. ID3 tags are just skipped.
-
-v0.8c - 2017-09-07
-  - Fix warning on non-x86/x64 architectures.
-
-v0.8b - 2017-08-19
-  - Fix build on non-x86/x64 architectures.
-
-v0.8a - 2017-08-13
-  - A small optimization for the Clang build.
-
-v0.8 - 2017-08-12
-  - API CHANGE: Rename dr_* types to drflac_*.
-  - Optimizations. This brings dr_flac back to about the same class of efficiency as the reference implementation.
-  - Add support for custom implementations of malloc(), realloc(), etc.
-  - Add CRC checking to Ogg encapsulated streams.
-  - Fix VC++ 6 build. This is only for the C++ compiler. The C compiler is not currently supported.
-  - Bug fixes.
-
-v0.7 - 2017-07-23
-  - Add support for opening a stream without a header block. To do this, use drflac_open_relaxed() / drflac_open_with_metadata_relaxed().
-
-v0.6 - 2017-07-22
-  - Add support for recovering from invalid frames. With this change, dr_flac will simply skip over invalid frames as if they
-    never existed. Frames are checked against their sync code, the CRC-8 of the frame header and the CRC-16 of the whole frame.
-
-v0.5 - 2017-07-16
-  - Fix typos.
-  - Change drflac_bool* types to unsigned.
-  - Add CRC checking. This makes dr_flac slower, but can be disabled with #define DR_FLAC_NO_CRC.
-
-v0.4f - 2017-03-10
-  - Fix a couple of bugs with the bitstreaming code.
-
-v0.4e - 2017-02-17
-  - Fix some warnings.
-
-v0.4d - 2016-12-26
-  - Add support for 32-bit floating-point PCM decoding.
-  - Use drflac_int* and drflac_uint* sized types to improve compiler support.
-  - Minor improvements to documentation.
-
-v0.4c - 2016-12-26
-  - Add support for signed 16-bit integer PCM decoding.
-
-v0.4b - 2016-10-23
-  - A minor change to drflac_bool8 and drflac_bool32 types.
-
-v0.4a - 2016-10-11
-  - Rename drBool32 to drflac_bool32 for styling consistency.
-
-v0.4 - 2016-09-29
-  - API/ABI CHANGE: Use fixed size 32-bit booleans instead of the built-in bool type.
-  - API CHANGE: Rename drflac_open_and_decode*() to drflac_open_and_decode*_s32().
-  - API CHANGE: Swap the order of "channels" and "sampleRate" parameters in drflac_open_and_decode*(). Rationale for this is to
-    keep it consistent with drflac_audio.
-
-v0.3f - 2016-09-21
-  - Fix a warning with GCC.
-
-v0.3e - 2016-09-18
-  - Fixed a bug where GCC 4.3+ was not getting properly identified.
-  - Fixed a few typos.
-  - Changed date formats to ISO 8601 (YYYY-MM-DD).
-
-v0.3d - 2016-06-11
-  - Minor clean up.
-
-v0.3c - 2016-05-28
-  - Fixed compilation error.
-
-v0.3b - 2016-05-16
-  - Fixed Linux/GCC build.
-  - Updated documentation.
-
-v0.3a - 2016-05-15
-  - Minor fixes to documentation.
-
-v0.3 - 2016-05-11
-  - Optimizations. Now at about parity with the reference implementation on 32-bit builds.
-  - Lots of clean up.
-
-v0.2b - 2016-05-10
-  - Bug fixes.
-
-v0.2a - 2016-05-10
-  - Made drflac_open_and_decode() more robust.
-  - Removed an unused debugging variable
-
-v0.2 - 2016-05-09
-  - Added support for Ogg encapsulation.
-  - API CHANGE. Have the onSeek callback take a third argument which specifies whether or not the seek
-    should be relative to the start or the current position. Also changes the seeking rules such that
-    seeking offsets will never be negative.
-  - Have drflac_open_and_decode() fail gracefully if the stream has an unknown total sample count.
-
-v0.1b - 2016-05-07
-  - Properly close the file handle in drflac_open_file() and family when the decoder fails to initialize.
-  - Removed a stale comment.
-
-v0.1a - 2016-05-05
-  - Minor formatting changes.
-  - Fixed a warning on the GCC build.
-
-v0.1 - 2016-05-03
-  - Initial versioned release.
-*/
-
-/*
-This software is available as a choice of the following licenses. Choose
-whichever you prefer.
-
-===============================================================================
-ALTERNATIVE 1 - Public Domain (www.unlicense.org)
-===============================================================================
-This is free and unencumbered software released into the public domain.
-
-Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
-software, either in source code form or as a compiled binary, for any purpose,
-commercial or non-commercial, and by any means.
-
-In jurisdictions that recognize copyright laws, the author or authors of this
-software dedicate any and all copyright interest in the software to the public
-domain. We make this dedication for the benefit of the public at large and to
-the detriment of our heirs and successors. We intend this dedication to be an
-overt act of relinquishment in perpetuity of all present and future rights to
-this software under copyright law.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-For more information, please refer to <http://unlicense.org/>
-
-===============================================================================
-ALTERNATIVE 2 - MIT No Attribution
-===============================================================================
-Copyright 2020 David Reid
-
-Permission is hereby granted, free of charge, to any person obtaining a copy of
-this software and associated documentation files (the "Software"), to deal in
-the Software without restriction, including without limitation the rights to
-use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
-of the Software, and to permit persons to whom the Software is furnished to do
-so.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-*/
+/*
+FLAC audio decoder. Choice of public domain or MIT-0. See license statements at the end of this file.
+dr_flac - v0.12.13 - 2020-05-16
+
+David Reid - mackron@gmail.com
+
+GitHub: https://github.com/mackron/dr_libs
+*/
+
+/*
+RELEASE NOTES - v0.12.0
+=======================
+Version 0.12.0 has breaking API changes including changes to the existing API and the removal of deprecated APIs.
+
+
+Improved Client-Defined Memory Allocation
+-----------------------------------------
+The main change with this release is the addition of a more flexible way of implementing custom memory allocation routines. The
+existing system of DRFLAC_MALLOC, DRFLAC_REALLOC and DRFLAC_FREE are still in place and will be used by default when no custom
+allocation callbacks are specified.
+
+To use the new system, you pass in a pointer to a drflac_allocation_callbacks object to drflac_open() and family, like this:
+
+    void* my_malloc(size_t sz, void* pUserData)
+    {
+        return malloc(sz);
+    }
+    void* my_realloc(void* p, size_t sz, void* pUserData)
+    {
+        return realloc(p, sz);
+    }
+    void my_free(void* p, void* pUserData)
+    {
+        free(p);
+    }
+
+    ...
+
+    drflac_allocation_callbacks allocationCallbacks;
+    allocationCallbacks.pUserData = &myData;
+    allocationCallbacks.onMalloc  = my_malloc;
+    allocationCallbacks.onRealloc = my_realloc;
+    allocationCallbacks.onFree    = my_free;
+    drflac* pFlac = drflac_open_file("my_file.flac", &allocationCallbacks);
+
+The advantage of this new system is that it allows you to specify user data which will be passed in to the allocation routines.
+
+Passing in null for the allocation callbacks object will cause dr_flac to use defaults which is the same as DRFLAC_MALLOC,
+DRFLAC_REALLOC and DRFLAC_FREE and the equivalent of how it worked in previous versions.
+
+Every API that opens a drflac object now takes this extra parameter. These include the following:
+
+    drflac_open()
+    drflac_open_relaxed()
+    drflac_open_with_metadata()
+    drflac_open_with_metadata_relaxed()
+    drflac_open_file()
+    drflac_open_file_with_metadata()
+    drflac_open_memory()
+    drflac_open_memory_with_metadata()
+    drflac_open_and_read_pcm_frames_s32()
+    drflac_open_and_read_pcm_frames_s16()
+    drflac_open_and_read_pcm_frames_f32()
+    drflac_open_file_and_read_pcm_frames_s32()
+    drflac_open_file_and_read_pcm_frames_s16()
+    drflac_open_file_and_read_pcm_frames_f32()
+    drflac_open_memory_and_read_pcm_frames_s32()
+    drflac_open_memory_and_read_pcm_frames_s16()
+    drflac_open_memory_and_read_pcm_frames_f32()
+
+
+
+Optimizations
+-------------
+Seeking performance has been greatly improved. A new binary search based seeking algorithm has been introduced which significantly
+improves performance over the brute force method which was used when no seek table was present. Seek table based seeking also takes
+advantage of the new binary search seeking system to further improve performance there as well. Note that this depends on CRC which
+means it will be disabled when DR_FLAC_NO_CRC is used.
+
+The SSE4.1 pipeline has been cleaned up and optimized. You should see some improvements with decoding speed of 24-bit files in
+particular. 16-bit streams should also see some improvement.
+
+drflac_read_pcm_frames_s16() has been optimized. Previously this sat on top of drflac_read_pcm_frames_s32() and performed it's s32
+to s16 conversion in a second pass. This is now all done in a single pass. This includes SSE2 and ARM NEON optimized paths.
+
+A minor optimization has been implemented for drflac_read_pcm_frames_s32(). This will now use an SSE2 optimized pipeline for stereo
+channel reconstruction which is the last part of the decoding process.
+
+The ARM build has seen a few improvements. The CLZ (count leading zeroes) and REV (byte swap) instructions are now used when
+compiling with GCC and Clang which is achieved using inline assembly. The CLZ instruction requires ARM architecture version 5 at
+compile time and the REV instruction requires ARM architecture version 6.
+
+An ARM NEON optimized pipeline has been implemented. To enable this you'll need to add -mfpu=neon to the command line when compiling.
+
+
+Removed APIs
+------------
+The following APIs were deprecated in version 0.11.0 and have been completely removed in version 0.12.0:
+
+    drflac_read_s32()                   -> drflac_read_pcm_frames_s32()
+    drflac_read_s16()                   -> drflac_read_pcm_frames_s16()
+    drflac_read_f32()                   -> drflac_read_pcm_frames_f32()
+    drflac_seek_to_sample()             -> drflac_seek_to_pcm_frame()
+    drflac_open_and_decode_s32()        -> drflac_open_and_read_pcm_frames_s32()
+    drflac_open_and_decode_s16()        -> drflac_open_and_read_pcm_frames_s16()
+    drflac_open_and_decode_f32()        -> drflac_open_and_read_pcm_frames_f32()
+    drflac_open_and_decode_file_s32()   -> drflac_open_file_and_read_pcm_frames_s32()
+    drflac_open_and_decode_file_s16()   -> drflac_open_file_and_read_pcm_frames_s16()
+    drflac_open_and_decode_file_f32()   -> drflac_open_file_and_read_pcm_frames_f32()
+    drflac_open_and_decode_memory_s32() -> drflac_open_memory_and_read_pcm_frames_s32()
+    drflac_open_and_decode_memory_s16() -> drflac_open_memory_and_read_pcm_frames_s16()
+    drflac_open_and_decode_memory_f32() -> drflac_open_memroy_and_read_pcm_frames_f32()
+
+Prior versions of dr_flac operated on a per-sample basis whereas now it operates on PCM frames. The removed APIs all relate
+to the old per-sample APIs. You now need to use the "pcm_frame" versions.
+*/
+
+
+/*
+Introduction
+============
+dr_flac is a single file library. To use it, do something like the following in one .c file.
+
+    ```c
+    #define DR_FLAC_IMPLEMENTATION
+    #include "dr_flac.h"
+    ```
+
+You can then #include this file in other parts of the program as you would with any other header file. To decode audio data, do something like the following:
+
+    ```c
+    drflac* pFlac = drflac_open_file("MySong.flac", NULL);
+    if (pFlac == NULL) {
+        // Failed to open FLAC file
+    }
+
+    drflac_int32* pSamples = malloc(pFlac->totalPCMFrameCount * pFlac->channels * sizeof(drflac_int32));
+    drflac_uint64 numberOfInterleavedSamplesActuallyRead = drflac_read_pcm_frames_s32(pFlac, pFlac->totalPCMFrameCount, pSamples);
+    ```
+
+The drflac object represents the decoder. It is a transparent type so all the information you need, such as the number of channels and the bits per sample,
+should be directly accessible - just make sure you don't change their values. Samples are always output as interleaved signed 32-bit PCM. In the example above
+a native FLAC stream was opened, however dr_flac has seamless support for Ogg encapsulated FLAC streams as well.
+
+You do not need to decode the entire stream in one go - you just specify how many samples you'd like at any given time and the decoder will give you as many
+samples as it can, up to the amount requested. Later on when you need the next batch of samples, just call it again. Example:
+
+    ```c
+    while (drflac_read_pcm_frames_s32(pFlac, chunkSizeInPCMFrames, pChunkSamples) > 0) {
+        do_something();
+    }
+    ```
+
+You can seek to a specific PCM frame with `drflac_seek_to_pcm_frame()`.
+
+If you just want to quickly decode an entire FLAC file in one go you can do something like this:
+
+    ```c
+    unsigned int channels;
+    unsigned int sampleRate;
+    drflac_uint64 totalPCMFrameCount;
+    drflac_int32* pSampleData = drflac_open_file_and_read_pcm_frames_s32("MySong.flac", &channels, &sampleRate, &totalPCMFrameCount, NULL);
+    if (pSampleData == NULL) {
+        // Failed to open and decode FLAC file.
+    }
+
+    ...
+
+    drflac_free(pSampleData);
+    ```
+
+You can read samples as signed 16-bit integer and 32-bit floating-point PCM with the *_s16() and *_f32() family of APIs respectively, but note that these
+should be considered lossy.
+
+
+If you need access to metadata (album art, etc.), use `drflac_open_with_metadata()`, `drflac_open_file_with_metdata()` or `drflac_open_memory_with_metadata()`.
+The rationale for keeping these APIs separate is that they're slightly slower than the normal versions and also just a little bit harder to use. dr_flac
+reports metadata to the application through the use of a callback, and every metadata block is reported before `drflac_open_with_metdata()` returns.
+
+The main opening APIs (`drflac_open()`, etc.) will fail if the header is not present. The presents a problem in certain scenarios such as broadcast style
+streams or internet radio where the header may not be present because the user has started playback mid-stream. To handle this, use the relaxed APIs:
+    
+    `drflac_open_relaxed()`
+    `drflac_open_with_metadata_relaxed()`
+
+It is not recommended to use these APIs for file based streams because a missing header would usually indicate a corrupt or perverse file. In addition, these
+APIs can take a long time to initialize because they may need to spend a lot of time finding the first frame.
+
+
+
+Build Options
+=============
+#define these options before including this file.
+
+#define DR_FLAC_NO_STDIO
+  Disable `drflac_open_file()` and family.
+
+#define DR_FLAC_NO_OGG
+  Disables support for Ogg/FLAC streams.
+
+#define DR_FLAC_BUFFER_SIZE <number>
+  Defines the size of the internal buffer to store data from onRead(). This buffer is used to reduce the number of calls back to the client for more data.
+  Larger values means more memory, but better performance. My tests show diminishing returns after about 4KB (which is the default). Consider reducing this if
+  you have a very efficient implementation of onRead(), or increase it if it's very inefficient. Must be a multiple of 8.
+
+#define DR_FLAC_NO_CRC
+  Disables CRC checks. This will offer a performance boost when CRC is unnecessary. This will disable binary search seeking. When seeking, the seek table will
+  be used if available. Otherwise the seek will be performed using brute force.
+
+#define DR_FLAC_NO_SIMD
+  Disables SIMD optimizations (SSE on x86/x64 architectures, NEON on ARM architectures). Use this if you are having compatibility issues with your compiler.
+
+
+
+Notes
+=====
+- dr_flac does not support changing the sample rate nor channel count mid stream.
+- dr_flac is not thread-safe, but its APIs can be called from any thread so long as you do your own synchronization.
+- When using Ogg encapsulation, a corrupted metadata block will result in `drflac_open_with_metadata()` and `drflac_open()` returning inconsistent samples due
+  to differences in corrupted stream recorvery logic between the two APIs.
+*/
+
+#ifndef dr_flac_h
+#define dr_flac_h
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define DRFLAC_STRINGIFY(x)      #x
+#define DRFLAC_XSTRINGIFY(x)     DRFLAC_STRINGIFY(x)
+
+#define DRFLAC_VERSION_MAJOR     0
+#define DRFLAC_VERSION_MINOR     12
+#define DRFLAC_VERSION_REVISION  13
+#define DRFLAC_VERSION_STRING    DRFLAC_XSTRINGIFY(DRFLAC_VERSION_MAJOR) "." DRFLAC_XSTRINGIFY(DRFLAC_VERSION_MINOR) "." DRFLAC_XSTRINGIFY(DRFLAC_VERSION_REVISION)
+
+#include <stddef.h> /* For size_t. */
+
+/* Sized types. Prefer built-in types. Fall back to stdint. */
+#ifdef _MSC_VER
+    #if defined(__clang__)
+        #pragma GCC diagnostic push
+        #pragma GCC diagnostic ignored "-Wlanguage-extension-token"
+        #pragma GCC diagnostic ignored "-Wlong-long"        
+        #pragma GCC diagnostic ignored "-Wc++11-long-long"
+    #endif
+    typedef   signed __int8  drflac_int8;
+    typedef unsigned __int8  drflac_uint8;
+    typedef   signed __int16 drflac_int16;
+    typedef unsigned __int16 drflac_uint16;
+    typedef   signed __int32 drflac_int32;
+    typedef unsigned __int32 drflac_uint32;
+    typedef   signed __int64 drflac_int64;
+    typedef unsigned __int64 drflac_uint64;
+    #if defined(__clang__)
+        #pragma GCC diagnostic pop
+    #endif
+#else
+    #include <stdint.h>
+    typedef int8_t           drflac_int8;
+    typedef uint8_t          drflac_uint8;
+    typedef int16_t          drflac_int16;
+    typedef uint16_t         drflac_uint16;
+    typedef int32_t          drflac_int32;
+    typedef uint32_t         drflac_uint32;
+    typedef int64_t          drflac_int64;
+    typedef uint64_t         drflac_uint64;
+#endif
+typedef drflac_uint8         drflac_bool8;
+typedef drflac_uint32        drflac_bool32;
+#define DRFLAC_TRUE          1
+#define DRFLAC_FALSE         0
+
+#if !defined(DRFLAC_API)
+    #if defined(DRFLAC_DLL)
+        #if defined(_WIN32)
+            #define DRFLAC_DLL_IMPORT  __declspec(dllimport)
+            #define DRFLAC_DLL_EXPORT  __declspec(dllexport)
+            #define DRFLAC_DLL_PRIVATE static
+        #else
+            #if defined(__GNUC__) && __GNUC__ >= 4
+                #define DRFLAC_DLL_IMPORT  __attribute__((visibility("default")))
+                #define DRFLAC_DLL_EXPORT  __attribute__((visibility("default")))
+                #define DRFLAC_DLL_PRIVATE __attribute__((visibility("hidden")))
+            #else
+                #define DRFLAC_DLL_IMPORT
+                #define DRFLAC_DLL_EXPORT
+                #define DRFLAC_DLL_PRIVATE static
+            #endif
+        #endif
+
+        #if defined(DR_FLAC_IMPLEMENTATION) || defined(DRFLAC_IMPLEMENTATION)
+            #define DRFLAC_API  DRFLAC_DLL_EXPORT
+        #else
+            #define DRFLAC_API  DRFLAC_DLL_IMPORT
+        #endif
+        #define DRFLAC_PRIVATE DRFLAC_DLL_PRIVATE
+    #else
+        #define DRFLAC_API extern
+        #define DRFLAC_PRIVATE static
+    #endif
+#endif
+
+#if defined(_MSC_VER) && _MSC_VER >= 1700   /* Visual Studio 2012 */
+    #define DRFLAC_DEPRECATED       __declspec(deprecated)
+#elif (defined(__GNUC__) && __GNUC__ >= 4)  /* GCC 4 */
+    #define DRFLAC_DEPRECATED       __attribute__((deprecated))
+#elif defined(__has_feature)                /* Clang */
+    #if __has_feature(attribute_deprecated)
+        #define DRFLAC_DEPRECATED   __attribute__((deprecated))
+    #else
+        #define DRFLAC_DEPRECATED
+    #endif
+#else
+    #define DRFLAC_DEPRECATED
+#endif
+
+DRFLAC_API void drflac_version(drflac_uint32* pMajor, drflac_uint32* pMinor, drflac_uint32* pRevision);
+DRFLAC_API const char* drflac_version_string();
+
+/*
+As data is read from the client it is placed into an internal buffer for fast access. This controls the size of that buffer. Larger values means more speed,
+but also more memory. In my testing there is diminishing returns after about 4KB, but you can fiddle with this to suit your own needs. Must be a multiple of 8.
+*/
+#ifndef DR_FLAC_BUFFER_SIZE
+#define DR_FLAC_BUFFER_SIZE   4096
+#endif
+
+/* Check if we can enable 64-bit optimizations. */
+#if defined(_WIN64) || defined(_LP64) || defined(__LP64__)
+#define DRFLAC_64BIT
+#endif
+
+#ifdef DRFLAC_64BIT
+typedef drflac_uint64 drflac_cache_t;
+#else
+typedef drflac_uint32 drflac_cache_t;
+#endif
+
+/* The various metadata block types. */
+#define DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO       0
+#define DRFLAC_METADATA_BLOCK_TYPE_PADDING          1
+#define DRFLAC_METADATA_BLOCK_TYPE_APPLICATION      2
+#define DRFLAC_METADATA_BLOCK_TYPE_SEEKTABLE        3
+#define DRFLAC_METADATA_BLOCK_TYPE_VORBIS_COMMENT   4
+#define DRFLAC_METADATA_BLOCK_TYPE_CUESHEET         5
+#define DRFLAC_METADATA_BLOCK_TYPE_PICTURE          6
+#define DRFLAC_METADATA_BLOCK_TYPE_INVALID          127
+
+/* The various picture types specified in the PICTURE block. */
+#define DRFLAC_PICTURE_TYPE_OTHER                   0
+#define DRFLAC_PICTURE_TYPE_FILE_ICON               1
+#define DRFLAC_PICTURE_TYPE_OTHER_FILE_ICON         2
+#define DRFLAC_PICTURE_TYPE_COVER_FRONT             3
+#define DRFLAC_PICTURE_TYPE_COVER_BACK              4
+#define DRFLAC_PICTURE_TYPE_LEAFLET_PAGE            5
+#define DRFLAC_PICTURE_TYPE_MEDIA                   6
+#define DRFLAC_PICTURE_TYPE_LEAD_ARTIST             7
+#define DRFLAC_PICTURE_TYPE_ARTIST                  8
+#define DRFLAC_PICTURE_TYPE_CONDUCTOR               9
+#define DRFLAC_PICTURE_TYPE_BAND                    10
+#define DRFLAC_PICTURE_TYPE_COMPOSER                11
+#define DRFLAC_PICTURE_TYPE_LYRICIST                12
+#define DRFLAC_PICTURE_TYPE_RECORDING_LOCATION      13
+#define DRFLAC_PICTURE_TYPE_DURING_RECORDING        14
+#define DRFLAC_PICTURE_TYPE_DURING_PERFORMANCE      15
+#define DRFLAC_PICTURE_TYPE_SCREEN_CAPTURE          16
+#define DRFLAC_PICTURE_TYPE_BRIGHT_COLORED_FISH     17
+#define DRFLAC_PICTURE_TYPE_ILLUSTRATION            18
+#define DRFLAC_PICTURE_TYPE_BAND_LOGOTYPE           19
+#define DRFLAC_PICTURE_TYPE_PUBLISHER_LOGOTYPE      20
+
+typedef enum
+{
+    drflac_container_native,
+    drflac_container_ogg,
+    drflac_container_unknown
+} drflac_container;
+
+typedef enum
+{
+    drflac_seek_origin_start,
+    drflac_seek_origin_current
+} drflac_seek_origin;
+
+/* Packing is important on this structure because we map this directly to the raw data within the SEEKTABLE metadata block. */
+#pragma pack(2)
+typedef struct
+{
+    drflac_uint64 firstPCMFrame;
+    drflac_uint64 flacFrameOffset;   /* The offset from the first byte of the header of the first frame. */
+    drflac_uint16 pcmFrameCount;
+} drflac_seekpoint;
+#pragma pack()
+
+typedef struct
+{
+    drflac_uint16 minBlockSizeInPCMFrames;
+    drflac_uint16 maxBlockSizeInPCMFrames;
+    drflac_uint32 minFrameSizeInPCMFrames;
+    drflac_uint32 maxFrameSizeInPCMFrames;
+    drflac_uint32 sampleRate;
+    drflac_uint8  channels;
+    drflac_uint8  bitsPerSample;
+    drflac_uint64 totalPCMFrameCount;
+    drflac_uint8  md5[16];
+} drflac_streaminfo;
+
+typedef struct
+{
+    /* The metadata type. Use this to know how to interpret the data below. */
+    drflac_uint32 type;
+
+    /*
+    A pointer to the raw data. This points to a temporary buffer so don't hold on to it. It's best to
+    not modify the contents of this buffer. Use the structures below for more meaningful and structured
+    information about the metadata. It's possible for this to be null.
+    */
+    const void* pRawData;
+
+    /* The size in bytes of the block and the buffer pointed to by pRawData if it's non-NULL. */
+    drflac_uint32 rawDataSize;
+
+    union
+    {
+        drflac_streaminfo streaminfo;
+
+        struct
+        {
+            int unused;
+        } padding;
+
+        struct
+        {
+            drflac_uint32 id;
+            const void* pData;
+            drflac_uint32 dataSize;
+        } application;
+
+        struct
+        {
+            drflac_uint32 seekpointCount;
+            const drflac_seekpoint* pSeekpoints;
+        } seektable;
+
+        struct
+        {
+            drflac_uint32 vendorLength;
+            const char* vendor;
+            drflac_uint32 commentCount;
+            const void* pComments;
+        } vorbis_comment;
+
+        struct
+        {
+            char catalog[128];
+            drflac_uint64 leadInSampleCount;
+            drflac_bool32 isCD;
+            drflac_uint8 trackCount;
+            const void* pTrackData;
+        } cuesheet;
+
+        struct
+        {
+            drflac_uint32 type;
+            drflac_uint32 mimeLength;
+            const char* mime;
+            drflac_uint32 descriptionLength;
+            const char* description;
+            drflac_uint32 width;
+            drflac_uint32 height;
+            drflac_uint32 colorDepth;
+            drflac_uint32 indexColorCount;
+            drflac_uint32 pictureDataSize;
+            const drflac_uint8* pPictureData;
+        } picture;
+    } data;
+} drflac_metadata;
+
+
+/*
+Callback for when data needs to be read from the client.
+
+
+Parameters
+----------
+pUserData (in)
+    The user data that was passed to drflac_open() and family.
+
+pBufferOut (out)
+    The output buffer.
+
+bytesToRead (in)
+    The number of bytes to read.
+
+
+Return Value
+------------
+The number of bytes actually read.
+
+
+Remarks
+-------
+A return value of less than bytesToRead indicates the end of the stream. Do _not_ return from this callback until either the entire bytesToRead is filled or
+you have reached the end of the stream.
+*/
+typedef size_t (* drflac_read_proc)(void* pUserData, void* pBufferOut, size_t bytesToRead);
+
+/*
+Callback for when data needs to be seeked.
+
+
+Parameters
+----------
+pUserData (in)
+    The user data that was passed to drflac_open() and family.
+
+offset (in)
+    The number of bytes to move, relative to the origin. Will never be negative.
+
+origin (in)
+    The origin of the seek - the current position or the start of the stream.
+
+
+Return Value
+------------
+Whether or not the seek was successful.
+
+
+Remarks
+-------
+The offset will never be negative. Whether or not it is relative to the beginning or current position is determined by the "origin" parameter which will be
+either drflac_seek_origin_start or drflac_seek_origin_current.
+
+When seeking to a PCM frame using drflac_seek_to_pcm_frame(), dr_flac may call this with an offset beyond the end of the FLAC stream. This needs to be detected
+and handled by returning DRFLAC_FALSE.
+*/
+typedef drflac_bool32 (* drflac_seek_proc)(void* pUserData, int offset, drflac_seek_origin origin);
+
+/*
+Callback for when a metadata block is read.
+
+
+Parameters
+----------
+pUserData (in)
+    The user data that was passed to drflac_open() and family.
+
+pMetadata (in)
+    A pointer to a structure containing the data of the metadata block.
+
+
+Remarks
+-------
+Use pMetadata->type to determine which metadata block is being handled and how to read the data.
+*/
+typedef void (* drflac_meta_proc)(void* pUserData, drflac_metadata* pMetadata);
+
+
+typedef struct
+{
+    void* pUserData;
+    void* (* onMalloc)(size_t sz, void* pUserData);
+    void* (* onRealloc)(void* p, size_t sz, void* pUserData);
+    void  (* onFree)(void* p, void* pUserData);
+} drflac_allocation_callbacks;
+
+/* Structure for internal use. Only used for decoders opened with drflac_open_memory. */
+typedef struct
+{
+    const drflac_uint8* data;
+    size_t dataSize;
+    size_t currentReadPos;
+} drflac__memory_stream;
+
+/* Structure for internal use. Used for bit streaming. */
+typedef struct
+{
+    /* The function to call when more data needs to be read. */
+    drflac_read_proc onRead;
+
+    /* The function to call when the current read position needs to be moved. */
+    drflac_seek_proc onSeek;
+
+    /* The user data to pass around to onRead and onSeek. */
+    void* pUserData;
+
+
+    /*
+    The number of unaligned bytes in the L2 cache. This will always be 0 until the end of the stream is hit. At the end of the
+    stream there will be a number of bytes that don't cleanly fit in an L1 cache line, so we use this variable to know whether
+    or not the bistreamer needs to run on a slower path to read those last bytes. This will never be more than sizeof(drflac_cache_t).
+    */
+    size_t unalignedByteCount;
+
+    /* The content of the unaligned bytes. */
+    drflac_cache_t unalignedCache;
+
+    /* The index of the next valid cache line in the "L2" cache. */
+    drflac_uint32 nextL2Line;
+
+    /* The number of bits that have been consumed by the cache. This is used to determine how many valid bits are remaining. */
+    drflac_uint32 consumedBits;
+
+    /*
+    The cached data which was most recently read from the client. There are two levels of cache. Data flows as such:
+    Client -> L2 -> L1. The L2 -> L1 movement is aligned and runs on a fast path in just a few instructions.
+    */
+    drflac_cache_t cacheL2[DR_FLAC_BUFFER_SIZE/sizeof(drflac_cache_t)];
+    drflac_cache_t cache;
+
+    /*
+    CRC-16. This is updated whenever bits are read from the bit stream. Manually set this to 0 to reset the CRC. For FLAC, this
+    is reset to 0 at the beginning of each frame.
+    */
+    drflac_uint16 crc16;
+    drflac_cache_t crc16Cache;              /* A cache for optimizing CRC calculations. This is filled when when the L1 cache is reloaded. */
+    drflac_uint32 crc16CacheIgnoredBytes;   /* The number of bytes to ignore when updating the CRC-16 from the CRC-16 cache. */
+} drflac_bs;
+
+typedef struct
+{
+    /* The type of the subframe: SUBFRAME_CONSTANT, SUBFRAME_VERBATIM, SUBFRAME_FIXED or SUBFRAME_LPC. */
+    drflac_uint8 subframeType;
+
+    /* The number of wasted bits per sample as specified by the sub-frame header. */
+    drflac_uint8 wastedBitsPerSample;
+
+    /* The order to use for the prediction stage for SUBFRAME_FIXED and SUBFRAME_LPC. */
+    drflac_uint8 lpcOrder;
+
+    /* A pointer to the buffer containing the decoded samples in the subframe. This pointer is an offset from drflac::pExtraData. */
+    drflac_int32* pSamplesS32;
+} drflac_subframe;
+
+typedef struct
+{
+    /*
+    If the stream uses variable block sizes, this will be set to the index of the first PCM frame. If fixed block sizes are used, this will
+    always be set to 0. This is 64-bit because the decoded PCM frame number will be 36 bits.
+    */
+    drflac_uint64 pcmFrameNumber;
+
+    /*
+    If the stream uses fixed block sizes, this will be set to the frame number. If variable block sizes are used, this will always be 0. This
+    is 32-bit because in fixed block sizes, the maximum frame number will be 31 bits.
+    */
+    drflac_uint32 flacFrameNumber;
+
+    /* The sample rate of this frame. */
+    drflac_uint32 sampleRate;
+
+    /* The number of PCM frames in each sub-frame within this frame. */
+    drflac_uint16 blockSizeInPCMFrames;
+
+    /*
+    The channel assignment of this frame. This is not always set to the channel count. If interchannel decorrelation is being used this
+    will be set to DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE, DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE or DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE.
+    */
+    drflac_uint8 channelAssignment;
+
+    /* The number of bits per sample within this frame. */
+    drflac_uint8 bitsPerSample;
+
+    /* The frame's CRC. */
+    drflac_uint8 crc8;
+} drflac_frame_header;
+
+typedef struct
+{
+    /* The header. */
+    drflac_frame_header header;
+
+    /*
+    The number of PCM frames left to be read in this FLAC frame. This is initially set to the block size. As PCM frames are read,
+    this will be decremented. When it reaches 0, the decoder will see this frame as fully consumed and load the next frame.
+    */
+    drflac_uint32 pcmFramesRemaining;
+
+    /* The list of sub-frames within the frame. There is one sub-frame for each channel, and there's a maximum of 8 channels. */
+    drflac_subframe subframes[8];
+} drflac_frame;
+
+typedef struct
+{
+    /* The function to call when a metadata block is read. */
+    drflac_meta_proc onMeta;
+
+    /* The user data posted to the metadata callback function. */
+    void* pUserDataMD;
+
+    /* Memory allocation callbacks. */
+    drflac_allocation_callbacks allocationCallbacks;
+
+
+    /* The sample rate. Will be set to something like 44100. */
+    drflac_uint32 sampleRate;
+
+    /*
+    The number of channels. This will be set to 1 for monaural streams, 2 for stereo, etc. Maximum 8. This is set based on the
+    value specified in the STREAMINFO block.
+    */
+    drflac_uint8 channels;
+
+    /* The bits per sample. Will be set to something like 16, 24, etc. */
+    drflac_uint8 bitsPerSample;
+
+    /* The maximum block size, in samples. This number represents the number of samples in each channel (not combined). */
+    drflac_uint16 maxBlockSizeInPCMFrames;
+
+    /*
+    The total number of PCM Frames making up the stream. Can be 0 in which case it's still a valid stream, but just means
+    the total PCM frame count is unknown. Likely the case with streams like internet radio.
+    */
+    drflac_uint64 totalPCMFrameCount;
+
+
+    /* The container type. This is set based on whether or not the decoder was opened from a native or Ogg stream. */
+    drflac_container container;
+
+    /* The number of seekpoints in the seektable. */
+    drflac_uint32 seekpointCount;
+
+
+    /* Information about the frame the decoder is currently sitting on. */
+    drflac_frame currentFLACFrame;
+
+
+    /* The index of the PCM frame the decoder is currently sitting on. This is only used for seeking. */
+    drflac_uint64 currentPCMFrame;
+
+    /* The position of the first FLAC frame in the stream. This is only ever used for seeking. */
+    drflac_uint64 firstFLACFramePosInBytes;
+
+
+    /* A hack to avoid a malloc() when opening a decoder with drflac_open_memory(). */
+    drflac__memory_stream memoryStream;
+
+
+    /* A pointer to the decoded sample data. This is an offset of pExtraData. */
+    drflac_int32* pDecodedSamples;
+
+    /* A pointer to the seek table. This is an offset of pExtraData, or NULL if there is no seek table. */
+    drflac_seekpoint* pSeekpoints;
+
+    /* Internal use only. Only used with Ogg containers. Points to a drflac_oggbs object. This is an offset of pExtraData. */
+    void* _oggbs;
+
+    /* Internal use only. Used for profiling and testing different seeking modes. */
+    drflac_bool32 _noSeekTableSeek    : 1;
+    drflac_bool32 _noBinarySearchSeek : 1;
+    drflac_bool32 _noBruteForceSeek   : 1;
+
+    /* The bit streamer. The raw FLAC data is fed through this object. */
+    drflac_bs bs;
+
+    /* Variable length extra data. We attach this to the end of the object so we can avoid unnecessary mallocs. */
+    drflac_uint8 pExtraData[1];
+} drflac;
+
+
+/*
+Opens a FLAC decoder.
+
+
+Parameters
+----------
+onRead (in)
+    The function to call when data needs to be read from the client.
+
+onSeek (in)
+    The function to call when the read position of the client data needs to move.
+
+pUserData (in, optional)
+    A pointer to application defined data that will be passed to onRead and onSeek.
+
+pAllocationCallbacks (in, optional)
+    A pointer to application defined callbacks for managing memory allocations.
+
+
+Return Value
+------------
+Returns a pointer to an object representing the decoder.
+
+
+Remarks
+-------
+Close the decoder with `drflac_close()`.
+
+`pAllocationCallbacks` can be NULL in which case it will use `DRFLAC_MALLOC`, `DRFLAC_REALLOC` and `DRFLAC_FREE`.
+
+This function will automatically detect whether or not you are attempting to open a native or Ogg encapsulated FLAC, both of which should work seamlessly
+without any manual intervention. Ogg encapsulation also works with multiplexed streams which basically means it can play FLAC encoded audio tracks in videos.
+
+This is the lowest level function for opening a FLAC stream. You can also use `drflac_open_file()` and `drflac_open_memory()` to open the stream from a file or
+from a block of memory respectively.
+
+The STREAMINFO block must be present for this to succeed. Use `drflac_open_relaxed()` to open a FLAC stream where the header may not be present.
+
+
+Seek Also
+---------
+drflac_open_file()
+drflac_open_memory()
+drflac_open_with_metadata()
+drflac_close()
+*/
+DRFLAC_API drflac* drflac_open(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/*
+Opens a FLAC stream with relaxed validation of the header block.
+
+
+Parameters
+----------
+onRead (in)
+    The function to call when data needs to be read from the client.
+
+onSeek (in)
+    The function to call when the read position of the client data needs to move.
+
+container (in)
+    Whether or not the FLAC stream is encapsulated using standard FLAC encapsulation or Ogg encapsulation.
+
+pUserData (in, optional)
+    A pointer to application defined data that will be passed to onRead and onSeek.
+
+pAllocationCallbacks (in, optional)
+    A pointer to application defined callbacks for managing memory allocations.
+
+
+Return Value
+------------
+A pointer to an object representing the decoder.
+
+
+Remarks
+-------
+The same as drflac_open(), except attempts to open the stream even when a header block is not present.
+
+Because the header is not necessarily available, the caller must explicitly define the container (Native or Ogg). Do not set this to `drflac_container_unknown`
+as that is for internal use only.
+
+Opening in relaxed mode will continue reading data from onRead until it finds a valid frame. If a frame is never found it will continue forever. To abort,
+force your `onRead` callback to return 0, which dr_flac will use as an indicator that the end of the stream was found.
+*/
+DRFLAC_API drflac* drflac_open_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_container container, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/*
+Opens a FLAC decoder and notifies the caller of the metadata chunks (album art, etc.).
+
+
+Parameters
+----------
+onRead (in)
+    The function to call when data needs to be read from the client.
+
+onSeek (in)
+    The function to call when the read position of the client data needs to move.
+
+onMeta (in)
+    The function to call for every metadata block.
+
+pUserData (in, optional)
+    A pointer to application defined data that will be passed to onRead, onSeek and onMeta.
+
+pAllocationCallbacks (in, optional)
+    A pointer to application defined callbacks for managing memory allocations.
+
+
+Return Value
+------------
+A pointer to an object representing the decoder.
+
+
+Remarks
+-------
+Close the decoder with `drflac_close()`.
+
+`pAllocationCallbacks` can be NULL in which case it will use `DRFLAC_MALLOC`, `DRFLAC_REALLOC` and `DRFLAC_FREE`.
+
+This is slower than `drflac_open()`, so avoid this one if you don't need metadata. Internally, this will allocate and free memory on the heap for every
+metadata block except for STREAMINFO and PADDING blocks.
+
+The caller is notified of the metadata via the `onMeta` callback. All metadata blocks will be handled before the function returns.
+
+The STREAMINFO block must be present for this to succeed. Use `drflac_open_with_metadata_relaxed()` to open a FLAC stream where the header may not be present.
+
+Note that this will behave inconsistently with `drflac_open()` if the stream is an Ogg encapsulated stream and a metadata block is corrupted. This is due to
+the way the Ogg stream recovers from corrupted pages. When `drflac_open_with_metadata()` is being used, the open routine will try to read the contents of the
+metadata block, whereas `drflac_open()` will simply seek past it (for the sake of efficiency). This inconsistency can result in different samples being
+returned depending on whether or not the stream is being opened with metadata.
+
+
+Seek Also
+---------
+drflac_open_file_with_metadata()
+drflac_open_memory_with_metadata()
+drflac_open()
+drflac_close()
+*/
+DRFLAC_API drflac* drflac_open_with_metadata(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/*
+The same as drflac_open_with_metadata(), except attempts to open the stream even when a header block is not present.
+
+See Also
+--------
+drflac_open_with_metadata()
+drflac_open_relaxed()
+*/
+DRFLAC_API drflac* drflac_open_with_metadata_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/*
+Closes the given FLAC decoder.
+
+
+Parameters
+----------
+pFlac (in)
+    The decoder to close.
+
+
+Remarks
+-------
+This will destroy the decoder object.
+
+
+See Also
+--------
+drflac_open()
+drflac_open_with_metadata()
+drflac_open_file()
+drflac_open_file_w()
+drflac_open_file_with_metadata()
+drflac_open_file_with_metadata_w()
+drflac_open_memory()
+drflac_open_memory_with_metadata()
+*/
+DRFLAC_API void drflac_close(drflac* pFlac);
+
+
+/*
+Reads sample data from the given FLAC decoder, output as interleaved signed 32-bit PCM.
+
+
+Parameters
+----------
+pFlac (in)
+    The decoder.
+
+framesToRead (in)
+    The number of PCM frames to read.
+
+pBufferOut (out, optional)
+    A pointer to the buffer that will receive the decoded samples.
+
+
+Return Value
+------------
+Returns the number of PCM frames actually read. If the return value is less than `framesToRead` it has reached the end.
+
+
+Remarks
+-------
+pBufferOut can be null, in which case the call will act as a seek, and the return value will be the number of frames seeked.
+*/
+DRFLAC_API drflac_uint64 drflac_read_pcm_frames_s32(drflac* pFlac, drflac_uint64 framesToRead, drflac_int32* pBufferOut);
+
+
+/*
+Reads sample data from the given FLAC decoder, output as interleaved signed 16-bit PCM.
+
+
+Parameters
+----------
+pFlac (in)
+    The decoder.
+
+framesToRead (in)
+    The number of PCM frames to read.
+
+pBufferOut (out, optional)
+    A pointer to the buffer that will receive the decoded samples.
+
+
+Return Value
+------------
+Returns the number of PCM frames actually read. If the return value is less than `framesToRead` it has reached the end.
+
+
+Remarks
+-------
+pBufferOut can be null, in which case the call will act as a seek, and the return value will be the number of frames seeked.
+
+Note that this is lossy for streams where the bits per sample is larger than 16.
+*/
+DRFLAC_API drflac_uint64 drflac_read_pcm_frames_s16(drflac* pFlac, drflac_uint64 framesToRead, drflac_int16* pBufferOut);
+
+/*
+Reads sample data from the given FLAC decoder, output as interleaved 32-bit floating point PCM.
+
+
+Parameters
+----------
+pFlac (in)
+    The decoder.
+
+framesToRead (in)
+    The number of PCM frames to read.
+
+pBufferOut (out, optional)
+    A pointer to the buffer that will receive the decoded samples.
+
+
+Return Value
+------------
+Returns the number of PCM frames actually read. If the return value is less than `framesToRead` it has reached the end.
+
+
+Remarks
+-------
+pBufferOut can be null, in which case the call will act as a seek, and the return value will be the number of frames seeked.
+
+Note that this should be considered lossy due to the nature of floating point numbers not being able to exactly represent every possible number.
+*/
+DRFLAC_API drflac_uint64 drflac_read_pcm_frames_f32(drflac* pFlac, drflac_uint64 framesToRead, float* pBufferOut);
+
+/*
+Seeks to the PCM frame at the given index.
+
+
+Parameters
+----------
+pFlac (in)
+    The decoder.
+
+pcmFrameIndex (in)
+    The index of the PCM frame to seek to. See notes below.
+
+
+Return Value
+-------------
+`DRFLAC_TRUE` if successful; `DRFLAC_FALSE` otherwise.
+*/
+DRFLAC_API drflac_bool32 drflac_seek_to_pcm_frame(drflac* pFlac, drflac_uint64 pcmFrameIndex);
+
+
+
+#ifndef DR_FLAC_NO_STDIO
+/*
+Opens a FLAC decoder from the file at the given path.
+
+
+Parameters
+----------
+pFileName (in)
+    The path of the file to open, either absolute or relative to the current directory.
+
+pAllocationCallbacks (in, optional)
+    A pointer to application defined callbacks for managing memory allocations.
+
+
+Return Value
+------------
+A pointer to an object representing the decoder.
+
+
+Remarks
+-------
+Close the decoder with drflac_close().
+
+
+Remarks
+-------
+This will hold a handle to the file until the decoder is closed with drflac_close(). Some platforms will restrict the number of files a process can have open
+at any given time, so keep this mind if you have many decoders open at the same time.
+
+
+See Also
+--------
+drflac_open_file_with_metadata()
+drflac_open()
+drflac_close()
+*/
+DRFLAC_API drflac* drflac_open_file(const char* pFileName, const drflac_allocation_callbacks* pAllocationCallbacks);
+DRFLAC_API drflac* drflac_open_file_w(const wchar_t* pFileName, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/*
+Opens a FLAC decoder from the file at the given path and notifies the caller of the metadata chunks (album art, etc.)
+
+
+Parameters
+----------
+pFileName (in)
+    The path of the file to open, either absolute or relative to the current directory.
+
+pAllocationCallbacks (in, optional)
+    A pointer to application defined callbacks for managing memory allocations.
+
+onMeta (in)
+    The callback to fire for each metadata block.
+
+pUserData (in)
+    A pointer to the user data to pass to the metadata callback.
+
+pAllocationCallbacks (in)
+    A pointer to application defined callbacks for managing memory allocations.
+
+
+Remarks
+-------
+Look at the documentation for drflac_open_with_metadata() for more information on how metadata is handled.
+
+
+See Also
+--------
+drflac_open_with_metadata()
+drflac_open()
+drflac_close()
+*/
+DRFLAC_API drflac* drflac_open_file_with_metadata(const char* pFileName, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks);
+DRFLAC_API drflac* drflac_open_file_with_metadata_w(const wchar_t* pFileName, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks);
+#endif
+
+/*
+Opens a FLAC decoder from a pre-allocated block of memory
+
+
+Parameters
+----------
+pData (in)
+    A pointer to the raw encoded FLAC data.
+
+dataSize (in)
+    The size in bytes of `data`.
+
+pAllocationCallbacks (in)
+    A pointer to application defined callbacks for managing memory allocations.
+
+
+Return Value
+------------
+A pointer to an object representing the decoder.
+
+
+Remarks
+-------
+This does not create a copy of the data. It is up to the application to ensure the buffer remains valid for the lifetime of the decoder.
+
+
+See Also
+--------
+drflac_open()
+drflac_close()
+*/
+DRFLAC_API drflac* drflac_open_memory(const void* pData, size_t dataSize, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/*
+Opens a FLAC decoder from a pre-allocated block of memory and notifies the caller of the metadata chunks (album art, etc.)
+
+
+Parameters
+----------
+pData (in)
+    A pointer to the raw encoded FLAC data.
+
+dataSize (in)
+    The size in bytes of `data`.
+
+onMeta (in)
+    The callback to fire for each metadata block.
+
+pUserData (in)
+    A pointer to the user data to pass to the metadata callback.
+
+pAllocationCallbacks (in)
+    A pointer to application defined callbacks for managing memory allocations.
+
+
+Remarks
+-------
+Look at the documentation for drflac_open_with_metadata() for more information on how metadata is handled.
+
+
+See Also
+-------
+drflac_open_with_metadata()
+drflac_open()
+drflac_close()
+*/
+DRFLAC_API drflac* drflac_open_memory_with_metadata(const void* pData, size_t dataSize, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+
+
+/* High Level APIs */
+
+/*
+Opens a FLAC stream from the given callbacks and fully decodes it in a single operation. The return value is a
+pointer to the sample data as interleaved signed 32-bit PCM. The returned data must be freed with drflac_free().
+
+You can pass in custom memory allocation callbacks via the pAllocationCallbacks parameter. This can be NULL in which
+case it will use DRFLAC_MALLOC, DRFLAC_REALLOC and DRFLAC_FREE.
+
+Sometimes a FLAC file won't keep track of the total sample count. In this situation the function will continuously
+read samples into a dynamically sized buffer on the heap until no samples are left.
+
+Do not call this function on a broadcast type of stream (like internet radio streams and whatnot).
+*/
+DRFLAC_API drflac_int32* drflac_open_and_read_pcm_frames_s32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/* Same as drflac_open_and_read_pcm_frames_s32(), except returns signed 16-bit integer samples. */
+DRFLAC_API drflac_int16* drflac_open_and_read_pcm_frames_s16(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/* Same as drflac_open_and_read_pcm_frames_s32(), except returns 32-bit floating-point samples. */
+DRFLAC_API float* drflac_open_and_read_pcm_frames_f32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+#ifndef DR_FLAC_NO_STDIO
+/* Same as drflac_open_and_read_pcm_frames_s32() except opens the decoder from a file. */
+DRFLAC_API drflac_int32* drflac_open_file_and_read_pcm_frames_s32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/* Same as drflac_open_file_and_read_pcm_frames_s32(), except returns signed 16-bit integer samples. */
+DRFLAC_API drflac_int16* drflac_open_file_and_read_pcm_frames_s16(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/* Same as drflac_open_file_and_read_pcm_frames_s32(), except returns 32-bit floating-point samples. */
+DRFLAC_API float* drflac_open_file_and_read_pcm_frames_f32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
+#endif
+
+/* Same as drflac_open_and_read_pcm_frames_s32() except opens the decoder from a block of memory. */
+DRFLAC_API drflac_int32* drflac_open_memory_and_read_pcm_frames_s32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/* Same as drflac_open_memory_and_read_pcm_frames_s32(), except returns signed 16-bit integer samples. */
+DRFLAC_API drflac_int16* drflac_open_memory_and_read_pcm_frames_s16(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/* Same as drflac_open_memory_and_read_pcm_frames_s32(), except returns 32-bit floating-point samples. */
+DRFLAC_API float* drflac_open_memory_and_read_pcm_frames_f32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+/*
+Frees memory that was allocated internally by dr_flac.
+
+Set pAllocationCallbacks to the same object that was passed to drflac_open_*_and_read_pcm_frames_*(). If you originally passed in NULL, pass in NULL for this.
+*/
+DRFLAC_API void drflac_free(void* p, const drflac_allocation_callbacks* pAllocationCallbacks);
+
+
+/* Structure representing an iterator for vorbis comments in a VORBIS_COMMENT metadata block. */
+typedef struct
+{
+    drflac_uint32 countRemaining;
+    const char* pRunningData;
+} drflac_vorbis_comment_iterator;
+
+/*
+Initializes a vorbis comment iterator. This can be used for iterating over the vorbis comments in a VORBIS_COMMENT
+metadata block.
+*/
+DRFLAC_API void drflac_init_vorbis_comment_iterator(drflac_vorbis_comment_iterator* pIter, drflac_uint32 commentCount, const void* pComments);
+
+/*
+Goes to the next vorbis comment in the given iterator. If null is returned it means there are no more comments. The
+returned string is NOT null terminated.
+*/
+DRFLAC_API const char* drflac_next_vorbis_comment(drflac_vorbis_comment_iterator* pIter, drflac_uint32* pCommentLengthOut);
+
+
+/* Structure representing an iterator for cuesheet tracks in a CUESHEET metadata block. */
+typedef struct
+{
+    drflac_uint32 countRemaining;
+    const char* pRunningData;
+} drflac_cuesheet_track_iterator;
+
+/* Packing is important on this structure because we map this directly to the raw data within the CUESHEET metadata block. */
+#pragma pack(4)
+typedef struct
+{
+    drflac_uint64 offset;
+    drflac_uint8 index;
+    drflac_uint8 reserved[3];
+} drflac_cuesheet_track_index;
+#pragma pack()
+
+typedef struct
+{
+    drflac_uint64 offset;
+    drflac_uint8 trackNumber;
+    char ISRC[12];
+    drflac_bool8 isAudio;
+    drflac_bool8 preEmphasis;
+    drflac_uint8 indexCount;
+    const drflac_cuesheet_track_index* pIndexPoints;
+} drflac_cuesheet_track;
+
+/*
+Initializes a cuesheet track iterator. This can be used for iterating over the cuesheet tracks in a CUESHEET metadata
+block.
+*/
+DRFLAC_API void drflac_init_cuesheet_track_iterator(drflac_cuesheet_track_iterator* pIter, drflac_uint32 trackCount, const void* pTrackData);
+
+/* Goes to the next cuesheet track in the given iterator. If DRFLAC_FALSE is returned it means there are no more comments. */
+DRFLAC_API drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterator* pIter, drflac_cuesheet_track* pCuesheetTrack);
+
+
+#ifdef __cplusplus
+}
+#endif
+#endif  /* dr_flac_h */
+
+
+/************************************************************************************************************************************************************
+ ************************************************************************************************************************************************************
+
+ IMPLEMENTATION
+
+ ************************************************************************************************************************************************************
+ ************************************************************************************************************************************************************/
+#if defined(DR_FLAC_IMPLEMENTATION) || defined(DRFLAC_IMPLEMENTATION)
+
+/* Disable some annoying warnings. */
+#if defined(__GNUC__)
+    #pragma GCC diagnostic push
+    #if __GNUC__ >= 7
+    #pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
+    #endif
+#endif
+
+#ifdef __linux__
+    #ifndef _BSD_SOURCE
+        #define _BSD_SOURCE
+    #endif
+    #ifndef __USE_BSD
+        #define __USE_BSD
+    #endif
+    #include <endian.h>
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef _MSC_VER
+    #define DRFLAC_INLINE __forceinline
+#elif defined(__GNUC__)
+    /*
+    I've had a bug report where GCC is emitting warnings about functions possibly not being inlineable. This warning happens when
+    the __attribute__((always_inline)) attribute is defined without an "inline" statement. I think therefore there must be some
+    case where "__inline__" is not always defined, thus the compiler emitting these warnings. When using -std=c89 or -ansi on the
+    command line, we cannot use the "inline" keyword and instead need to use "__inline__". In an attempt to work around this issue
+    I am using "__inline__" only when we're compiling in strict ANSI mode.
+    */
+    #if defined(__STRICT_ANSI__)
+        #define DRFLAC_INLINE __inline__ __attribute__((always_inline))
+    #else
+        #define DRFLAC_INLINE inline __attribute__((always_inline))
+    #endif
+#else
+    #define DRFLAC_INLINE
+#endif
+
+/* CPU architecture. */
+#if defined(__x86_64__) || defined(_M_X64)
+    #define DRFLAC_X64
+#elif defined(__i386) || defined(_M_IX86)
+    #define DRFLAC_X86
+#elif defined(__arm__) || defined(_M_ARM)
+    #define DRFLAC_ARM
+#endif
+
+/* Intrinsics Support */
+#if !defined(DR_FLAC_NO_SIMD)
+    #if defined(DRFLAC_X64) || defined(DRFLAC_X86)
+        #if defined(_MSC_VER) && !defined(__clang__)
+            /* MSVC. */
+            #if _MSC_VER >= 1400 && !defined(DRFLAC_NO_SSE2)    /* 2005 */
+                #define DRFLAC_SUPPORT_SSE2
+            #endif
+            #if _MSC_VER >= 1600 && !defined(DRFLAC_NO_SSE41)   /* 2010 */
+                #define DRFLAC_SUPPORT_SSE41
+            #endif
+        #else
+            /* Assume GNUC-style. */
+            #if defined(__SSE2__) && !defined(DRFLAC_NO_SSE2)
+                #define DRFLAC_SUPPORT_SSE2
+            #endif
+            #if defined(__SSE4_1__) && !defined(DRFLAC_NO_SSE41)
+                #define DRFLAC_SUPPORT_SSE41
+            #endif
+        #endif
+
+        /* If at this point we still haven't determined compiler support for the intrinsics just fall back to __has_include. */
+        #if !defined(__GNUC__) && !defined(__clang__) && defined(__has_include)
+            #if !defined(DRFLAC_SUPPORT_SSE2) && !defined(DRFLAC_NO_SSE2) && __has_include(<emmintrin.h>)
+                #define DRFLAC_SUPPORT_SSE2
+            #endif
+            #if !defined(DRFLAC_SUPPORT_SSE41) && !defined(DRFLAC_NO_SSE41) && __has_include(<smmintrin.h>)
+                #define DRFLAC_SUPPORT_SSE41
+            #endif
+        #endif
+
+        #if defined(DRFLAC_SUPPORT_SSE41)
+            #include <smmintrin.h>
+        #elif defined(DRFLAC_SUPPORT_SSE2)
+            #include <emmintrin.h>
+        #endif
+    #endif
+
+    #if defined(DRFLAC_ARM)
+        #if !defined(DRFLAC_NO_NEON) && (defined(__ARM_NEON) || defined(__aarch64__) || defined(_M_ARM64))
+            #define DRFLAC_SUPPORT_NEON
+        #endif
+
+        /* Fall back to looking for the #include file. */
+        #if !defined(__GNUC__) && !defined(__clang__) && defined(__has_include)
+            #if !defined(DRFLAC_SUPPORT_NEON) && !defined(DRFLAC_NO_NEON) && __has_include(<arm_neon.h>)
+                #define DRFLAC_SUPPORT_NEON
+            #endif
+        #endif
+
+        #if defined(DRFLAC_SUPPORT_NEON)
+            #include <arm_neon.h>
+        #endif
+    #endif
+#endif
+
+/* Compile-time CPU feature support. */
+#if !defined(DR_FLAC_NO_SIMD) && (defined(DRFLAC_X86) || defined(DRFLAC_X64))
+    #if defined(_MSC_VER) && !defined(__clang__)
+        #if _MSC_VER >= 1400
+            #include <intrin.h>
+            static void drflac__cpuid(int info[4], int fid)
+            {
+                __cpuid(info, fid);
+            }
+        #else
+            #define DRFLAC_NO_CPUID
+        #endif
+    #else
+        #if defined(__GNUC__) || defined(__clang__)
+            static void drflac__cpuid(int info[4], int fid)
+            {
+                /*
+                It looks like the -fPIC option uses the ebx register which GCC complains about. We can work around this by just using a different register, the
+                specific register of which I'm letting the compiler decide on. The "k" prefix is used to specify a 32-bit register. The {...} syntax is for
+                supporting different assembly dialects.
+
+                What's basically happening is that we're saving and restoring the ebx register manually.
+                */
+                #if defined(DRFLAC_X86) && defined(__PIC__)
+                    __asm__ __volatile__ (
+                        "xchg{l} {%%}ebx, %k1;"
+                        "cpuid;"
+                        "xchg{l} {%%}ebx, %k1;"
+                        : "=a"(info[0]), "=&r"(info[1]), "=c"(info[2]), "=d"(info[3]) : "a"(fid), "c"(0)
+                    );
+                #else
+                    __asm__ __volatile__ (
+                        "cpuid" : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3]) : "a"(fid), "c"(0)
+                    );
+                #endif
+            }
+        #else
+            #define DRFLAC_NO_CPUID
+        #endif
+    #endif
+#else
+    #define DRFLAC_NO_CPUID
+#endif
+
+static DRFLAC_INLINE drflac_bool32 drflac_has_sse2(void)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    #if (defined(DRFLAC_X64) || defined(DRFLAC_X86)) && !defined(DRFLAC_NO_SSE2)
+        #if defined(DRFLAC_X64)
+            return DRFLAC_TRUE;    /* 64-bit targets always support SSE2. */
+        #elif (defined(_M_IX86_FP) && _M_IX86_FP == 2) || defined(__SSE2__)
+            return DRFLAC_TRUE;    /* If the compiler is allowed to freely generate SSE2 code we can assume support. */
+        #else
+            #if defined(DRFLAC_NO_CPUID)
+                return DRFLAC_FALSE;
+            #else
+                int info[4];
+                drflac__cpuid(info, 1);
+                return (info[3] & (1 << 26)) != 0;
+            #endif
+        #endif
+    #else
+        return DRFLAC_FALSE;       /* SSE2 is only supported on x86 and x64 architectures. */
+    #endif
+#else
+    return DRFLAC_FALSE;           /* No compiler support. */
+#endif
+}
+
+static DRFLAC_INLINE drflac_bool32 drflac_has_sse41(void)
+{
+#if defined(DRFLAC_SUPPORT_SSE41)
+    #if (defined(DRFLAC_X64) || defined(DRFLAC_X86)) && !defined(DRFLAC_NO_SSE41)
+        #if defined(DRFLAC_X64)
+            return DRFLAC_TRUE;    /* 64-bit targets always support SSE4.1. */
+        #elif (defined(_M_IX86_FP) && _M_IX86_FP == 2) || defined(__SSE4_1__)
+            return DRFLAC_TRUE;    /* If the compiler is allowed to freely generate SSE41 code we can assume support. */
+        #else
+            #if defined(DRFLAC_NO_CPUID)
+                return DRFLAC_FALSE;
+            #else
+                int info[4];
+                drflac__cpuid(info, 1);
+                return (info[2] & (1 << 19)) != 0;
+            #endif
+        #endif
+    #else
+        return DRFLAC_FALSE;       /* SSE41 is only supported on x86 and x64 architectures. */
+    #endif
+#else
+    return DRFLAC_FALSE;           /* No compiler support. */
+#endif
+}
+
+
+#if defined(_MSC_VER) && _MSC_VER >= 1500 && (defined(DRFLAC_X86) || defined(DRFLAC_X64))
+    #define DRFLAC_HAS_LZCNT_INTRINSIC
+#elif (defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)))
+    #define DRFLAC_HAS_LZCNT_INTRINSIC
+#elif defined(__clang__)
+    #if defined(__has_builtin)
+        #if __has_builtin(__builtin_clzll) || __has_builtin(__builtin_clzl)
+            #define DRFLAC_HAS_LZCNT_INTRINSIC
+        #endif
+    #endif
+#endif
+
+#if defined(_MSC_VER) && _MSC_VER >= 1400
+    #define DRFLAC_HAS_BYTESWAP16_INTRINSIC
+    #define DRFLAC_HAS_BYTESWAP32_INTRINSIC
+    #define DRFLAC_HAS_BYTESWAP64_INTRINSIC
+#elif defined(__clang__)
+    #if defined(__has_builtin)
+        #if __has_builtin(__builtin_bswap16)
+            #define DRFLAC_HAS_BYTESWAP16_INTRINSIC
+        #endif
+        #if __has_builtin(__builtin_bswap32)
+            #define DRFLAC_HAS_BYTESWAP32_INTRINSIC
+        #endif
+        #if __has_builtin(__builtin_bswap64)
+            #define DRFLAC_HAS_BYTESWAP64_INTRINSIC
+        #endif
+    #endif
+#elif defined(__GNUC__)
+    #if ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
+        #define DRFLAC_HAS_BYTESWAP32_INTRINSIC
+        #define DRFLAC_HAS_BYTESWAP64_INTRINSIC
+    #endif
+    #if ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))
+        #define DRFLAC_HAS_BYTESWAP16_INTRINSIC
+    #endif
+#endif
+
+
+/* Standard library stuff. */
+#ifndef DRFLAC_ASSERT
+#include <assert.h>
+#define DRFLAC_ASSERT(expression)           assert(expression)
+#endif
+#ifndef DRFLAC_MALLOC
+#define DRFLAC_MALLOC(sz)                   malloc((sz))
+#endif
+#ifndef DRFLAC_REALLOC
+#define DRFLAC_REALLOC(p, sz)               realloc((p), (sz))
+#endif
+#ifndef DRFLAC_FREE
+#define DRFLAC_FREE(p)                      free((p))
+#endif
+#ifndef DRFLAC_COPY_MEMORY
+#define DRFLAC_COPY_MEMORY(dst, src, sz)    memcpy((dst), (src), (sz))
+#endif
+#ifndef DRFLAC_ZERO_MEMORY
+#define DRFLAC_ZERO_MEMORY(p, sz)           memset((p), 0, (sz))
+#endif
+#ifndef DRFLAC_ZERO_OBJECT
+#define DRFLAC_ZERO_OBJECT(p)               DRFLAC_ZERO_MEMORY((p), sizeof(*(p)))
+#endif
+
+#define DRFLAC_MAX_SIMD_VECTOR_SIZE                     64  /* 64 for AVX-512 in the future. */
+
+typedef drflac_int32 drflac_result;
+#define DRFLAC_SUCCESS                                   0
+#define DRFLAC_ERROR                                    -1   /* A generic error. */
+#define DRFLAC_INVALID_ARGS                             -2
+#define DRFLAC_INVALID_OPERATION                        -3
+#define DRFLAC_OUT_OF_MEMORY                            -4
+#define DRFLAC_OUT_OF_RANGE                             -5
+#define DRFLAC_ACCESS_DENIED                            -6
+#define DRFLAC_DOES_NOT_EXIST                           -7
+#define DRFLAC_ALREADY_EXISTS                           -8
+#define DRFLAC_TOO_MANY_OPEN_FILES                      -9
+#define DRFLAC_INVALID_FILE                             -10
+#define DRFLAC_TOO_BIG                                  -11
+#define DRFLAC_PATH_TOO_LONG                            -12
+#define DRFLAC_NAME_TOO_LONG                            -13
+#define DRFLAC_NOT_DIRECTORY                            -14
+#define DRFLAC_IS_DIRECTORY                             -15
+#define DRFLAC_DIRECTORY_NOT_EMPTY                      -16
+#define DRFLAC_END_OF_FILE                              -17
+#define DRFLAC_NO_SPACE                                 -18
+#define DRFLAC_BUSY                                     -19
+#define DRFLAC_IO_ERROR                                 -20
+#define DRFLAC_INTERRUPT                                -21
+#define DRFLAC_UNAVAILABLE                              -22
+#define DRFLAC_ALREADY_IN_USE                           -23
+#define DRFLAC_BAD_ADDRESS                              -24
+#define DRFLAC_BAD_SEEK                                 -25
+#define DRFLAC_BAD_PIPE                                 -26
+#define DRFLAC_DEADLOCK                                 -27
+#define DRFLAC_TOO_MANY_LINKS                           -28
+#define DRFLAC_NOT_IMPLEMENTED                          -29
+#define DRFLAC_NO_MESSAGE                               -30
+#define DRFLAC_BAD_MESSAGE                              -31
+#define DRFLAC_NO_DATA_AVAILABLE                        -32
+#define DRFLAC_INVALID_DATA                             -33
+#define DRFLAC_TIMEOUT                                  -34
+#define DRFLAC_NO_NETWORK                               -35
+#define DRFLAC_NOT_UNIQUE                               -36
+#define DRFLAC_NOT_SOCKET                               -37
+#define DRFLAC_NO_ADDRESS                               -38
+#define DRFLAC_BAD_PROTOCOL                             -39
+#define DRFLAC_PROTOCOL_UNAVAILABLE                     -40
+#define DRFLAC_PROTOCOL_NOT_SUPPORTED                   -41
+#define DRFLAC_PROTOCOL_FAMILY_NOT_SUPPORTED            -42
+#define DRFLAC_ADDRESS_FAMILY_NOT_SUPPORTED             -43
+#define DRFLAC_SOCKET_NOT_SUPPORTED                     -44
+#define DRFLAC_CONNECTION_RESET                         -45
+#define DRFLAC_ALREADY_CONNECTED                        -46
+#define DRFLAC_NOT_CONNECTED                            -47
+#define DRFLAC_CONNECTION_REFUSED                       -48
+#define DRFLAC_NO_HOST                                  -49
+#define DRFLAC_IN_PROGRESS                              -50
+#define DRFLAC_CANCELLED                                -51
+#define DRFLAC_MEMORY_ALREADY_MAPPED                    -52
+#define DRFLAC_AT_END                                   -53
+#define DRFLAC_CRC_MISMATCH                             -128
+
+#define DRFLAC_SUBFRAME_CONSTANT                        0
+#define DRFLAC_SUBFRAME_VERBATIM                        1
+#define DRFLAC_SUBFRAME_FIXED                           8
+#define DRFLAC_SUBFRAME_LPC                             32
+#define DRFLAC_SUBFRAME_RESERVED                        255
+
+#define DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE  0
+#define DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2 1
+
+#define DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT           0
+#define DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE             8
+#define DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE            9
+#define DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE              10
+
+#define drflac_align(x, a)                              ((((x) + (a) - 1) / (a)) * (a))
+
+
+DRFLAC_API void drflac_version(drflac_uint32* pMajor, drflac_uint32* pMinor, drflac_uint32* pRevision)
+{
+    if (pMajor) {
+        *pMajor = DRFLAC_VERSION_MAJOR;
+    }
+
+    if (pMinor) {
+        *pMinor = DRFLAC_VERSION_MINOR;
+    }
+
+    if (pRevision) {
+        *pRevision = DRFLAC_VERSION_REVISION;
+    }
+}
+
+DRFLAC_API const char* drflac_version_string()
+{
+    return DRFLAC_VERSION_STRING;
+}
+
+
+/* CPU caps. */
+#if defined(__has_feature)
+    #if __has_feature(thread_sanitizer)
+        #define DRFLAC_NO_THREAD_SANITIZE __attribute__((no_sanitize("thread")))
+    #else
+        #define DRFLAC_NO_THREAD_SANITIZE
+    #endif
+#else
+    #define DRFLAC_NO_THREAD_SANITIZE
+#endif
+
+#if defined(DRFLAC_HAS_LZCNT_INTRINSIC)
+static drflac_bool32 drflac__gIsLZCNTSupported = DRFLAC_FALSE;
+#endif
+
+#ifndef DRFLAC_NO_CPUID
+static drflac_bool32 drflac__gIsSSE2Supported  = DRFLAC_FALSE;
+static drflac_bool32 drflac__gIsSSE41Supported = DRFLAC_FALSE;
+
+/*
+I've had a bug report that Clang's ThreadSanitizer presents a warning in this function. Having reviewed this, this does
+actually make sense. However, since CPU caps should never differ for a running process, I don't think the trade off of
+complicating internal API's by passing around CPU caps versus just disabling the warnings is worthwhile. I'm therefore
+just going to disable these warnings. This is disabled via the DRFLAC_NO_THREAD_SANITIZE attribute.
+*/
+DRFLAC_NO_THREAD_SANITIZE static void drflac__init_cpu_caps(void)
+{
+    static drflac_bool32 isCPUCapsInitialized = DRFLAC_FALSE;
+
+    if (!isCPUCapsInitialized) {
+        /* LZCNT */
+#if defined(DRFLAC_HAS_LZCNT_INTRINSIC)
+        int info[4] = {0};
+        drflac__cpuid(info, 0x80000001);
+        drflac__gIsLZCNTSupported = (info[2] & (1 << 5)) != 0;
+#endif
+
+        /* SSE2 */
+        drflac__gIsSSE2Supported = drflac_has_sse2();
+
+        /* SSE4.1 */
+        drflac__gIsSSE41Supported = drflac_has_sse41();
+
+        /* Initialized. */
+        isCPUCapsInitialized = DRFLAC_TRUE;
+    }
+}
+#else
+static drflac_bool32 drflac__gIsNEONSupported  = DRFLAC_FALSE;
+
+static DRFLAC_INLINE drflac_bool32 drflac__has_neon(void)
+{
+#if defined(DRFLAC_SUPPORT_NEON)
+    #if defined(DRFLAC_ARM) && !defined(DRFLAC_NO_NEON)
+        #if (defined(__ARM_NEON) || defined(__aarch64__) || defined(_M_ARM64))
+            return DRFLAC_TRUE;    /* If the compiler is allowed to freely generate NEON code we can assume support. */
+        #else
+            /* TODO: Runtime check. */
+            return DRFLAC_FALSE;
+        #endif
+    #else
+        return DRFLAC_FALSE;       /* NEON is only supported on ARM architectures. */
+    #endif
+#else
+    return DRFLAC_FALSE;           /* No compiler support. */
+#endif
+}
+
+DRFLAC_NO_THREAD_SANITIZE static void drflac__init_cpu_caps(void)
+{
+    drflac__gIsNEONSupported = drflac__has_neon();
+
+#if defined(DRFLAC_HAS_LZCNT_INTRINSIC) && defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 5)
+    drflac__gIsLZCNTSupported = DRFLAC_TRUE;
+#endif
+}
+#endif
+
+
+/* Endian Management */
+static DRFLAC_INLINE drflac_bool32 drflac__is_little_endian(void)
+{
+#if defined(DRFLAC_X86) || defined(DRFLAC_X64)
+    return DRFLAC_TRUE;
+#elif defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && __BYTE_ORDER == __LITTLE_ENDIAN
+    return DRFLAC_TRUE;
+#else
+    int n = 1;
+    return (*(char*)&n) == 1;
+#endif
+}
+
+static DRFLAC_INLINE drflac_uint16 drflac__swap_endian_uint16(drflac_uint16 n)
+{
+#ifdef DRFLAC_HAS_BYTESWAP16_INTRINSIC
+    #if defined(_MSC_VER)
+        return _byteswap_ushort(n);
+    #elif defined(__GNUC__) || defined(__clang__)
+        return __builtin_bswap16(n);
+    #else
+        #error "This compiler does not support the byte swap intrinsic."
+    #endif
+#else
+    return ((n & 0xFF00) >> 8) |
+           ((n & 0x00FF) << 8);
+#endif
+}
+
+static DRFLAC_INLINE drflac_uint32 drflac__swap_endian_uint32(drflac_uint32 n)
+{
+#ifdef DRFLAC_HAS_BYTESWAP32_INTRINSIC
+    #if defined(_MSC_VER)
+        return _byteswap_ulong(n);
+    #elif defined(__GNUC__) || defined(__clang__)
+        #if defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 6) && !defined(DRFLAC_64BIT)   /* <-- 64-bit inline assembly has not been tested, so disabling for now. */
+            /* Inline assembly optimized implementation for ARM. In my testing, GCC does not generate optimized code with __builtin_bswap32(). */
+            drflac_uint32 r;
+            __asm__ __volatile__ (
+            #if defined(DRFLAC_64BIT)
+                "rev %w[out], %w[in]" : [out]"=r"(r) : [in]"r"(n)   /* <-- This is untested. If someone in the community could test this, that would be appreciated! */
+            #else
+                "rev %[out], %[in]" : [out]"=r"(r) : [in]"r"(n)
+            #endif
+            );
+            return r;
+        #else
+            return __builtin_bswap32(n);
+        #endif
+    #else
+        #error "This compiler does not support the byte swap intrinsic."
+    #endif
+#else
+    return ((n & 0xFF000000) >> 24) |
+           ((n & 0x00FF0000) >>  8) |
+           ((n & 0x0000FF00) <<  8) |
+           ((n & 0x000000FF) << 24);
+#endif
+}
+
+static DRFLAC_INLINE drflac_uint64 drflac__swap_endian_uint64(drflac_uint64 n)
+{
+#ifdef DRFLAC_HAS_BYTESWAP64_INTRINSIC
+    #if defined(_MSC_VER)
+        return _byteswap_uint64(n);
+    #elif defined(__GNUC__) || defined(__clang__)
+        return __builtin_bswap64(n);
+    #else
+        #error "This compiler does not support the byte swap intrinsic."
+    #endif
+#else
+    return ((n & (drflac_uint64)0xFF00000000000000) >> 56) |
+           ((n & (drflac_uint64)0x00FF000000000000) >> 40) |
+           ((n & (drflac_uint64)0x0000FF0000000000) >> 24) |
+           ((n & (drflac_uint64)0x000000FF00000000) >>  8) |
+           ((n & (drflac_uint64)0x00000000FF000000) <<  8) |
+           ((n & (drflac_uint64)0x0000000000FF0000) << 24) |
+           ((n & (drflac_uint64)0x000000000000FF00) << 40) |
+           ((n & (drflac_uint64)0x00000000000000FF) << 56);
+#endif
+}
+
+
+static DRFLAC_INLINE drflac_uint16 drflac__be2host_16(drflac_uint16 n)
+{
+    if (drflac__is_little_endian()) {
+        return drflac__swap_endian_uint16(n);
+    }
+
+    return n;
+}
+
+static DRFLAC_INLINE drflac_uint32 drflac__be2host_32(drflac_uint32 n)
+{
+    if (drflac__is_little_endian()) {
+        return drflac__swap_endian_uint32(n);
+    }
+
+    return n;
+}
+
+static DRFLAC_INLINE drflac_uint64 drflac__be2host_64(drflac_uint64 n)
+{
+    if (drflac__is_little_endian()) {
+        return drflac__swap_endian_uint64(n);
+    }
+
+    return n;
+}
+
+
+static DRFLAC_INLINE drflac_uint32 drflac__le2host_32(drflac_uint32 n)
+{
+    if (!drflac__is_little_endian()) {
+        return drflac__swap_endian_uint32(n);
+    }
+
+    return n;
+}
+
+
+static DRFLAC_INLINE drflac_uint32 drflac__unsynchsafe_32(drflac_uint32 n)
+{
+    drflac_uint32 result = 0;
+    result |= (n & 0x7F000000) >> 3;
+    result |= (n & 0x007F0000) >> 2;
+    result |= (n & 0x00007F00) >> 1;
+    result |= (n & 0x0000007F) >> 0;
+
+    return result;
+}
+
+
+
+/* The CRC code below is based on this document: http://zlib.net/crc_v3.txt */
+static drflac_uint8 drflac__crc8_table[] = {
+    0x00, 0x07, 0x0E, 0x09, 0x1C, 0x1B, 0x12, 0x15, 0x38, 0x3F, 0x36, 0x31, 0x24, 0x23, 0x2A, 0x2D,
+    0x70, 0x77, 0x7E, 0x79, 0x6C, 0x6B, 0x62, 0x65, 0x48, 0x4F, 0x46, 0x41, 0x54, 0x53, 0x5A, 0x5D,
+    0xE0, 0xE7, 0xEE, 0xE9, 0xFC, 0xFB, 0xF2, 0xF5, 0xD8, 0xDF, 0xD6, 0xD1, 0xC4, 0xC3, 0xCA, 0xCD,
+    0x90, 0x97, 0x9E, 0x99, 0x8C, 0x8B, 0x82, 0x85, 0xA8, 0xAF, 0xA6, 0xA1, 0xB4, 0xB3, 0xBA, 0xBD,
+    0xC7, 0xC0, 0xC9, 0xCE, 0xDB, 0xDC, 0xD5, 0xD2, 0xFF, 0xF8, 0xF1, 0xF6, 0xE3, 0xE4, 0xED, 0xEA,
+    0xB7, 0xB0, 0xB9, 0xBE, 0xAB, 0xAC, 0xA5, 0xA2, 0x8F, 0x88, 0x81, 0x86, 0x93, 0x94, 0x9D, 0x9A,
+    0x27, 0x20, 0x29, 0x2E, 0x3B, 0x3C, 0x35, 0x32, 0x1F, 0x18, 0x11, 0x16, 0x03, 0x04, 0x0D, 0x0A,
+    0x57, 0x50, 0x59, 0x5E, 0x4B, 0x4C, 0x45, 0x42, 0x6F, 0x68, 0x61, 0x66, 0x73, 0x74, 0x7D, 0x7A,
+    0x89, 0x8E, 0x87, 0x80, 0x95, 0x92, 0x9B, 0x9C, 0xB1, 0xB6, 0xBF, 0xB8, 0xAD, 0xAA, 0xA3, 0xA4,
+    0xF9, 0xFE, 0xF7, 0xF0, 0xE5, 0xE2, 0xEB, 0xEC, 0xC1, 0xC6, 0xCF, 0xC8, 0xDD, 0xDA, 0xD3, 0xD4,
+    0x69, 0x6E, 0x67, 0x60, 0x75, 0x72, 0x7B, 0x7C, 0x51, 0x56, 0x5F, 0x58, 0x4D, 0x4A, 0x43, 0x44,
+    0x19, 0x1E, 0x17, 0x10, 0x05, 0x02, 0x0B, 0x0C, 0x21, 0x26, 0x2F, 0x28, 0x3D, 0x3A, 0x33, 0x34,
+    0x4E, 0x49, 0x40, 0x47, 0x52, 0x55, 0x5C, 0x5B, 0x76, 0x71, 0x78, 0x7F, 0x6A, 0x6D, 0x64, 0x63,
+    0x3E, 0x39, 0x30, 0x37, 0x22, 0x25, 0x2C, 0x2B, 0x06, 0x01, 0x08, 0x0F, 0x1A, 0x1D, 0x14, 0x13,
+    0xAE, 0xA9, 0xA0, 0xA7, 0xB2, 0xB5, 0xBC, 0xBB, 0x96, 0x91, 0x98, 0x9F, 0x8A, 0x8D, 0x84, 0x83,
+    0xDE, 0xD9, 0xD0, 0xD7, 0xC2, 0xC5, 0xCC, 0xCB, 0xE6, 0xE1, 0xE8, 0xEF, 0xFA, 0xFD, 0xF4, 0xF3
+};
+
+static drflac_uint16 drflac__crc16_table[] = {
+    0x0000, 0x8005, 0x800F, 0x000A, 0x801B, 0x001E, 0x0014, 0x8011,
+    0x8033, 0x0036, 0x003C, 0x8039, 0x0028, 0x802D, 0x8027, 0x0022,
+    0x8063, 0x0066, 0x006C, 0x8069, 0x0078, 0x807D, 0x8077, 0x0072,
+    0x0050, 0x8055, 0x805F, 0x005A, 0x804B, 0x004E, 0x0044, 0x8041,
+    0x80C3, 0x00C6, 0x00CC, 0x80C9, 0x00D8, 0x80DD, 0x80D7, 0x00D2,
+    0x00F0, 0x80F5, 0x80FF, 0x00FA, 0x80EB, 0x00EE, 0x00E4, 0x80E1,
+    0x00A0, 0x80A5, 0x80AF, 0x00AA, 0x80BB, 0x00BE, 0x00B4, 0x80B1,
+    0x8093, 0x0096, 0x009C, 0x8099, 0x0088, 0x808D, 0x8087, 0x0082,
+    0x8183, 0x0186, 0x018C, 0x8189, 0x0198, 0x819D, 0x8197, 0x0192,
+    0x01B0, 0x81B5, 0x81BF, 0x01BA, 0x81AB, 0x01AE, 0x01A4, 0x81A1,
+    0x01E0, 0x81E5, 0x81EF, 0x01EA, 0x81FB, 0x01FE, 0x01F4, 0x81F1,
+    0x81D3, 0x01D6, 0x01DC, 0x81D9, 0x01C8, 0x81CD, 0x81C7, 0x01C2,
+    0x0140, 0x8145, 0x814F, 0x014A, 0x815B, 0x015E, 0x0154, 0x8151,
+    0x8173, 0x0176, 0x017C, 0x8179, 0x0168, 0x816D, 0x8167, 0x0162,
+    0x8123, 0x0126, 0x012C, 0x8129, 0x0138, 0x813D, 0x8137, 0x0132,
+    0x0110, 0x8115, 0x811F, 0x011A, 0x810B, 0x010E, 0x0104, 0x8101,
+    0x8303, 0x0306, 0x030C, 0x8309, 0x0318, 0x831D, 0x8317, 0x0312,
+    0x0330, 0x8335, 0x833F, 0x033A, 0x832B, 0x032E, 0x0324, 0x8321,
+    0x0360, 0x8365, 0x836F, 0x036A, 0x837B, 0x037E, 0x0374, 0x8371,
+    0x8353, 0x0356, 0x035C, 0x8359, 0x0348, 0x834D, 0x8347, 0x0342,
+    0x03C0, 0x83C5, 0x83CF, 0x03CA, 0x83DB, 0x03DE, 0x03D4, 0x83D1,
+    0x83F3, 0x03F6, 0x03FC, 0x83F9, 0x03E8, 0x83ED, 0x83E7, 0x03E2,
+    0x83A3, 0x03A6, 0x03AC, 0x83A9, 0x03B8, 0x83BD, 0x83B7, 0x03B2,
+    0x0390, 0x8395, 0x839F, 0x039A, 0x838B, 0x038E, 0x0384, 0x8381,
+    0x0280, 0x8285, 0x828F, 0x028A, 0x829B, 0x029E, 0x0294, 0x8291,
+    0x82B3, 0x02B6, 0x02BC, 0x82B9, 0x02A8, 0x82AD, 0x82A7, 0x02A2,
+    0x82E3, 0x02E6, 0x02EC, 0x82E9, 0x02F8, 0x82FD, 0x82F7, 0x02F2,
+    0x02D0, 0x82D5, 0x82DF, 0x02DA, 0x82CB, 0x02CE, 0x02C4, 0x82C1,
+    0x8243, 0x0246, 0x024C, 0x8249, 0x0258, 0x825D, 0x8257, 0x0252,
+    0x0270, 0x8275, 0x827F, 0x027A, 0x826B, 0x026E, 0x0264, 0x8261,
+    0x0220, 0x8225, 0x822F, 0x022A, 0x823B, 0x023E, 0x0234, 0x8231,
+    0x8213, 0x0216, 0x021C, 0x8219, 0x0208, 0x820D, 0x8207, 0x0202
+};
+
+static DRFLAC_INLINE drflac_uint8 drflac_crc8_byte(drflac_uint8 crc, drflac_uint8 data)
+{
+    return drflac__crc8_table[crc ^ data];
+}
+
+static DRFLAC_INLINE drflac_uint8 drflac_crc8(drflac_uint8 crc, drflac_uint32 data, drflac_uint32 count)
+{
+#ifdef DR_FLAC_NO_CRC
+    (void)crc;
+    (void)data;
+    (void)count;
+    return 0;
+#else
+#if 0
+    /* REFERENCE (use of this implementation requires an explicit flush by doing "drflac_crc8(crc, 0, 8);") */
+    drflac_uint8 p = 0x07;
+    for (int i = count-1; i >= 0; --i) {
+        drflac_uint8 bit = (data & (1 << i)) >> i;
+        if (crc & 0x80) {
+            crc = ((crc << 1) | bit) ^ p;
+        } else {
+            crc = ((crc << 1) | bit);
+        }
+    }
+    return crc;
+#else
+    drflac_uint32 wholeBytes;
+    drflac_uint32 leftoverBits;
+    drflac_uint64 leftoverDataMask;
+
+    static drflac_uint64 leftoverDataMaskTable[8] = {
+        0x00, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F
+    };
+
+    DRFLAC_ASSERT(count <= 32);
+
+    wholeBytes = count >> 3;
+    leftoverBits = count - (wholeBytes*8);
+    leftoverDataMask = leftoverDataMaskTable[leftoverBits];
+
+    switch (wholeBytes) {
+        case 4: crc = drflac_crc8_byte(crc, (drflac_uint8)((data & (0xFF000000UL << leftoverBits)) >> (24 + leftoverBits)));
+        case 3: crc = drflac_crc8_byte(crc, (drflac_uint8)((data & (0x00FF0000UL << leftoverBits)) >> (16 + leftoverBits)));
+        case 2: crc = drflac_crc8_byte(crc, (drflac_uint8)((data & (0x0000FF00UL << leftoverBits)) >> ( 8 + leftoverBits)));
+        case 1: crc = drflac_crc8_byte(crc, (drflac_uint8)((data & (0x000000FFUL << leftoverBits)) >> ( 0 + leftoverBits)));
+        case 0: if (leftoverBits > 0) crc = (drflac_uint8)((crc << leftoverBits) ^ drflac__crc8_table[(crc >> (8 - leftoverBits)) ^ (data & leftoverDataMask)]);
+    }
+    return crc;
+#endif
+#endif
+}
+
+static DRFLAC_INLINE drflac_uint16 drflac_crc16_byte(drflac_uint16 crc, drflac_uint8 data)
+{
+    return (crc << 8) ^ drflac__crc16_table[(drflac_uint8)(crc >> 8) ^ data];
+}
+
+static DRFLAC_INLINE drflac_uint16 drflac_crc16_cache(drflac_uint16 crc, drflac_cache_t data)
+{
+#ifdef DRFLAC_64BIT
+    crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 56) & 0xFF));
+    crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 48) & 0xFF));
+    crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 40) & 0xFF));
+    crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 32) & 0xFF));
+#endif
+    crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 24) & 0xFF));
+    crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 16) & 0xFF));
+    crc = drflac_crc16_byte(crc, (drflac_uint8)((data >>  8) & 0xFF));
+    crc = drflac_crc16_byte(crc, (drflac_uint8)((data >>  0) & 0xFF));
+
+    return crc;
+}
+
+static DRFLAC_INLINE drflac_uint16 drflac_crc16_bytes(drflac_uint16 crc, drflac_cache_t data, drflac_uint32 byteCount)
+{
+    switch (byteCount)
+    {
+#ifdef DRFLAC_64BIT
+    case 8: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 56) & 0xFF));
+    case 7: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 48) & 0xFF));
+    case 6: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 40) & 0xFF));
+    case 5: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 32) & 0xFF));
+#endif
+    case 4: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 24) & 0xFF));
+    case 3: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >> 16) & 0xFF));
+    case 2: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >>  8) & 0xFF));
+    case 1: crc = drflac_crc16_byte(crc, (drflac_uint8)((data >>  0) & 0xFF));
+    }
+
+    return crc;
+}
+
+#if 0
+static DRFLAC_INLINE drflac_uint16 drflac_crc16__32bit(drflac_uint16 crc, drflac_uint32 data, drflac_uint32 count)
+{
+#ifdef DR_FLAC_NO_CRC
+    (void)crc;
+    (void)data;
+    (void)count;
+    return 0;
+#else
+#if 0
+    /* REFERENCE (use of this implementation requires an explicit flush by doing "drflac_crc16(crc, 0, 16);") */
+    drflac_uint16 p = 0x8005;
+    for (int i = count-1; i >= 0; --i) {
+        drflac_uint16 bit = (data & (1ULL << i)) >> i;
+        if (r & 0x8000) {
+            r = ((r << 1) | bit) ^ p;
+        } else {
+            r = ((r << 1) | bit);
+        }
+    }
+
+    return crc;
+#else
+    drflac_uint32 wholeBytes;
+    drflac_uint32 leftoverBits;
+    drflac_uint64 leftoverDataMask;
+
+    static drflac_uint64 leftoverDataMaskTable[8] = {
+        0x00, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F
+    };
+
+    DRFLAC_ASSERT(count <= 64);
+
+    wholeBytes = count >> 3;
+    leftoverBits = count & 7;
+    leftoverDataMask = leftoverDataMaskTable[leftoverBits];
+
+    switch (wholeBytes) {
+        default:
+        case 4: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (0xFF000000UL << leftoverBits)) >> (24 + leftoverBits)));
+        case 3: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (0x00FF0000UL << leftoverBits)) >> (16 + leftoverBits)));
+        case 2: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (0x0000FF00UL << leftoverBits)) >> ( 8 + leftoverBits)));
+        case 1: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (0x000000FFUL << leftoverBits)) >> ( 0 + leftoverBits)));
+        case 0: if (leftoverBits > 0) crc = (crc << leftoverBits) ^ drflac__crc16_table[(crc >> (16 - leftoverBits)) ^ (data & leftoverDataMask)];
+    }
+    return crc;
+#endif
+#endif
+}
+
+static DRFLAC_INLINE drflac_uint16 drflac_crc16__64bit(drflac_uint16 crc, drflac_uint64 data, drflac_uint32 count)
+{
+#ifdef DR_FLAC_NO_CRC
+    (void)crc;
+    (void)data;
+    (void)count;
+    return 0;
+#else
+    drflac_uint32 wholeBytes;
+    drflac_uint32 leftoverBits;
+    drflac_uint64 leftoverDataMask;
+
+    static drflac_uint64 leftoverDataMaskTable[8] = {
+        0x00, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F, 0x7F
+    };
+
+    DRFLAC_ASSERT(count <= 64);
+
+    wholeBytes = count >> 3;
+    leftoverBits = count & 7;
+    leftoverDataMask = leftoverDataMaskTable[leftoverBits];
+
+    switch (wholeBytes) {
+        default:
+        case 8: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0xFF000000 << 32) << leftoverBits)) >> (56 + leftoverBits)));    /* Weird "<< 32" bitshift is required for C89 because it doesn't support 64-bit constants. Should be optimized out by a good compiler. */
+        case 7: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x00FF0000 << 32) << leftoverBits)) >> (48 + leftoverBits)));
+        case 6: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x0000FF00 << 32) << leftoverBits)) >> (40 + leftoverBits)));
+        case 5: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x000000FF << 32) << leftoverBits)) >> (32 + leftoverBits)));
+        case 4: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0xFF000000      ) << leftoverBits)) >> (24 + leftoverBits)));
+        case 3: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x00FF0000      ) << leftoverBits)) >> (16 + leftoverBits)));
+        case 2: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x0000FF00      ) << leftoverBits)) >> ( 8 + leftoverBits)));
+        case 1: crc = drflac_crc16_byte(crc, (drflac_uint8)((data & (((drflac_uint64)0x000000FF      ) << leftoverBits)) >> ( 0 + leftoverBits)));
+        case 0: if (leftoverBits > 0) crc = (crc << leftoverBits) ^ drflac__crc16_table[(crc >> (16 - leftoverBits)) ^ (data & leftoverDataMask)];
+    }
+    return crc;
+#endif
+}
+
+
+static DRFLAC_INLINE drflac_uint16 drflac_crc16(drflac_uint16 crc, drflac_cache_t data, drflac_uint32 count)
+{
+#ifdef DRFLAC_64BIT
+    return drflac_crc16__64bit(crc, data, count);
+#else
+    return drflac_crc16__32bit(crc, data, count);
+#endif
+}
+#endif
+
+
+#ifdef DRFLAC_64BIT
+#define drflac__be2host__cache_line drflac__be2host_64
+#else
+#define drflac__be2host__cache_line drflac__be2host_32
+#endif
+
+/*
+BIT READING ATTEMPT #2
+
+This uses a 32- or 64-bit bit-shifted cache - as bits are read, the cache is shifted such that the first valid bit is sitting
+on the most significant bit. It uses the notion of an L1 and L2 cache (borrowed from CPU architecture), where the L1 cache
+is a 32- or 64-bit unsigned integer (depending on whether or not a 32- or 64-bit build is being compiled) and the L2 is an
+array of "cache lines", with each cache line being the same size as the L1. The L2 is a buffer of about 4KB and is where data
+from onRead() is read into.
+*/
+#define DRFLAC_CACHE_L1_SIZE_BYTES(bs)                      (sizeof((bs)->cache))
+#define DRFLAC_CACHE_L1_SIZE_BITS(bs)                       (sizeof((bs)->cache)*8)
+#define DRFLAC_CACHE_L1_BITS_REMAINING(bs)                  (DRFLAC_CACHE_L1_SIZE_BITS(bs) - (bs)->consumedBits)
+#define DRFLAC_CACHE_L1_SELECTION_MASK(_bitCount)           (~((~(drflac_cache_t)0) >> (_bitCount)))
+#define DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, _bitCount)      (DRFLAC_CACHE_L1_SIZE_BITS(bs) - (_bitCount))
+#define DRFLAC_CACHE_L1_SELECT(bs, _bitCount)               (((bs)->cache) & DRFLAC_CACHE_L1_SELECTION_MASK(_bitCount))
+#define DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, _bitCount)     (DRFLAC_CACHE_L1_SELECT((bs), (_bitCount)) >>  DRFLAC_CACHE_L1_SELECTION_SHIFT((bs), (_bitCount)))
+#define DRFLAC_CACHE_L1_SELECT_AND_SHIFT_SAFE(bs, _bitCount)(DRFLAC_CACHE_L1_SELECT((bs), (_bitCount)) >> (DRFLAC_CACHE_L1_SELECTION_SHIFT((bs), (_bitCount)) & (DRFLAC_CACHE_L1_SIZE_BITS(bs)-1)))
+#define DRFLAC_CACHE_L2_SIZE_BYTES(bs)                      (sizeof((bs)->cacheL2))
+#define DRFLAC_CACHE_L2_LINE_COUNT(bs)                      (DRFLAC_CACHE_L2_SIZE_BYTES(bs) / sizeof((bs)->cacheL2[0]))
+#define DRFLAC_CACHE_L2_LINES_REMAINING(bs)                 (DRFLAC_CACHE_L2_LINE_COUNT(bs) - (bs)->nextL2Line)
+
+
+#ifndef DR_FLAC_NO_CRC
+static DRFLAC_INLINE void drflac__reset_crc16(drflac_bs* bs)
+{
+    bs->crc16 = 0;
+    bs->crc16CacheIgnoredBytes = bs->consumedBits >> 3;
+}
+
+static DRFLAC_INLINE void drflac__update_crc16(drflac_bs* bs)
+{
+    if (bs->crc16CacheIgnoredBytes == 0) {
+        bs->crc16 = drflac_crc16_cache(bs->crc16, bs->crc16Cache);
+    } else {
+        bs->crc16 = drflac_crc16_bytes(bs->crc16, bs->crc16Cache, DRFLAC_CACHE_L1_SIZE_BYTES(bs) - bs->crc16CacheIgnoredBytes);
+        bs->crc16CacheIgnoredBytes = 0;
+    }
+}
+
+static DRFLAC_INLINE drflac_uint16 drflac__flush_crc16(drflac_bs* bs)
+{
+    /* We should never be flushing in a situation where we are not aligned on a byte boundary. */
+    DRFLAC_ASSERT((DRFLAC_CACHE_L1_BITS_REMAINING(bs) & 7) == 0);
+
+    /*
+    The bits that were read from the L1 cache need to be accumulated. The number of bytes needing to be accumulated is determined
+    by the number of bits that have been consumed.
+    */
+    if (DRFLAC_CACHE_L1_BITS_REMAINING(bs) == 0) {
+        drflac__update_crc16(bs);
+    } else {
+        /* We only accumulate the consumed bits. */
+        bs->crc16 = drflac_crc16_bytes(bs->crc16, bs->crc16Cache >> DRFLAC_CACHE_L1_BITS_REMAINING(bs), (bs->consumedBits >> 3) - bs->crc16CacheIgnoredBytes);
+
+        /*
+        The bits that we just accumulated should never be accumulated again. We need to keep track of how many bytes were accumulated
+        so we can handle that later.
+        */
+        bs->crc16CacheIgnoredBytes = bs->consumedBits >> 3;
+    }
+
+    return bs->crc16;
+}
+#endif
+
+static DRFLAC_INLINE drflac_bool32 drflac__reload_l1_cache_from_l2(drflac_bs* bs)
+{
+    size_t bytesRead;
+    size_t alignedL1LineCount;
+
+    /* Fast path. Try loading straight from L2. */
+    if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {
+        bs->cache = bs->cacheL2[bs->nextL2Line++];
+        return DRFLAC_TRUE;
+    }
+
+    /*
+    If we get here it means we've run out of data in the L2 cache. We'll need to fetch more from the client, if there's
+    any left.
+    */
+    if (bs->unalignedByteCount > 0) {
+        return DRFLAC_FALSE;   /* If we have any unaligned bytes it means there's no more aligned bytes left in the client. */
+    }
+
+    bytesRead = bs->onRead(bs->pUserData, bs->cacheL2, DRFLAC_CACHE_L2_SIZE_BYTES(bs));
+
+    bs->nextL2Line = 0;
+    if (bytesRead == DRFLAC_CACHE_L2_SIZE_BYTES(bs)) {
+        bs->cache = bs->cacheL2[bs->nextL2Line++];
+        return DRFLAC_TRUE;
+    }
+
+
+    /*
+    If we get here it means we were unable to retrieve enough data to fill the entire L2 cache. It probably
+    means we've just reached the end of the file. We need to move the valid data down to the end of the buffer
+    and adjust the index of the next line accordingly. Also keep in mind that the L2 cache must be aligned to
+    the size of the L1 so we'll need to seek backwards by any misaligned bytes.
+    */
+    alignedL1LineCount = bytesRead / DRFLAC_CACHE_L1_SIZE_BYTES(bs);
+
+    /* We need to keep track of any unaligned bytes for later use. */
+    bs->unalignedByteCount = bytesRead - (alignedL1LineCount * DRFLAC_CACHE_L1_SIZE_BYTES(bs));
+    if (bs->unalignedByteCount > 0) {
+        bs->unalignedCache = bs->cacheL2[alignedL1LineCount];
+    }
+
+    if (alignedL1LineCount > 0) {
+        size_t offset = DRFLAC_CACHE_L2_LINE_COUNT(bs) - alignedL1LineCount;
+        size_t i;
+        for (i = alignedL1LineCount; i > 0; --i) {
+            bs->cacheL2[i-1 + offset] = bs->cacheL2[i-1];
+        }
+
+        bs->nextL2Line = (drflac_uint32)offset;
+        bs->cache = bs->cacheL2[bs->nextL2Line++];
+        return DRFLAC_TRUE;
+    } else {
+        /* If we get into this branch it means we weren't able to load any L1-aligned data. */
+        bs->nextL2Line = DRFLAC_CACHE_L2_LINE_COUNT(bs);
+        return DRFLAC_FALSE;
+    }
+}
+
+static drflac_bool32 drflac__reload_cache(drflac_bs* bs)
+{
+    size_t bytesRead;
+
+#ifndef DR_FLAC_NO_CRC
+    drflac__update_crc16(bs);
+#endif
+
+    /* Fast path. Try just moving the next value in the L2 cache to the L1 cache. */
+    if (drflac__reload_l1_cache_from_l2(bs)) {
+        bs->cache = drflac__be2host__cache_line(bs->cache);
+        bs->consumedBits = 0;
+#ifndef DR_FLAC_NO_CRC
+        bs->crc16Cache = bs->cache;
+#endif
+        return DRFLAC_TRUE;
+    }
+
+    /* Slow path. */
+
+    /*
+    If we get here it means we have failed to load the L1 cache from the L2. Likely we've just reached the end of the stream and the last
+    few bytes did not meet the alignment requirements for the L2 cache. In this case we need to fall back to a slower path and read the
+    data from the unaligned cache.
+    */
+    bytesRead = bs->unalignedByteCount;
+    if (bytesRead == 0) {
+        bs->consumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs);   /* <-- The stream has been exhausted, so marked the bits as consumed. */
+        return DRFLAC_FALSE;
+    }
+
+    DRFLAC_ASSERT(bytesRead < DRFLAC_CACHE_L1_SIZE_BYTES(bs));
+    bs->consumedBits = (drflac_uint32)(DRFLAC_CACHE_L1_SIZE_BYTES(bs) - bytesRead) * 8;
+
+    bs->cache = drflac__be2host__cache_line(bs->unalignedCache);
+    bs->cache &= DRFLAC_CACHE_L1_SELECTION_MASK(DRFLAC_CACHE_L1_BITS_REMAINING(bs));    /* <-- Make sure the consumed bits are always set to zero. Other parts of the library depend on this property. */
+    bs->unalignedByteCount = 0;     /* <-- At this point the unaligned bytes have been moved into the cache and we thus have no more unaligned bytes. */
+
+#ifndef DR_FLAC_NO_CRC
+    bs->crc16Cache = bs->cache >> bs->consumedBits;
+    bs->crc16CacheIgnoredBytes = bs->consumedBits >> 3;
+#endif
+    return DRFLAC_TRUE;
+}
+
+static void drflac__reset_cache(drflac_bs* bs)
+{
+    bs->nextL2Line   = DRFLAC_CACHE_L2_LINE_COUNT(bs);  /* <-- This clears the L2 cache. */
+    bs->consumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs);   /* <-- This clears the L1 cache. */
+    bs->cache = 0;
+    bs->unalignedByteCount = 0;                         /* <-- This clears the trailing unaligned bytes. */
+    bs->unalignedCache = 0;
+
+#ifndef DR_FLAC_NO_CRC
+    bs->crc16Cache = 0;
+    bs->crc16CacheIgnoredBytes = 0;
+#endif
+}
+
+
+static DRFLAC_INLINE drflac_bool32 drflac__read_uint32(drflac_bs* bs, unsigned int bitCount, drflac_uint32* pResultOut)
+{
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(pResultOut != NULL);
+    DRFLAC_ASSERT(bitCount > 0);
+    DRFLAC_ASSERT(bitCount <= 32);
+
+    if (bs->consumedBits == DRFLAC_CACHE_L1_SIZE_BITS(bs)) {
+        if (!drflac__reload_cache(bs)) {
+            return DRFLAC_FALSE;
+        }
+    }
+
+    if (bitCount <= DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
+        /*
+        If we want to load all 32-bits from a 32-bit cache we need to do it slightly differently because we can't do
+        a 32-bit shift on a 32-bit integer. This will never be the case on 64-bit caches, so we can have a slightly
+        more optimal solution for this.
+        */
+#ifdef DRFLAC_64BIT
+        *pResultOut = (drflac_uint32)DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCount);
+        bs->consumedBits += bitCount;
+        bs->cache <<= bitCount;
+#else
+        if (bitCount < DRFLAC_CACHE_L1_SIZE_BITS(bs)) {
+            *pResultOut = (drflac_uint32)DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCount);
+            bs->consumedBits += bitCount;
+            bs->cache <<= bitCount;
+        } else {
+            /* Cannot shift by 32-bits, so need to do it differently. */
+            *pResultOut = (drflac_uint32)bs->cache;
+            bs->consumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs);
+            bs->cache = 0;
+        }
+#endif
+
+        return DRFLAC_TRUE;
+    } else {
+        /* It straddles the cached data. It will never cover more than the next chunk. We just read the number in two parts and combine them. */
+        drflac_uint32 bitCountHi = DRFLAC_CACHE_L1_BITS_REMAINING(bs);
+        drflac_uint32 bitCountLo = bitCount - bitCountHi;
+        drflac_uint32 resultHi;
+
+        DRFLAC_ASSERT(bitCountHi > 0);
+        DRFLAC_ASSERT(bitCountHi < 32);
+        resultHi = (drflac_uint32)DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCountHi);
+
+        if (!drflac__reload_cache(bs)) {
+            return DRFLAC_FALSE;
+        }
+
+        *pResultOut = (resultHi << bitCountLo) | (drflac_uint32)DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, bitCountLo);
+        bs->consumedBits += bitCountLo;
+        bs->cache <<= bitCountLo;
+        return DRFLAC_TRUE;
+    }
+}
+
+static drflac_bool32 drflac__read_int32(drflac_bs* bs, unsigned int bitCount, drflac_int32* pResult)
+{
+    drflac_uint32 result;
+    drflac_uint32 signbit;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(pResult != NULL);
+    DRFLAC_ASSERT(bitCount > 0);
+    DRFLAC_ASSERT(bitCount <= 32);
+
+    if (!drflac__read_uint32(bs, bitCount, &result)) {
+        return DRFLAC_FALSE;
+    }
+
+    signbit = ((result >> (bitCount-1)) & 0x01);
+    result |= (~signbit + 1) << bitCount;
+
+    *pResult = (drflac_int32)result;
+    return DRFLAC_TRUE;
+}
+
+#ifdef DRFLAC_64BIT
+static drflac_bool32 drflac__read_uint64(drflac_bs* bs, unsigned int bitCount, drflac_uint64* pResultOut)
+{
+    drflac_uint32 resultHi;
+    drflac_uint32 resultLo;
+
+    DRFLAC_ASSERT(bitCount <= 64);
+    DRFLAC_ASSERT(bitCount >  32);
+
+    if (!drflac__read_uint32(bs, bitCount - 32, &resultHi)) {
+        return DRFLAC_FALSE;
+    }
+
+    if (!drflac__read_uint32(bs, 32, &resultLo)) {
+        return DRFLAC_FALSE;
+    }
+
+    *pResultOut = (((drflac_uint64)resultHi) << 32) | ((drflac_uint64)resultLo);
+    return DRFLAC_TRUE;
+}
+#endif
+
+/* Function below is unused, but leaving it here in case I need to quickly add it again. */
+#if 0
+static drflac_bool32 drflac__read_int64(drflac_bs* bs, unsigned int bitCount, drflac_int64* pResultOut)
+{
+    drflac_uint64 result;
+    drflac_uint64 signbit;
+
+    DRFLAC_ASSERT(bitCount <= 64);
+
+    if (!drflac__read_uint64(bs, bitCount, &result)) {
+        return DRFLAC_FALSE;
+    }
+
+    signbit = ((result >> (bitCount-1)) & 0x01);
+    result |= (~signbit + 1) << bitCount;
+
+    *pResultOut = (drflac_int64)result;
+    return DRFLAC_TRUE;
+}
+#endif
+
+static drflac_bool32 drflac__read_uint16(drflac_bs* bs, unsigned int bitCount, drflac_uint16* pResult)
+{
+    drflac_uint32 result;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(pResult != NULL);
+    DRFLAC_ASSERT(bitCount > 0);
+    DRFLAC_ASSERT(bitCount <= 16);
+
+    if (!drflac__read_uint32(bs, bitCount, &result)) {
+        return DRFLAC_FALSE;
+    }
+
+    *pResult = (drflac_uint16)result;
+    return DRFLAC_TRUE;
+}
+
+#if 0
+static drflac_bool32 drflac__read_int16(drflac_bs* bs, unsigned int bitCount, drflac_int16* pResult)
+{
+    drflac_int32 result;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(pResult != NULL);
+    DRFLAC_ASSERT(bitCount > 0);
+    DRFLAC_ASSERT(bitCount <= 16);
+
+    if (!drflac__read_int32(bs, bitCount, &result)) {
+        return DRFLAC_FALSE;
+    }
+
+    *pResult = (drflac_int16)result;
+    return DRFLAC_TRUE;
+}
+#endif
+
+static drflac_bool32 drflac__read_uint8(drflac_bs* bs, unsigned int bitCount, drflac_uint8* pResult)
+{
+    drflac_uint32 result;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(pResult != NULL);
+    DRFLAC_ASSERT(bitCount > 0);
+    DRFLAC_ASSERT(bitCount <= 8);
+
+    if (!drflac__read_uint32(bs, bitCount, &result)) {
+        return DRFLAC_FALSE;
+    }
+
+    *pResult = (drflac_uint8)result;
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__read_int8(drflac_bs* bs, unsigned int bitCount, drflac_int8* pResult)
+{
+    drflac_int32 result;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(pResult != NULL);
+    DRFLAC_ASSERT(bitCount > 0);
+    DRFLAC_ASSERT(bitCount <= 8);
+
+    if (!drflac__read_int32(bs, bitCount, &result)) {
+        return DRFLAC_FALSE;
+    }
+
+    *pResult = (drflac_int8)result;
+    return DRFLAC_TRUE;
+}
+
+
+static drflac_bool32 drflac__seek_bits(drflac_bs* bs, size_t bitsToSeek)
+{
+    if (bitsToSeek <= DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
+        bs->consumedBits += (drflac_uint32)bitsToSeek;
+        bs->cache <<= bitsToSeek;
+        return DRFLAC_TRUE;
+    } else {
+        /* It straddles the cached data. This function isn't called too frequently so I'm favouring simplicity here. */
+        bitsToSeek       -= DRFLAC_CACHE_L1_BITS_REMAINING(bs);
+        bs->consumedBits += DRFLAC_CACHE_L1_BITS_REMAINING(bs);
+        bs->cache         = 0;
+
+        /* Simple case. Seek in groups of the same number as bits that fit within a cache line. */
+#ifdef DRFLAC_64BIT
+        while (bitsToSeek >= DRFLAC_CACHE_L1_SIZE_BITS(bs)) {
+            drflac_uint64 bin;
+            if (!drflac__read_uint64(bs, DRFLAC_CACHE_L1_SIZE_BITS(bs), &bin)) {
+                return DRFLAC_FALSE;
+            }
+            bitsToSeek -= DRFLAC_CACHE_L1_SIZE_BITS(bs);
+        }
+#else
+        while (bitsToSeek >= DRFLAC_CACHE_L1_SIZE_BITS(bs)) {
+            drflac_uint32 bin;
+            if (!drflac__read_uint32(bs, DRFLAC_CACHE_L1_SIZE_BITS(bs), &bin)) {
+                return DRFLAC_FALSE;
+            }
+            bitsToSeek -= DRFLAC_CACHE_L1_SIZE_BITS(bs);
+        }
+#endif
+
+        /* Whole leftover bytes. */
+        while (bitsToSeek >= 8) {
+            drflac_uint8 bin;
+            if (!drflac__read_uint8(bs, 8, &bin)) {
+                return DRFLAC_FALSE;
+            }
+            bitsToSeek -= 8;
+        }
+
+        /* Leftover bits. */
+        if (bitsToSeek > 0) {
+            drflac_uint8 bin;
+            if (!drflac__read_uint8(bs, (drflac_uint32)bitsToSeek, &bin)) {
+                return DRFLAC_FALSE;
+            }
+            bitsToSeek = 0; /* <-- Necessary for the assert below. */
+        }
+
+        DRFLAC_ASSERT(bitsToSeek == 0);
+        return DRFLAC_TRUE;
+    }
+}
+
+
+/* This function moves the bit streamer to the first bit after the sync code (bit 15 of the of the frame header). It will also update the CRC-16. */
+static drflac_bool32 drflac__find_and_seek_to_next_sync_code(drflac_bs* bs)
+{
+    DRFLAC_ASSERT(bs != NULL);
+
+    /*
+    The sync code is always aligned to 8 bits. This is convenient for us because it means we can do byte-aligned movements. The first
+    thing to do is align to the next byte.
+    */
+    if (!drflac__seek_bits(bs, DRFLAC_CACHE_L1_BITS_REMAINING(bs) & 7)) {
+        return DRFLAC_FALSE;
+    }
+
+    for (;;) {
+        drflac_uint8 hi;
+
+#ifndef DR_FLAC_NO_CRC
+        drflac__reset_crc16(bs);
+#endif
+
+        if (!drflac__read_uint8(bs, 8, &hi)) {
+            return DRFLAC_FALSE;
+        }
+
+        if (hi == 0xFF) {
+            drflac_uint8 lo;
+            if (!drflac__read_uint8(bs, 6, &lo)) {
+                return DRFLAC_FALSE;
+            }
+
+            if (lo == 0x3E) {
+                return DRFLAC_TRUE;
+            } else {
+                if (!drflac__seek_bits(bs, DRFLAC_CACHE_L1_BITS_REMAINING(bs) & 7)) {
+                    return DRFLAC_FALSE;
+                }
+            }
+        }
+    }
+
+    /* Should never get here. */
+    /*return DRFLAC_FALSE;*/
+}
+
+
+#if defined(DRFLAC_HAS_LZCNT_INTRINSIC)
+#define DRFLAC_IMPLEMENT_CLZ_LZCNT
+#endif
+#if  defined(_MSC_VER) && _MSC_VER >= 1400 && (defined(DRFLAC_X64) || defined(DRFLAC_X86))
+#define DRFLAC_IMPLEMENT_CLZ_MSVC
+#endif
+
+static DRFLAC_INLINE drflac_uint32 drflac__clz_software(drflac_cache_t x)
+{
+    drflac_uint32 n;
+    static drflac_uint32 clz_table_4[] = {
+        0,
+        4,
+        3, 3,
+        2, 2, 2, 2,
+        1, 1, 1, 1, 1, 1, 1, 1
+    };
+
+    if (x == 0) {
+        return sizeof(x)*8;
+    }
+
+    n = clz_table_4[x >> (sizeof(x)*8 - 4)];
+    if (n == 0) {
+#ifdef DRFLAC_64BIT
+        if ((x & ((drflac_uint64)0xFFFFFFFF << 32)) == 0) { n  = 32; x <<= 32; }
+        if ((x & ((drflac_uint64)0xFFFF0000 << 32)) == 0) { n += 16; x <<= 16; }
+        if ((x & ((drflac_uint64)0xFF000000 << 32)) == 0) { n += 8;  x <<= 8;  }
+        if ((x & ((drflac_uint64)0xF0000000 << 32)) == 0) { n += 4;  x <<= 4;  }
+#else
+        if ((x & 0xFFFF0000) == 0) { n  = 16; x <<= 16; }
+        if ((x & 0xFF000000) == 0) { n += 8;  x <<= 8;  }
+        if ((x & 0xF0000000) == 0) { n += 4;  x <<= 4;  }
+#endif
+        n += clz_table_4[x >> (sizeof(x)*8 - 4)];
+    }
+
+    return n - 1;
+}
+
+#ifdef DRFLAC_IMPLEMENT_CLZ_LZCNT
+static DRFLAC_INLINE drflac_bool32 drflac__is_lzcnt_supported(void)
+{
+    /* Fast compile time check for ARM. */
+#if defined(DRFLAC_HAS_LZCNT_INTRINSIC) && defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 5)
+    return DRFLAC_TRUE;
+#else
+    /* If the compiler itself does not support the intrinsic then we'll need to return false. */
+    #ifdef DRFLAC_HAS_LZCNT_INTRINSIC
+        return drflac__gIsLZCNTSupported;
+    #else
+        return DRFLAC_FALSE;
+    #endif
+#endif
+}
+
+static DRFLAC_INLINE drflac_uint32 drflac__clz_lzcnt(drflac_cache_t x)
+{
+#if defined(_MSC_VER) && !defined(__clang__)
+    #ifdef DRFLAC_64BIT
+        return (drflac_uint32)__lzcnt64(x);
+    #else
+        return (drflac_uint32)__lzcnt(x);
+    #endif
+#else
+    #if defined(__GNUC__) || defined(__clang__)
+        #if defined(DRFLAC_X64)
+            {
+                drflac_uint64 r;
+                __asm__ __volatile__ (
+                    "lzcnt{ %1, %0| %0, %1}" : "=r"(r) : "r"(x)
+                );
+
+                return (drflac_uint32)r;
+            }
+        #elif defined(DRFLAC_X86)
+            {
+                drflac_uint32 r;
+                __asm__ __volatile__ (
+                    "lzcnt{l %1, %0| %0, %1}" : "=r"(r) : "r"(x)
+                );
+
+                return r;
+            }
+        #elif defined(DRFLAC_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 5) && !defined(DRFLAC_64BIT)   /* <-- I haven't tested 64-bit inline assembly, so only enabling this for the 32-bit build for now. */
+            {
+                unsigned int r;
+                __asm__ __volatile__ (
+                #if defined(DRFLAC_64BIT)
+                    "clz %w[out], %w[in]" : [out]"=r"(r) : [in]"r"(x)   /* <-- This is untested. If someone in the community could test this, that would be appreciated! */
+                #else
+                    "clz %[out], %[in]" : [out]"=r"(r) : [in]"r"(x)
+                #endif
+                );
+
+                return r;
+            }
+        #else
+            if (x == 0) {
+                return sizeof(x)*8;
+            }
+            #ifdef DRFLAC_64BIT
+                return (drflac_uint32)__builtin_clzll((drflac_uint64)x);
+            #else
+                return (drflac_uint32)__builtin_clzl((drflac_uint32)x);
+            #endif
+        #endif
+    #else
+        /* Unsupported compiler. */
+        #error "This compiler does not support the lzcnt intrinsic."
+    #endif
+#endif
+}
+#endif
+
+#ifdef DRFLAC_IMPLEMENT_CLZ_MSVC
+#include <intrin.h> /* For BitScanReverse(). */
+
+static DRFLAC_INLINE drflac_uint32 drflac__clz_msvc(drflac_cache_t x)
+{
+    drflac_uint32 n;
+
+    if (x == 0) {
+        return sizeof(x)*8;
+    }
+
+#ifdef DRFLAC_64BIT
+    _BitScanReverse64((unsigned long*)&n, x);
+#else
+    _BitScanReverse((unsigned long*)&n, x);
+#endif
+    return sizeof(x)*8 - n - 1;
+}
+#endif
+
+static DRFLAC_INLINE drflac_uint32 drflac__clz(drflac_cache_t x)
+{
+#ifdef DRFLAC_IMPLEMENT_CLZ_LZCNT
+    if (drflac__is_lzcnt_supported()) {
+        return drflac__clz_lzcnt(x);
+    } else
+#endif
+    {
+#ifdef DRFLAC_IMPLEMENT_CLZ_MSVC
+        return drflac__clz_msvc(x);
+#else
+        return drflac__clz_software(x);
+#endif
+    }
+}
+
+
+static DRFLAC_INLINE drflac_bool32 drflac__seek_past_next_set_bit(drflac_bs* bs, unsigned int* pOffsetOut)
+{
+    drflac_uint32 zeroCounter = 0;
+    drflac_uint32 setBitOffsetPlus1;
+
+    while (bs->cache == 0) {
+        zeroCounter += (drflac_uint32)DRFLAC_CACHE_L1_BITS_REMAINING(bs);
+        if (!drflac__reload_cache(bs)) {
+            return DRFLAC_FALSE;
+        }
+    }
+
+    setBitOffsetPlus1 = drflac__clz(bs->cache);
+    setBitOffsetPlus1 += 1;
+
+    bs->consumedBits += setBitOffsetPlus1;
+    bs->cache <<= setBitOffsetPlus1;
+
+    *pOffsetOut = zeroCounter + setBitOffsetPlus1 - 1;
+    return DRFLAC_TRUE;
+}
+
+
+
+static drflac_bool32 drflac__seek_to_byte(drflac_bs* bs, drflac_uint64 offsetFromStart)
+{
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(offsetFromStart > 0);
+
+    /*
+    Seeking from the start is not quite as trivial as it sounds because the onSeek callback takes a signed 32-bit integer (which
+    is intentional because it simplifies the implementation of the onSeek callbacks), however offsetFromStart is unsigned 64-bit.
+    To resolve we just need to do an initial seek from the start, and then a series of offset seeks to make up the remainder.
+    */
+    if (offsetFromStart > 0x7FFFFFFF) {
+        drflac_uint64 bytesRemaining = offsetFromStart;
+        if (!bs->onSeek(bs->pUserData, 0x7FFFFFFF, drflac_seek_origin_start)) {
+            return DRFLAC_FALSE;
+        }
+        bytesRemaining -= 0x7FFFFFFF;
+
+        while (bytesRemaining > 0x7FFFFFFF) {
+            if (!bs->onSeek(bs->pUserData, 0x7FFFFFFF, drflac_seek_origin_current)) {
+                return DRFLAC_FALSE;
+            }
+            bytesRemaining -= 0x7FFFFFFF;
+        }
+
+        if (bytesRemaining > 0) {
+            if (!bs->onSeek(bs->pUserData, (int)bytesRemaining, drflac_seek_origin_current)) {
+                return DRFLAC_FALSE;
+            }
+        }
+    } else {
+        if (!bs->onSeek(bs->pUserData, (int)offsetFromStart, drflac_seek_origin_start)) {
+            return DRFLAC_FALSE;
+        }
+    }
+
+    /* The cache should be reset to force a reload of fresh data from the client. */
+    drflac__reset_cache(bs);
+    return DRFLAC_TRUE;
+}
+
+
+static drflac_result drflac__read_utf8_coded_number(drflac_bs* bs, drflac_uint64* pNumberOut, drflac_uint8* pCRCOut)
+{
+    drflac_uint8 crc;
+    drflac_uint64 result;
+    drflac_uint8 utf8[7] = {0};
+    int byteCount;
+    int i;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(pNumberOut != NULL);
+    DRFLAC_ASSERT(pCRCOut != NULL);
+
+    crc = *pCRCOut;
+
+    if (!drflac__read_uint8(bs, 8, utf8)) {
+        *pNumberOut = 0;
+        return DRFLAC_AT_END;
+    }
+    crc = drflac_crc8(crc, utf8[0], 8);
+
+    if ((utf8[0] & 0x80) == 0) {
+        *pNumberOut = utf8[0];
+        *pCRCOut = crc;
+        return DRFLAC_SUCCESS;
+    }
+
+    /*byteCount = 1;*/
+    if ((utf8[0] & 0xE0) == 0xC0) {
+        byteCount = 2;
+    } else if ((utf8[0] & 0xF0) == 0xE0) {
+        byteCount = 3;
+    } else if ((utf8[0] & 0xF8) == 0xF0) {
+        byteCount = 4;
+    } else if ((utf8[0] & 0xFC) == 0xF8) {
+        byteCount = 5;
+    } else if ((utf8[0] & 0xFE) == 0xFC) {
+        byteCount = 6;
+    } else if ((utf8[0] & 0xFF) == 0xFE) {
+        byteCount = 7;
+    } else {
+        *pNumberOut = 0;
+        return DRFLAC_CRC_MISMATCH;     /* Bad UTF-8 encoding. */
+    }
+
+    /* Read extra bytes. */
+    DRFLAC_ASSERT(byteCount > 1);
+
+    result = (drflac_uint64)(utf8[0] & (0xFF >> (byteCount + 1)));
+    for (i = 1; i < byteCount; ++i) {
+        if (!drflac__read_uint8(bs, 8, utf8 + i)) {
+            *pNumberOut = 0;
+            return DRFLAC_AT_END;
+        }
+        crc = drflac_crc8(crc, utf8[i], 8);
+
+        result = (result << 6) | (utf8[i] & 0x3F);
+    }
+
+    *pNumberOut = result;
+    *pCRCOut = crc;
+    return DRFLAC_SUCCESS;
+}
+
+
+
+/*
+The next two functions are responsible for calculating the prediction.
+
+When the bits per sample is >16 we need to use 64-bit integer arithmetic because otherwise we'll run out of precision. It's
+safe to assume this will be slower on 32-bit platforms so we use a more optimal solution when the bits per sample is <=16.
+*/
+static DRFLAC_INLINE drflac_int32 drflac__calculate_prediction_32(drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pDecodedSamples)
+{
+    drflac_int32 prediction = 0;
+
+    DRFLAC_ASSERT(order <= 32);
+
+    /* 32-bit version. */
+
+    /* VC++ optimizes this to a single jmp. I've not yet verified this for other compilers. */
+    switch (order)
+    {
+    case 32: prediction += coefficients[31] * pDecodedSamples[-32];
+    case 31: prediction += coefficients[30] * pDecodedSamples[-31];
+    case 30: prediction += coefficients[29] * pDecodedSamples[-30];
+    case 29: prediction += coefficients[28] * pDecodedSamples[-29];
+    case 28: prediction += coefficients[27] * pDecodedSamples[-28];
+    case 27: prediction += coefficients[26] * pDecodedSamples[-27];
+    case 26: prediction += coefficients[25] * pDecodedSamples[-26];
+    case 25: prediction += coefficients[24] * pDecodedSamples[-25];
+    case 24: prediction += coefficients[23] * pDecodedSamples[-24];
+    case 23: prediction += coefficients[22] * pDecodedSamples[-23];
+    case 22: prediction += coefficients[21] * pDecodedSamples[-22];
+    case 21: prediction += coefficients[20] * pDecodedSamples[-21];
+    case 20: prediction += coefficients[19] * pDecodedSamples[-20];
+    case 19: prediction += coefficients[18] * pDecodedSamples[-19];
+    case 18: prediction += coefficients[17] * pDecodedSamples[-18];
+    case 17: prediction += coefficients[16] * pDecodedSamples[-17];
+    case 16: prediction += coefficients[15] * pDecodedSamples[-16];
+    case 15: prediction += coefficients[14] * pDecodedSamples[-15];
+    case 14: prediction += coefficients[13] * pDecodedSamples[-14];
+    case 13: prediction += coefficients[12] * pDecodedSamples[-13];
+    case 12: prediction += coefficients[11] * pDecodedSamples[-12];
+    case 11: prediction += coefficients[10] * pDecodedSamples[-11];
+    case 10: prediction += coefficients[ 9] * pDecodedSamples[-10];
+    case  9: prediction += coefficients[ 8] * pDecodedSamples[- 9];
+    case  8: prediction += coefficients[ 7] * pDecodedSamples[- 8];
+    case  7: prediction += coefficients[ 6] * pDecodedSamples[- 7];
+    case  6: prediction += coefficients[ 5] * pDecodedSamples[- 6];
+    case  5: prediction += coefficients[ 4] * pDecodedSamples[- 5];
+    case  4: prediction += coefficients[ 3] * pDecodedSamples[- 4];
+    case  3: prediction += coefficients[ 2] * pDecodedSamples[- 3];
+    case  2: prediction += coefficients[ 1] * pDecodedSamples[- 2];
+    case  1: prediction += coefficients[ 0] * pDecodedSamples[- 1];
+    }
+
+    return (drflac_int32)(prediction >> shift);
+}
+
+static DRFLAC_INLINE drflac_int32 drflac__calculate_prediction_64(drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pDecodedSamples)
+{
+    drflac_int64 prediction;
+
+    DRFLAC_ASSERT(order <= 32);
+
+    /* 64-bit version. */
+
+    /* This method is faster on the 32-bit build when compiling with VC++. See note below. */
+#ifndef DRFLAC_64BIT
+    if (order == 8)
+    {
+        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
+        prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3];
+        prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4];
+        prediction += coefficients[4] * (drflac_int64)pDecodedSamples[-5];
+        prediction += coefficients[5] * (drflac_int64)pDecodedSamples[-6];
+        prediction += coefficients[6] * (drflac_int64)pDecodedSamples[-7];
+        prediction += coefficients[7] * (drflac_int64)pDecodedSamples[-8];
+    }
+    else if (order == 7)
+    {
+        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
+        prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3];
+        prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4];
+        prediction += coefficients[4] * (drflac_int64)pDecodedSamples[-5];
+        prediction += coefficients[5] * (drflac_int64)pDecodedSamples[-6];
+        prediction += coefficients[6] * (drflac_int64)pDecodedSamples[-7];
+    }
+    else if (order == 3)
+    {
+        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
+        prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3];
+    }
+    else if (order == 6)
+    {
+        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
+        prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3];
+        prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4];
+        prediction += coefficients[4] * (drflac_int64)pDecodedSamples[-5];
+        prediction += coefficients[5] * (drflac_int64)pDecodedSamples[-6];
+    }
+    else if (order == 5)
+    {
+        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
+        prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3];
+        prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4];
+        prediction += coefficients[4] * (drflac_int64)pDecodedSamples[-5];
+    }
+    else if (order == 4)
+    {
+        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
+        prediction += coefficients[2] * (drflac_int64)pDecodedSamples[-3];
+        prediction += coefficients[3] * (drflac_int64)pDecodedSamples[-4];
+    }
+    else if (order == 12)
+    {
+        prediction  = coefficients[0]  * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1]  * (drflac_int64)pDecodedSamples[-2];
+        prediction += coefficients[2]  * (drflac_int64)pDecodedSamples[-3];
+        prediction += coefficients[3]  * (drflac_int64)pDecodedSamples[-4];
+        prediction += coefficients[4]  * (drflac_int64)pDecodedSamples[-5];
+        prediction += coefficients[5]  * (drflac_int64)pDecodedSamples[-6];
+        prediction += coefficients[6]  * (drflac_int64)pDecodedSamples[-7];
+        prediction += coefficients[7]  * (drflac_int64)pDecodedSamples[-8];
+        prediction += coefficients[8]  * (drflac_int64)pDecodedSamples[-9];
+        prediction += coefficients[9]  * (drflac_int64)pDecodedSamples[-10];
+        prediction += coefficients[10] * (drflac_int64)pDecodedSamples[-11];
+        prediction += coefficients[11] * (drflac_int64)pDecodedSamples[-12];
+    }
+    else if (order == 2)
+    {
+        prediction  = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1] * (drflac_int64)pDecodedSamples[-2];
+    }
+    else if (order == 1)
+    {
+        prediction = coefficients[0] * (drflac_int64)pDecodedSamples[-1];
+    }
+    else if (order == 10)
+    {
+        prediction  = coefficients[0]  * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1]  * (drflac_int64)pDecodedSamples[-2];
+        prediction += coefficients[2]  * (drflac_int64)pDecodedSamples[-3];
+        prediction += coefficients[3]  * (drflac_int64)pDecodedSamples[-4];
+        prediction += coefficients[4]  * (drflac_int64)pDecodedSamples[-5];
+        prediction += coefficients[5]  * (drflac_int64)pDecodedSamples[-6];
+        prediction += coefficients[6]  * (drflac_int64)pDecodedSamples[-7];
+        prediction += coefficients[7]  * (drflac_int64)pDecodedSamples[-8];
+        prediction += coefficients[8]  * (drflac_int64)pDecodedSamples[-9];
+        prediction += coefficients[9]  * (drflac_int64)pDecodedSamples[-10];
+    }
+    else if (order == 9)
+    {
+        prediction  = coefficients[0]  * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1]  * (drflac_int64)pDecodedSamples[-2];
+        prediction += coefficients[2]  * (drflac_int64)pDecodedSamples[-3];
+        prediction += coefficients[3]  * (drflac_int64)pDecodedSamples[-4];
+        prediction += coefficients[4]  * (drflac_int64)pDecodedSamples[-5];
+        prediction += coefficients[5]  * (drflac_int64)pDecodedSamples[-6];
+        prediction += coefficients[6]  * (drflac_int64)pDecodedSamples[-7];
+        prediction += coefficients[7]  * (drflac_int64)pDecodedSamples[-8];
+        prediction += coefficients[8]  * (drflac_int64)pDecodedSamples[-9];
+    }
+    else if (order == 11)
+    {
+        prediction  = coefficients[0]  * (drflac_int64)pDecodedSamples[-1];
+        prediction += coefficients[1]  * (drflac_int64)pDecodedSamples[-2];
+        prediction += coefficients[2]  * (drflac_int64)pDecodedSamples[-3];
+        prediction += coefficients[3]  * (drflac_int64)pDecodedSamples[-4];
+        prediction += coefficients[4]  * (drflac_int64)pDecodedSamples[-5];
+        prediction += coefficients[5]  * (drflac_int64)pDecodedSamples[-6];
+        prediction += coefficients[6]  * (drflac_int64)pDecodedSamples[-7];
+        prediction += coefficients[7]  * (drflac_int64)pDecodedSamples[-8];
+        prediction += coefficients[8]  * (drflac_int64)pDecodedSamples[-9];
+        prediction += coefficients[9]  * (drflac_int64)pDecodedSamples[-10];
+        prediction += coefficients[10] * (drflac_int64)pDecodedSamples[-11];
+    }
+    else
+    {
+        int j;
+
+        prediction = 0;
+        for (j = 0; j < (int)order; ++j) {
+            prediction += coefficients[j] * (drflac_int64)pDecodedSamples[-j-1];
+        }
+    }
+#endif
+
+    /*
+    VC++ optimizes this to a single jmp instruction, but only the 64-bit build. The 32-bit build generates less efficient code for some
+    reason. The ugly version above is faster so we'll just switch between the two depending on the target platform.
+    */
+#ifdef DRFLAC_64BIT
+    prediction = 0;
+    switch (order)
+    {
+    case 32: prediction += coefficients[31] * (drflac_int64)pDecodedSamples[-32];
+    case 31: prediction += coefficients[30] * (drflac_int64)pDecodedSamples[-31];
+    case 30: prediction += coefficients[29] * (drflac_int64)pDecodedSamples[-30];
+    case 29: prediction += coefficients[28] * (drflac_int64)pDecodedSamples[-29];
+    case 28: prediction += coefficients[27] * (drflac_int64)pDecodedSamples[-28];
+    case 27: prediction += coefficients[26] * (drflac_int64)pDecodedSamples[-27];
+    case 26: prediction += coefficients[25] * (drflac_int64)pDecodedSamples[-26];
+    case 25: prediction += coefficients[24] * (drflac_int64)pDecodedSamples[-25];
+    case 24: prediction += coefficients[23] * (drflac_int64)pDecodedSamples[-24];
+    case 23: prediction += coefficients[22] * (drflac_int64)pDecodedSamples[-23];
+    case 22: prediction += coefficients[21] * (drflac_int64)pDecodedSamples[-22];
+    case 21: prediction += coefficients[20] * (drflac_int64)pDecodedSamples[-21];
+    case 20: prediction += coefficients[19] * (drflac_int64)pDecodedSamples[-20];
+    case 19: prediction += coefficients[18] * (drflac_int64)pDecodedSamples[-19];
+    case 18: prediction += coefficients[17] * (drflac_int64)pDecodedSamples[-18];
+    case 17: prediction += coefficients[16] * (drflac_int64)pDecodedSamples[-17];
+    case 16: prediction += coefficients[15] * (drflac_int64)pDecodedSamples[-16];
+    case 15: prediction += coefficients[14] * (drflac_int64)pDecodedSamples[-15];
+    case 14: prediction += coefficients[13] * (drflac_int64)pDecodedSamples[-14];
+    case 13: prediction += coefficients[12] * (drflac_int64)pDecodedSamples[-13];
+    case 12: prediction += coefficients[11] * (drflac_int64)pDecodedSamples[-12];
+    case 11: prediction += coefficients[10] * (drflac_int64)pDecodedSamples[-11];
+    case 10: prediction += coefficients[ 9] * (drflac_int64)pDecodedSamples[-10];
+    case  9: prediction += coefficients[ 8] * (drflac_int64)pDecodedSamples[- 9];
+    case  8: prediction += coefficients[ 7] * (drflac_int64)pDecodedSamples[- 8];
+    case  7: prediction += coefficients[ 6] * (drflac_int64)pDecodedSamples[- 7];
+    case  6: prediction += coefficients[ 5] * (drflac_int64)pDecodedSamples[- 6];
+    case  5: prediction += coefficients[ 4] * (drflac_int64)pDecodedSamples[- 5];
+    case  4: prediction += coefficients[ 3] * (drflac_int64)pDecodedSamples[- 4];
+    case  3: prediction += coefficients[ 2] * (drflac_int64)pDecodedSamples[- 3];
+    case  2: prediction += coefficients[ 1] * (drflac_int64)pDecodedSamples[- 2];
+    case  1: prediction += coefficients[ 0] * (drflac_int64)pDecodedSamples[- 1];
+    }
+#endif
+
+    return (drflac_int32)(prediction >> shift);
+}
+
+
+#if 0
+/*
+Reference implementation for reading and decoding samples with residual. This is intentionally left unoptimized for the
+sake of readability and should only be used as a reference.
+*/
+static drflac_bool32 drflac__decode_samples_with_residual__rice__reference(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+{
+    drflac_uint32 i;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(count > 0);
+    DRFLAC_ASSERT(pSamplesOut != NULL);
+
+    for (i = 0; i < count; ++i) {
+        drflac_uint32 zeroCounter = 0;
+        for (;;) {
+            drflac_uint8 bit;
+            if (!drflac__read_uint8(bs, 1, &bit)) {
+                return DRFLAC_FALSE;
+            }
+
+            if (bit == 0) {
+                zeroCounter += 1;
+            } else {
+                break;
+            }
+        }
+
+        drflac_uint32 decodedRice;
+        if (riceParam > 0) {
+            if (!drflac__read_uint32(bs, riceParam, &decodedRice)) {
+                return DRFLAC_FALSE;
+            }
+        } else {
+            decodedRice = 0;
+        }
+
+        decodedRice |= (zeroCounter << riceParam);
+        if ((decodedRice & 0x01)) {
+            decodedRice = ~(decodedRice >> 1);
+        } else {
+            decodedRice =  (decodedRice >> 1);
+        }
+
+
+        if (bitsPerSample+shift >= 32) {
+            pSamplesOut[i] = decodedRice + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + i);
+        } else {
+            pSamplesOut[i] = decodedRice + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + i);
+        }
+    }
+
+    return DRFLAC_TRUE;
+}
+#endif
+
+#if 0
+static drflac_bool32 drflac__read_rice_parts__reference(drflac_bs* bs, drflac_uint8 riceParam, drflac_uint32* pZeroCounterOut, drflac_uint32* pRiceParamPartOut)
+{
+    drflac_uint32 zeroCounter = 0;
+    drflac_uint32 decodedRice;
+
+    for (;;) {
+        drflac_uint8 bit;
+        if (!drflac__read_uint8(bs, 1, &bit)) {
+            return DRFLAC_FALSE;
+        }
+
+        if (bit == 0) {
+            zeroCounter += 1;
+        } else {
+            break;
+        }
+    }
+
+    if (riceParam > 0) {
+        if (!drflac__read_uint32(bs, riceParam, &decodedRice)) {
+            return DRFLAC_FALSE;
+        }
+    } else {
+        decodedRice = 0;
+    }
+
+    *pZeroCounterOut = zeroCounter;
+    *pRiceParamPartOut = decodedRice;
+    return DRFLAC_TRUE;
+}
+#endif
+
+#if 0
+static DRFLAC_INLINE drflac_bool32 drflac__read_rice_parts(drflac_bs* bs, drflac_uint8 riceParam, drflac_uint32* pZeroCounterOut, drflac_uint32* pRiceParamPartOut)
+{
+    drflac_cache_t riceParamMask;
+    drflac_uint32 zeroCounter;
+    drflac_uint32 setBitOffsetPlus1;
+    drflac_uint32 riceParamPart;
+    drflac_uint32 riceLength;
+
+    DRFLAC_ASSERT(riceParam > 0);   /* <-- riceParam should never be 0. drflac__read_rice_parts__param_equals_zero() should be used instead for this case. */
+
+    riceParamMask = DRFLAC_CACHE_L1_SELECTION_MASK(riceParam);
+
+    zeroCounter = 0;
+    while (bs->cache == 0) {
+        zeroCounter += (drflac_uint32)DRFLAC_CACHE_L1_BITS_REMAINING(bs);
+        if (!drflac__reload_cache(bs)) {
+            return DRFLAC_FALSE;
+        }
+    }
+
+    setBitOffsetPlus1 = drflac__clz(bs->cache);
+    zeroCounter += setBitOffsetPlus1;
+    setBitOffsetPlus1 += 1;
+
+    riceLength = setBitOffsetPlus1 + riceParam;
+    if (riceLength < DRFLAC_CACHE_L1_BITS_REMAINING(bs)) {
+        riceParamPart = (drflac_uint32)((bs->cache & (riceParamMask >> setBitOffsetPlus1)) >> DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, riceLength));
+
+        bs->consumedBits += riceLength;
+        bs->cache <<= riceLength;
+    } else {
+        drflac_uint32 bitCountLo;
+        drflac_cache_t resultHi;
+
+        bs->consumedBits += riceLength;
+        bs->cache <<= setBitOffsetPlus1 & (DRFLAC_CACHE_L1_SIZE_BITS(bs)-1);    /* <-- Equivalent to "if (setBitOffsetPlus1 < DRFLAC_CACHE_L1_SIZE_BITS(bs)) { bs->cache <<= setBitOffsetPlus1; }" */
+
+        /* It straddles the cached data. It will never cover more than the next chunk. We just read the number in two parts and combine them. */
+        bitCountLo = bs->consumedBits - DRFLAC_CACHE_L1_SIZE_BITS(bs);
+        resultHi = DRFLAC_CACHE_L1_SELECT_AND_SHIFT(bs, riceParam);  /* <-- Use DRFLAC_CACHE_L1_SELECT_AND_SHIFT_SAFE() if ever this function allows riceParam=0. */
+
+        if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {
+#ifndef DR_FLAC_NO_CRC
+            drflac__update_crc16(bs);
+#endif
+            bs->cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]);
+            bs->consumedBits = 0;
+#ifndef DR_FLAC_NO_CRC
+            bs->crc16Cache = bs->cache;
+#endif
+        } else {
+            /* Slow path. We need to fetch more data from the client. */
+            if (!drflac__reload_cache(bs)) {
+                return DRFLAC_FALSE;
+            }
+        }
+
+        riceParamPart = (drflac_uint32)(resultHi | DRFLAC_CACHE_L1_SELECT_AND_SHIFT_SAFE(bs, bitCountLo));
+
+        bs->consumedBits += bitCountLo;
+        bs->cache <<= bitCountLo;
+    }
+
+    pZeroCounterOut[0] = zeroCounter;
+    pRiceParamPartOut[0] = riceParamPart;
+
+    return DRFLAC_TRUE;
+}
+#endif
+
+static DRFLAC_INLINE drflac_bool32 drflac__read_rice_parts_x1(drflac_bs* bs, drflac_uint8 riceParam, drflac_uint32* pZeroCounterOut, drflac_uint32* pRiceParamPartOut)
+{
+    drflac_uint32  riceParamPlus1 = riceParam + 1;
+    /*drflac_cache_t riceParamPlus1Mask  = DRFLAC_CACHE_L1_SELECTION_MASK(riceParamPlus1);*/
+    drflac_uint32  riceParamPlus1Shift = DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, riceParamPlus1);
+    drflac_uint32  riceParamPlus1MaxConsumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs) - riceParamPlus1;
+
+    /*
+    The idea here is to use local variables for the cache in an attempt to encourage the compiler to store them in registers. I have
+    no idea how this will work in practice...
+    */
+    drflac_cache_t bs_cache = bs->cache;
+    drflac_uint32  bs_consumedBits = bs->consumedBits;
+
+    /* The first thing to do is find the first unset bit. Most likely a bit will be set in the current cache line. */
+    drflac_uint32  lzcount = drflac__clz(bs_cache);
+    if (lzcount < sizeof(bs_cache)*8) {
+        pZeroCounterOut[0] = lzcount;
+
+        /*
+        It is most likely that the riceParam part (which comes after the zero counter) is also on this cache line. When extracting
+        this, we include the set bit from the unary coded part because it simplifies cache management. This bit will be handled
+        outside of this function at a higher level.
+        */
+    extract_rice_param_part:
+        bs_cache       <<= lzcount;
+        bs_consumedBits += lzcount;
+
+        if (bs_consumedBits <= riceParamPlus1MaxConsumedBits) {
+            /* Getting here means the rice parameter part is wholly contained within the current cache line. */
+            pRiceParamPartOut[0] = (drflac_uint32)(bs_cache >> riceParamPlus1Shift);
+            bs_cache       <<= riceParamPlus1;
+            bs_consumedBits += riceParamPlus1;
+        } else {
+            drflac_uint32 riceParamPartHi;
+            drflac_uint32 riceParamPartLo;
+            drflac_uint32 riceParamPartLoBitCount;
+
+            /*
+            Getting here means the rice parameter part straddles the cache line. We need to read from the tail of the current cache
+            line, reload the cache, and then combine it with the head of the next cache line.
+            */
+
+            /* Grab the high part of the rice parameter part. */
+            riceParamPartHi = (drflac_uint32)(bs_cache >> riceParamPlus1Shift);
+
+            /* Before reloading the cache we need to grab the size in bits of the low part. */
+            riceParamPartLoBitCount = bs_consumedBits - riceParamPlus1MaxConsumedBits;
+            DRFLAC_ASSERT(riceParamPartLoBitCount > 0 && riceParamPartLoBitCount < 32);
+
+            /* Now reload the cache. */
+            if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {
+            #ifndef DR_FLAC_NO_CRC
+                drflac__update_crc16(bs);
+            #endif
+                bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]);
+                bs_consumedBits = riceParamPartLoBitCount;
+            #ifndef DR_FLAC_NO_CRC
+                bs->crc16Cache = bs_cache;
+            #endif
+            } else {
+                /* Slow path. We need to fetch more data from the client. */
+                if (!drflac__reload_cache(bs)) {
+                    return DRFLAC_FALSE;
+                }
+
+                bs_cache = bs->cache;
+                bs_consumedBits = bs->consumedBits + riceParamPartLoBitCount;
+            }
+
+            /* We should now have enough information to construct the rice parameter part. */
+            riceParamPartLo = (drflac_uint32)(bs_cache >> (DRFLAC_CACHE_L1_SELECTION_SHIFT(bs, riceParamPartLoBitCount)));
+            pRiceParamPartOut[0] = riceParamPartHi | riceParamPartLo;
+
+            bs_cache <<= riceParamPartLoBitCount;
+        }
+    } else {
+        /*
+        Getting here means there are no bits set on the cache line. This is a less optimal case because we just wasted a call
+        to drflac__clz() and we need to reload the cache.
+        */
+        drflac_uint32 zeroCounter = (drflac_uint32)(DRFLAC_CACHE_L1_SIZE_BITS(bs) - bs_consumedBits);
+        for (;;) {
+            if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {
+            #ifndef DR_FLAC_NO_CRC
+                drflac__update_crc16(bs);
+            #endif
+                bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]);
+                bs_consumedBits = 0;
+            #ifndef DR_FLAC_NO_CRC
+                bs->crc16Cache = bs_cache;
+            #endif
+            } else {
+                /* Slow path. We need to fetch more data from the client. */
+                if (!drflac__reload_cache(bs)) {
+                    return DRFLAC_FALSE;
+                }
+
+                bs_cache = bs->cache;
+                bs_consumedBits = bs->consumedBits;
+            }
+
+            lzcount = drflac__clz(bs_cache);
+            zeroCounter += lzcount;
+
+            if (lzcount < sizeof(bs_cache)*8) {
+                break;
+            }
+        }
+
+        pZeroCounterOut[0] = zeroCounter;
+        goto extract_rice_param_part;
+    }
+
+    /* Make sure the cache is restored at the end of it all. */
+    bs->cache = bs_cache;
+    bs->consumedBits = bs_consumedBits;
+
+    return DRFLAC_TRUE;
+}
+
+static DRFLAC_INLINE drflac_bool32 drflac__seek_rice_parts(drflac_bs* bs, drflac_uint8 riceParam)
+{
+    drflac_uint32  riceParamPlus1 = riceParam + 1;
+    drflac_uint32  riceParamPlus1MaxConsumedBits = DRFLAC_CACHE_L1_SIZE_BITS(bs) - riceParamPlus1;
+
+    /*
+    The idea here is to use local variables for the cache in an attempt to encourage the compiler to store them in registers. I have
+    no idea how this will work in practice...
+    */
+    drflac_cache_t bs_cache = bs->cache;
+    drflac_uint32  bs_consumedBits = bs->consumedBits;
+
+    /* The first thing to do is find the first unset bit. Most likely a bit will be set in the current cache line. */
+    drflac_uint32  lzcount = drflac__clz(bs_cache);
+    if (lzcount < sizeof(bs_cache)*8) {
+        /*
+        It is most likely that the riceParam part (which comes after the zero counter) is also on this cache line. When extracting
+        this, we include the set bit from the unary coded part because it simplifies cache management. This bit will be handled
+        outside of this function at a higher level.
+        */
+    extract_rice_param_part:
+        bs_cache       <<= lzcount;
+        bs_consumedBits += lzcount;
+
+        if (bs_consumedBits <= riceParamPlus1MaxConsumedBits) {
+            /* Getting here means the rice parameter part is wholly contained within the current cache line. */
+            bs_cache       <<= riceParamPlus1;
+            bs_consumedBits += riceParamPlus1;
+        } else {
+            /*
+            Getting here means the rice parameter part straddles the cache line. We need to read from the tail of the current cache
+            line, reload the cache, and then combine it with the head of the next cache line.
+            */
+
+            /* Before reloading the cache we need to grab the size in bits of the low part. */
+            drflac_uint32 riceParamPartLoBitCount = bs_consumedBits - riceParamPlus1MaxConsumedBits;
+            DRFLAC_ASSERT(riceParamPartLoBitCount > 0 && riceParamPartLoBitCount < 32);
+
+            /* Now reload the cache. */
+            if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {
+            #ifndef DR_FLAC_NO_CRC
+                drflac__update_crc16(bs);
+            #endif
+                bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]);
+                bs_consumedBits = riceParamPartLoBitCount;
+            #ifndef DR_FLAC_NO_CRC
+                bs->crc16Cache = bs_cache;
+            #endif
+            } else {
+                /* Slow path. We need to fetch more data from the client. */
+                if (!drflac__reload_cache(bs)) {
+                    return DRFLAC_FALSE;
+                }
+
+                bs_cache = bs->cache;
+                bs_consumedBits = bs->consumedBits + riceParamPartLoBitCount;
+            }
+
+            bs_cache <<= riceParamPartLoBitCount;
+        }
+    } else {
+        /*
+        Getting here means there are no bits set on the cache line. This is a less optimal case because we just wasted a call
+        to drflac__clz() and we need to reload the cache.
+        */
+        for (;;) {
+            if (bs->nextL2Line < DRFLAC_CACHE_L2_LINE_COUNT(bs)) {
+            #ifndef DR_FLAC_NO_CRC
+                drflac__update_crc16(bs);
+            #endif
+                bs_cache = drflac__be2host__cache_line(bs->cacheL2[bs->nextL2Line++]);
+                bs_consumedBits = 0;
+            #ifndef DR_FLAC_NO_CRC
+                bs->crc16Cache = bs_cache;
+            #endif
+            } else {
+                /* Slow path. We need to fetch more data from the client. */
+                if (!drflac__reload_cache(bs)) {
+                    return DRFLAC_FALSE;
+                }
+
+                bs_cache = bs->cache;
+                bs_consumedBits = bs->consumedBits;
+            }
+
+            lzcount = drflac__clz(bs_cache);
+            if (lzcount < sizeof(bs_cache)*8) {
+                break;
+            }
+        }
+
+        goto extract_rice_param_part;
+    }
+
+    /* Make sure the cache is restored at the end of it all. */
+    bs->cache = bs_cache;
+    bs->consumedBits = bs_consumedBits;
+
+    return DRFLAC_TRUE;
+}
+
+
+static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar_zeroorder(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+{
+    drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF};
+    drflac_uint32 zeroCountPart0;
+    drflac_uint32 riceParamPart0;
+    drflac_uint32 riceParamMask;
+    drflac_uint32 i;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(count > 0);
+    DRFLAC_ASSERT(pSamplesOut != NULL);
+
+    (void)bitsPerSample;
+    (void)order;
+    (void)shift;
+    (void)coefficients;
+
+    riceParamMask  = (drflac_uint32)~((~0UL) << riceParam);
+
+    i = 0;
+    while (i < count) {
+        /* Rice extraction. */
+        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart0, &riceParamPart0)) {
+            return DRFLAC_FALSE;
+        }
+
+        /* Rice reconstruction. */
+        riceParamPart0 &= riceParamMask;
+        riceParamPart0 |= (zeroCountPart0 << riceParam);
+        riceParamPart0  = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01];
+
+        pSamplesOut[i] = riceParamPart0;
+
+        i += 1;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__decode_samples_with_residual__rice__scalar(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+{
+    drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF};
+    drflac_uint32 zeroCountPart0 = 0;
+    drflac_uint32 zeroCountPart1 = 0;
+    drflac_uint32 zeroCountPart2 = 0;
+    drflac_uint32 zeroCountPart3 = 0;
+    drflac_uint32 riceParamPart0 = 0;
+    drflac_uint32 riceParamPart1 = 0;
+    drflac_uint32 riceParamPart2 = 0;
+    drflac_uint32 riceParamPart3 = 0;
+    drflac_uint32 riceParamMask;
+    const drflac_int32* pSamplesOutEnd;
+    drflac_uint32 i;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(count > 0);
+    DRFLAC_ASSERT(pSamplesOut != NULL);
+
+    if (order == 0) {
+        return drflac__decode_samples_with_residual__rice__scalar_zeroorder(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
+    }
+
+    riceParamMask  = (drflac_uint32)~((~0UL) << riceParam);
+    pSamplesOutEnd = pSamplesOut + (count & ~3);
+
+    if (bitsPerSample+shift > 32) {
+        while (pSamplesOut < pSamplesOutEnd) {
+            /*
+            Rice extraction. It's faster to do this one at a time against local variables than it is to use the x4 version
+            against an array. Not sure why, but perhaps it's making more efficient use of registers?
+            */
+            if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart0, &riceParamPart0) ||
+                !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart1, &riceParamPart1) ||
+                !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart2, &riceParamPart2) ||
+                !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart3, &riceParamPart3)) {
+                return DRFLAC_FALSE;
+            }
+
+            riceParamPart0 &= riceParamMask;
+            riceParamPart1 &= riceParamMask;
+            riceParamPart2 &= riceParamMask;
+            riceParamPart3 &= riceParamMask;
+
+            riceParamPart0 |= (zeroCountPart0 << riceParam);
+            riceParamPart1 |= (zeroCountPart1 << riceParam);
+            riceParamPart2 |= (zeroCountPart2 << riceParam);
+            riceParamPart3 |= (zeroCountPart3 << riceParam);
+
+            riceParamPart0  = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01];
+            riceParamPart1  = (riceParamPart1 >> 1) ^ t[riceParamPart1 & 0x01];
+            riceParamPart2  = (riceParamPart2 >> 1) ^ t[riceParamPart2 & 0x01];
+            riceParamPart3  = (riceParamPart3 >> 1) ^ t[riceParamPart3 & 0x01];
+
+            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 0);
+            pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 1);
+            pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 2);
+            pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 3);
+
+            pSamplesOut += 4;
+        }
+    } else {
+        while (pSamplesOut < pSamplesOutEnd) {
+            if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart0, &riceParamPart0) ||
+                !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart1, &riceParamPart1) ||
+                !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart2, &riceParamPart2) ||
+                !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart3, &riceParamPart3)) {
+                return DRFLAC_FALSE;
+            }
+
+            riceParamPart0 &= riceParamMask;
+            riceParamPart1 &= riceParamMask;
+            riceParamPart2 &= riceParamMask;
+            riceParamPart3 &= riceParamMask;
+
+            riceParamPart0 |= (zeroCountPart0 << riceParam);
+            riceParamPart1 |= (zeroCountPart1 << riceParam);
+            riceParamPart2 |= (zeroCountPart2 << riceParam);
+            riceParamPart3 |= (zeroCountPart3 << riceParam);
+
+            riceParamPart0  = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01];
+            riceParamPart1  = (riceParamPart1 >> 1) ^ t[riceParamPart1 & 0x01];
+            riceParamPart2  = (riceParamPart2 >> 1) ^ t[riceParamPart2 & 0x01];
+            riceParamPart3  = (riceParamPart3 >> 1) ^ t[riceParamPart3 & 0x01];
+
+            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 0);
+            pSamplesOut[1] = riceParamPart1 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 1);
+            pSamplesOut[2] = riceParamPart2 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 2);
+            pSamplesOut[3] = riceParamPart3 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 3);
+
+            pSamplesOut += 4;
+        }
+    }
+
+    i = (count & ~3);
+    while (i < count) {
+        /* Rice extraction. */
+        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountPart0, &riceParamPart0)) {
+            return DRFLAC_FALSE;
+        }
+
+        /* Rice reconstruction. */
+        riceParamPart0 &= riceParamMask;
+        riceParamPart0 |= (zeroCountPart0 << riceParam);
+        riceParamPart0  = (riceParamPart0 >> 1) ^ t[riceParamPart0 & 0x01];
+        /*riceParamPart0  = (riceParamPart0 >> 1) ^ (~(riceParamPart0 & 0x01) + 1);*/
+
+        /* Sample reconstruction. */
+        if (bitsPerSample+shift > 32) {
+            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + 0);
+        } else {
+            pSamplesOut[0] = riceParamPart0 + drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + 0);
+        }
+
+        i += 1;
+        pSamplesOut += 1;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE __m128i drflac__mm_packs_interleaved_epi32(__m128i a, __m128i b)
+{
+    __m128i r;
+
+    /* Pack. */
+    r = _mm_packs_epi32(a, b);
+
+    /* a3a2 a1a0 b3b2 b1b0 -> a3a2 b3b2 a1a0 b1b0 */
+    r = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 1, 2, 0));
+
+    /* a3a2 b3b2 a1a0 b1b0 -> a3b3 a2b2 a1b1 a0b0 */
+    r = _mm_shufflehi_epi16(r, _MM_SHUFFLE(3, 1, 2, 0));
+    r = _mm_shufflelo_epi16(r, _MM_SHUFFLE(3, 1, 2, 0));
+
+    return r;
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_SSE41)
+static DRFLAC_INLINE __m128i drflac__mm_not_si128(__m128i a)
+{
+    return _mm_xor_si128(a, _mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128()));
+}
+
+static DRFLAC_INLINE __m128i drflac__mm_hadd_epi32(__m128i x)
+{
+    __m128i x64 = _mm_add_epi32(x, _mm_shuffle_epi32(x, _MM_SHUFFLE(1, 0, 3, 2)));
+    __m128i x32 = _mm_shufflelo_epi16(x64, _MM_SHUFFLE(1, 0, 3, 2));
+    return _mm_add_epi32(x64, x32);
+}
+
+static DRFLAC_INLINE __m128i drflac__mm_hadd_epi64(__m128i x)
+{
+    return _mm_add_epi64(x, _mm_shuffle_epi32(x, _MM_SHUFFLE(1, 0, 3, 2)));
+}
+
+static DRFLAC_INLINE __m128i drflac__mm_srai_epi64(__m128i x, int count)
+{
+    /*
+    To simplify this we are assuming count < 32. This restriction allows us to work on a low side and a high side. The low side
+    is shifted with zero bits, whereas the right side is shifted with sign bits.
+    */
+    __m128i lo = _mm_srli_epi64(x, count);
+    __m128i hi = _mm_srai_epi32(x, count);
+
+    hi = _mm_and_si128(hi, _mm_set_epi32(0xFFFFFFFF, 0, 0xFFFFFFFF, 0));    /* The high part needs to have the low part cleared. */
+
+    return _mm_or_si128(lo, hi);
+}
+
+static drflac_bool32 drflac__decode_samples_with_residual__rice__sse41_32(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+{
+    int i;
+    drflac_uint32 riceParamMask;
+    drflac_int32* pDecodedSamples    = pSamplesOut;
+    drflac_int32* pDecodedSamplesEnd = pSamplesOut + (count & ~3);
+    drflac_uint32 zeroCountParts0 = 0;
+    drflac_uint32 zeroCountParts1 = 0;
+    drflac_uint32 zeroCountParts2 = 0;
+    drflac_uint32 zeroCountParts3 = 0;
+    drflac_uint32 riceParamParts0 = 0;
+    drflac_uint32 riceParamParts1 = 0;
+    drflac_uint32 riceParamParts2 = 0;
+    drflac_uint32 riceParamParts3 = 0;
+    __m128i coefficients128_0;
+    __m128i coefficients128_4;
+    __m128i coefficients128_8;
+    __m128i samples128_0;
+    __m128i samples128_4;
+    __m128i samples128_8;
+    __m128i riceParamMask128;
+
+    const drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF};
+
+    riceParamMask    = (drflac_uint32)~((~0UL) << riceParam);
+    riceParamMask128 = _mm_set1_epi32(riceParamMask);
+
+    /* Pre-load. */
+    coefficients128_0 = _mm_setzero_si128();
+    coefficients128_4 = _mm_setzero_si128();
+    coefficients128_8 = _mm_setzero_si128();
+
+    samples128_0 = _mm_setzero_si128();
+    samples128_4 = _mm_setzero_si128();
+    samples128_8 = _mm_setzero_si128();
+
+    /*
+    Pre-loading the coefficients and prior samples is annoying because we need to ensure we don't try reading more than
+    what's available in the input buffers. It would be convenient to use a fall-through switch to do this, but this results
+    in strict aliasing warnings with GCC. To work around this I'm just doing something hacky. This feels a bit convoluted
+    so I think there's opportunity for this to be simplified.
+    */
+#if 1
+    {
+        int runningOrder = order;
+
+        /* 0 - 3. */
+        if (runningOrder >= 4) {
+            coefficients128_0 = _mm_loadu_si128((const __m128i*)(coefficients + 0));
+            samples128_0      = _mm_loadu_si128((const __m128i*)(pSamplesOut  - 4));
+            runningOrder -= 4;
+        } else {
+            switch (runningOrder) {
+                case 3: coefficients128_0 = _mm_set_epi32(0, coefficients[2], coefficients[1], coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], pSamplesOut[-2], pSamplesOut[-3], 0); break;
+                case 2: coefficients128_0 = _mm_set_epi32(0, 0,               coefficients[1], coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], pSamplesOut[-2], 0,               0); break;
+                case 1: coefficients128_0 = _mm_set_epi32(0, 0,               0,               coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], 0,               0,               0); break;
+            }
+            runningOrder = 0;
+        }
+
+        /* 4 - 7 */
+        if (runningOrder >= 4) {
+            coefficients128_4 = _mm_loadu_si128((const __m128i*)(coefficients + 4));
+            samples128_4      = _mm_loadu_si128((const __m128i*)(pSamplesOut  - 8));
+            runningOrder -= 4;
+        } else {
+            switch (runningOrder) {
+                case 3: coefficients128_4 = _mm_set_epi32(0, coefficients[6], coefficients[5], coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], pSamplesOut[-6], pSamplesOut[-7], 0); break;
+                case 2: coefficients128_4 = _mm_set_epi32(0, 0,               coefficients[5], coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], pSamplesOut[-6], 0,               0); break;
+                case 1: coefficients128_4 = _mm_set_epi32(0, 0,               0,               coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], 0,               0,               0); break;
+            }
+            runningOrder = 0;
+        }
+
+        /* 8 - 11 */
+        if (runningOrder == 4) {
+            coefficients128_8 = _mm_loadu_si128((const __m128i*)(coefficients + 8));
+            samples128_8      = _mm_loadu_si128((const __m128i*)(pSamplesOut  - 12));
+            runningOrder -= 4;
+        } else {
+            switch (runningOrder) {
+                case 3: coefficients128_8 = _mm_set_epi32(0, coefficients[10], coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], pSamplesOut[-11], 0); break;
+                case 2: coefficients128_8 = _mm_set_epi32(0, 0,                coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], 0,                0); break;
+                case 1: coefficients128_8 = _mm_set_epi32(0, 0,                0,               coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], 0,                0,                0); break;
+            }
+            runningOrder = 0;
+        }
+
+        /* Coefficients need to be shuffled for our streaming algorithm below to work. Samples are already in the correct order from the loading routine above. */
+        coefficients128_0 = _mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(0, 1, 2, 3));
+        coefficients128_4 = _mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(0, 1, 2, 3));
+        coefficients128_8 = _mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(0, 1, 2, 3));
+    }
+#else
+    /* This causes strict-aliasing warnings with GCC. */
+    switch (order)
+    {
+    case 12: ((drflac_int32*)&coefficients128_8)[0] = coefficients[11]; ((drflac_int32*)&samples128_8)[0] = pDecodedSamples[-12];
+    case 11: ((drflac_int32*)&coefficients128_8)[1] = coefficients[10]; ((drflac_int32*)&samples128_8)[1] = pDecodedSamples[-11];
+    case 10: ((drflac_int32*)&coefficients128_8)[2] = coefficients[ 9]; ((drflac_int32*)&samples128_8)[2] = pDecodedSamples[-10];
+    case 9:  ((drflac_int32*)&coefficients128_8)[3] = coefficients[ 8]; ((drflac_int32*)&samples128_8)[3] = pDecodedSamples[- 9];
+    case 8:  ((drflac_int32*)&coefficients128_4)[0] = coefficients[ 7]; ((drflac_int32*)&samples128_4)[0] = pDecodedSamples[- 8];
+    case 7:  ((drflac_int32*)&coefficients128_4)[1] = coefficients[ 6]; ((drflac_int32*)&samples128_4)[1] = pDecodedSamples[- 7];
+    case 6:  ((drflac_int32*)&coefficients128_4)[2] = coefficients[ 5]; ((drflac_int32*)&samples128_4)[2] = pDecodedSamples[- 6];
+    case 5:  ((drflac_int32*)&coefficients128_4)[3] = coefficients[ 4]; ((drflac_int32*)&samples128_4)[3] = pDecodedSamples[- 5];
+    case 4:  ((drflac_int32*)&coefficients128_0)[0] = coefficients[ 3]; ((drflac_int32*)&samples128_0)[0] = pDecodedSamples[- 4];
+    case 3:  ((drflac_int32*)&coefficients128_0)[1] = coefficients[ 2]; ((drflac_int32*)&samples128_0)[1] = pDecodedSamples[- 3];
+    case 2:  ((drflac_int32*)&coefficients128_0)[2] = coefficients[ 1]; ((drflac_int32*)&samples128_0)[2] = pDecodedSamples[- 2];
+    case 1:  ((drflac_int32*)&coefficients128_0)[3] = coefficients[ 0]; ((drflac_int32*)&samples128_0)[3] = pDecodedSamples[- 1];
+    }
+#endif
+
+    /* For this version we are doing one sample at a time. */
+    while (pDecodedSamples < pDecodedSamplesEnd) {
+        __m128i prediction128;
+        __m128i zeroCountPart128;
+        __m128i riceParamPart128;
+
+        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts0, &riceParamParts0) ||
+            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts1, &riceParamParts1) ||
+            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts2, &riceParamParts2) ||
+            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts3, &riceParamParts3)) {
+            return DRFLAC_FALSE;
+        }
+
+        zeroCountPart128 = _mm_set_epi32(zeroCountParts3, zeroCountParts2, zeroCountParts1, zeroCountParts0);
+        riceParamPart128 = _mm_set_epi32(riceParamParts3, riceParamParts2, riceParamParts1, riceParamParts0);
+
+        riceParamPart128 = _mm_and_si128(riceParamPart128, riceParamMask128);
+        riceParamPart128 = _mm_or_si128(riceParamPart128, _mm_slli_epi32(zeroCountPart128, riceParam));
+        riceParamPart128 = _mm_xor_si128(_mm_srli_epi32(riceParamPart128, 1), _mm_add_epi32(drflac__mm_not_si128(_mm_and_si128(riceParamPart128, _mm_set1_epi32(0x01))), _mm_set1_epi32(0x01)));  /* <-- SSE2 compatible */
+        /*riceParamPart128 = _mm_xor_si128(_mm_srli_epi32(riceParamPart128, 1), _mm_mullo_epi32(_mm_and_si128(riceParamPart128, _mm_set1_epi32(0x01)), _mm_set1_epi32(0xFFFFFFFF)));*/   /* <-- Only supported from SSE4.1 and is slower in my testing... */
+
+        if (order <= 4) {
+            for (i = 0; i < 4; i += 1) {
+                prediction128 = _mm_mullo_epi32(coefficients128_0, samples128_0);
+
+                /* Horizontal add and shift. */
+                prediction128 = drflac__mm_hadd_epi32(prediction128);
+                prediction128 = _mm_srai_epi32(prediction128, shift);
+                prediction128 = _mm_add_epi32(riceParamPart128, prediction128);
+
+                samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4);
+                riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4);
+            }
+        } else if (order <= 8) {
+            for (i = 0; i < 4; i += 1) {
+                prediction128 =                              _mm_mullo_epi32(coefficients128_4, samples128_4);
+                prediction128 = _mm_add_epi32(prediction128, _mm_mullo_epi32(coefficients128_0, samples128_0));
+
+                /* Horizontal add and shift. */
+                prediction128 = drflac__mm_hadd_epi32(prediction128);
+                prediction128 = _mm_srai_epi32(prediction128, shift);
+                prediction128 = _mm_add_epi32(riceParamPart128, prediction128);
+
+                samples128_4 = _mm_alignr_epi8(samples128_0,  samples128_4, 4);
+                samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4);
+                riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4);
+            }
+        } else {
+            for (i = 0; i < 4; i += 1) {
+                prediction128 =                              _mm_mullo_epi32(coefficients128_8, samples128_8);
+                prediction128 = _mm_add_epi32(prediction128, _mm_mullo_epi32(coefficients128_4, samples128_4));
+                prediction128 = _mm_add_epi32(prediction128, _mm_mullo_epi32(coefficients128_0, samples128_0));
+
+                /* Horizontal add and shift. */
+                prediction128 = drflac__mm_hadd_epi32(prediction128);
+                prediction128 = _mm_srai_epi32(prediction128, shift);
+                prediction128 = _mm_add_epi32(riceParamPart128, prediction128);
+
+                samples128_8 = _mm_alignr_epi8(samples128_4,  samples128_8, 4);
+                samples128_4 = _mm_alignr_epi8(samples128_0,  samples128_4, 4);
+                samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4);
+                riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4);
+            }
+        }
+
+        /* We store samples in groups of 4. */
+        _mm_storeu_si128((__m128i*)pDecodedSamples, samples128_0);
+        pDecodedSamples += 4;
+    }
+
+    /* Make sure we process the last few samples. */
+    i = (count & ~3);
+    while (i < (int)count) {
+        /* Rice extraction. */
+        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts0, &riceParamParts0)) {
+            return DRFLAC_FALSE;
+        }
+
+        /* Rice reconstruction. */
+        riceParamParts0 &= riceParamMask;
+        riceParamParts0 |= (zeroCountParts0 << riceParam);
+        riceParamParts0  = (riceParamParts0 >> 1) ^ t[riceParamParts0 & 0x01];
+
+        /* Sample reconstruction. */
+        pDecodedSamples[0] = riceParamParts0 + drflac__calculate_prediction_32(order, shift, coefficients, pDecodedSamples);
+
+        i += 1;
+        pDecodedSamples += 1;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__decode_samples_with_residual__rice__sse41_64(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+{
+    int i;
+    drflac_uint32 riceParamMask;
+    drflac_int32* pDecodedSamples    = pSamplesOut;
+    drflac_int32* pDecodedSamplesEnd = pSamplesOut + (count & ~3);
+    drflac_uint32 zeroCountParts0 = 0;
+    drflac_uint32 zeroCountParts1 = 0;
+    drflac_uint32 zeroCountParts2 = 0;
+    drflac_uint32 zeroCountParts3 = 0;
+    drflac_uint32 riceParamParts0 = 0;
+    drflac_uint32 riceParamParts1 = 0;
+    drflac_uint32 riceParamParts2 = 0;
+    drflac_uint32 riceParamParts3 = 0;
+    __m128i coefficients128_0;
+    __m128i coefficients128_4;
+    __m128i coefficients128_8;
+    __m128i samples128_0;
+    __m128i samples128_4;
+    __m128i samples128_8;
+    __m128i prediction128;
+    __m128i riceParamMask128;
+
+    const drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF};
+
+    DRFLAC_ASSERT(order <= 12);
+
+    riceParamMask    = (drflac_uint32)~((~0UL) << riceParam);
+    riceParamMask128 = _mm_set1_epi32(riceParamMask);
+
+    prediction128 = _mm_setzero_si128();
+
+    /* Pre-load. */
+    coefficients128_0  = _mm_setzero_si128();
+    coefficients128_4  = _mm_setzero_si128();
+    coefficients128_8  = _mm_setzero_si128();
+
+    samples128_0  = _mm_setzero_si128();
+    samples128_4  = _mm_setzero_si128();
+    samples128_8  = _mm_setzero_si128();
+
+#if 1
+    {
+        int runningOrder = order;
+
+        /* 0 - 3. */
+        if (runningOrder >= 4) {
+            coefficients128_0 = _mm_loadu_si128((const __m128i*)(coefficients + 0));
+            samples128_0      = _mm_loadu_si128((const __m128i*)(pSamplesOut  - 4));
+            runningOrder -= 4;
+        } else {
+            switch (runningOrder) {
+                case 3: coefficients128_0 = _mm_set_epi32(0, coefficients[2], coefficients[1], coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], pSamplesOut[-2], pSamplesOut[-3], 0); break;
+                case 2: coefficients128_0 = _mm_set_epi32(0, 0,               coefficients[1], coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], pSamplesOut[-2], 0,               0); break;
+                case 1: coefficients128_0 = _mm_set_epi32(0, 0,               0,               coefficients[0]); samples128_0 = _mm_set_epi32(pSamplesOut[-1], 0,               0,               0); break;
+            }
+            runningOrder = 0;
+        }
+
+        /* 4 - 7 */
+        if (runningOrder >= 4) {
+            coefficients128_4 = _mm_loadu_si128((const __m128i*)(coefficients + 4));
+            samples128_4      = _mm_loadu_si128((const __m128i*)(pSamplesOut  - 8));
+            runningOrder -= 4;
+        } else {
+            switch (runningOrder) {
+                case 3: coefficients128_4 = _mm_set_epi32(0, coefficients[6], coefficients[5], coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], pSamplesOut[-6], pSamplesOut[-7], 0); break;
+                case 2: coefficients128_4 = _mm_set_epi32(0, 0,               coefficients[5], coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], pSamplesOut[-6], 0,               0); break;
+                case 1: coefficients128_4 = _mm_set_epi32(0, 0,               0,               coefficients[4]); samples128_4 = _mm_set_epi32(pSamplesOut[-5], 0,               0,               0); break;
+            }
+            runningOrder = 0;
+        }
+
+        /* 8 - 11 */
+        if (runningOrder == 4) {
+            coefficients128_8 = _mm_loadu_si128((const __m128i*)(coefficients + 8));
+            samples128_8      = _mm_loadu_si128((const __m128i*)(pSamplesOut  - 12));
+            runningOrder -= 4;
+        } else {
+            switch (runningOrder) {
+                case 3: coefficients128_8 = _mm_set_epi32(0, coefficients[10], coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], pSamplesOut[-11], 0); break;
+                case 2: coefficients128_8 = _mm_set_epi32(0, 0,                coefficients[9], coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], pSamplesOut[-10], 0,                0); break;
+                case 1: coefficients128_8 = _mm_set_epi32(0, 0,                0,               coefficients[8]); samples128_8 = _mm_set_epi32(pSamplesOut[-9], 0,                0,                0); break;
+            }
+            runningOrder = 0;
+        }
+
+        /* Coefficients need to be shuffled for our streaming algorithm below to work. Samples are already in the correct order from the loading routine above. */
+        coefficients128_0 = _mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(0, 1, 2, 3));
+        coefficients128_4 = _mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(0, 1, 2, 3));
+        coefficients128_8 = _mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(0, 1, 2, 3));
+    }
+#else
+    switch (order)
+    {
+    case 12: ((drflac_int32*)&coefficients128_8)[0] = coefficients[11]; ((drflac_int32*)&samples128_8)[0] = pDecodedSamples[-12];
+    case 11: ((drflac_int32*)&coefficients128_8)[1] = coefficients[10]; ((drflac_int32*)&samples128_8)[1] = pDecodedSamples[-11];
+    case 10: ((drflac_int32*)&coefficients128_8)[2] = coefficients[ 9]; ((drflac_int32*)&samples128_8)[2] = pDecodedSamples[-10];
+    case 9:  ((drflac_int32*)&coefficients128_8)[3] = coefficients[ 8]; ((drflac_int32*)&samples128_8)[3] = pDecodedSamples[- 9];
+    case 8:  ((drflac_int32*)&coefficients128_4)[0] = coefficients[ 7]; ((drflac_int32*)&samples128_4)[0] = pDecodedSamples[- 8];
+    case 7:  ((drflac_int32*)&coefficients128_4)[1] = coefficients[ 6]; ((drflac_int32*)&samples128_4)[1] = pDecodedSamples[- 7];
+    case 6:  ((drflac_int32*)&coefficients128_4)[2] = coefficients[ 5]; ((drflac_int32*)&samples128_4)[2] = pDecodedSamples[- 6];
+    case 5:  ((drflac_int32*)&coefficients128_4)[3] = coefficients[ 4]; ((drflac_int32*)&samples128_4)[3] = pDecodedSamples[- 5];
+    case 4:  ((drflac_int32*)&coefficients128_0)[0] = coefficients[ 3]; ((drflac_int32*)&samples128_0)[0] = pDecodedSamples[- 4];
+    case 3:  ((drflac_int32*)&coefficients128_0)[1] = coefficients[ 2]; ((drflac_int32*)&samples128_0)[1] = pDecodedSamples[- 3];
+    case 2:  ((drflac_int32*)&coefficients128_0)[2] = coefficients[ 1]; ((drflac_int32*)&samples128_0)[2] = pDecodedSamples[- 2];
+    case 1:  ((drflac_int32*)&coefficients128_0)[3] = coefficients[ 0]; ((drflac_int32*)&samples128_0)[3] = pDecodedSamples[- 1];
+    }
+#endif
+
+    /* For this version we are doing one sample at a time. */
+    while (pDecodedSamples < pDecodedSamplesEnd) {
+        __m128i zeroCountPart128;
+        __m128i riceParamPart128;
+
+        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts0, &riceParamParts0) ||
+            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts1, &riceParamParts1) ||
+            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts2, &riceParamParts2) ||
+            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts3, &riceParamParts3)) {
+            return DRFLAC_FALSE;
+        }
+
+        zeroCountPart128 = _mm_set_epi32(zeroCountParts3, zeroCountParts2, zeroCountParts1, zeroCountParts0);
+        riceParamPart128 = _mm_set_epi32(riceParamParts3, riceParamParts2, riceParamParts1, riceParamParts0);
+
+        riceParamPart128 = _mm_and_si128(riceParamPart128, riceParamMask128);
+        riceParamPart128 = _mm_or_si128(riceParamPart128, _mm_slli_epi32(zeroCountPart128, riceParam));
+        riceParamPart128 = _mm_xor_si128(_mm_srli_epi32(riceParamPart128, 1), _mm_add_epi32(drflac__mm_not_si128(_mm_and_si128(riceParamPart128, _mm_set1_epi32(1))), _mm_set1_epi32(1)));
+
+        for (i = 0; i < 4; i += 1) {
+            prediction128 = _mm_xor_si128(prediction128, prediction128);    /* Reset to 0. */
+
+            switch (order)
+            {
+            case 12:
+            case 11: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(1, 1, 0, 0)), _mm_shuffle_epi32(samples128_8, _MM_SHUFFLE(1, 1, 0, 0))));
+            case 10:
+            case  9: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_8, _MM_SHUFFLE(3, 3, 2, 2)), _mm_shuffle_epi32(samples128_8, _MM_SHUFFLE(3, 3, 2, 2))));
+            case  8:
+            case  7: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(1, 1, 0, 0)), _mm_shuffle_epi32(samples128_4, _MM_SHUFFLE(1, 1, 0, 0))));
+            case  6:
+            case  5: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_4, _MM_SHUFFLE(3, 3, 2, 2)), _mm_shuffle_epi32(samples128_4, _MM_SHUFFLE(3, 3, 2, 2))));
+            case  4:
+            case  3: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(1, 1, 0, 0)), _mm_shuffle_epi32(samples128_0, _MM_SHUFFLE(1, 1, 0, 0))));
+            case  2:
+            case  1: prediction128 = _mm_add_epi64(prediction128, _mm_mul_epi32(_mm_shuffle_epi32(coefficients128_0, _MM_SHUFFLE(3, 3, 2, 2)), _mm_shuffle_epi32(samples128_0, _MM_SHUFFLE(3, 3, 2, 2))));
+            }
+
+            /* Horizontal add and shift. */
+            prediction128 = drflac__mm_hadd_epi64(prediction128);
+            prediction128 = drflac__mm_srai_epi64(prediction128, shift);
+            prediction128 = _mm_add_epi32(riceParamPart128, prediction128);
+
+            /* Our value should be sitting in prediction128[0]. We need to combine this with our SSE samples. */
+            samples128_8 = _mm_alignr_epi8(samples128_4,  samples128_8, 4);
+            samples128_4 = _mm_alignr_epi8(samples128_0,  samples128_4, 4);
+            samples128_0 = _mm_alignr_epi8(prediction128, samples128_0, 4);
+
+            /* Slide our rice parameter down so that the value in position 0 contains the next one to process. */
+            riceParamPart128 = _mm_alignr_epi8(_mm_setzero_si128(), riceParamPart128, 4);
+        }
+
+        /* We store samples in groups of 4. */
+        _mm_storeu_si128((__m128i*)pDecodedSamples, samples128_0);
+        pDecodedSamples += 4;
+    }
+
+    /* Make sure we process the last few samples. */
+    i = (count & ~3);
+    while (i < (int)count) {
+        /* Rice extraction. */
+        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts0, &riceParamParts0)) {
+            return DRFLAC_FALSE;
+        }
+
+        /* Rice reconstruction. */
+        riceParamParts0 &= riceParamMask;
+        riceParamParts0 |= (zeroCountParts0 << riceParam);
+        riceParamParts0  = (riceParamParts0 >> 1) ^ t[riceParamParts0 & 0x01];
+
+        /* Sample reconstruction. */
+        pDecodedSamples[0] = riceParamParts0 + drflac__calculate_prediction_64(order, shift, coefficients, pDecodedSamples);
+
+        i += 1;
+        pDecodedSamples += 1;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__decode_samples_with_residual__rice__sse41(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+{
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(count > 0);
+    DRFLAC_ASSERT(pSamplesOut != NULL);
+
+    /* In my testing the order is rarely > 12, so in this case I'm going to simplify the SSE implementation by only handling order <= 12. */
+    if (order > 0 && order <= 12) {
+        if (bitsPerSample+shift > 32) {
+            return drflac__decode_samples_with_residual__rice__sse41_64(bs, count, riceParam, order, shift, coefficients, pSamplesOut);
+        } else {
+            return drflac__decode_samples_with_residual__rice__sse41_32(bs, count, riceParam, order, shift, coefficients, pSamplesOut);
+        }
+    } else {
+        return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac__vst2q_s32(drflac_int32* p, int32x4x2_t x)
+{
+    vst1q_s32(p+0, x.val[0]);
+    vst1q_s32(p+4, x.val[1]);
+}
+
+static DRFLAC_INLINE void drflac__vst2q_u32(drflac_uint32* p, uint32x4x2_t x)
+{
+    vst1q_u32(p+0, x.val[0]);
+    vst1q_u32(p+4, x.val[1]);
+}
+
+static DRFLAC_INLINE void drflac__vst2q_f32(float* p, float32x4x2_t x)
+{
+    vst1q_f32(p+0, x.val[0]);
+    vst1q_f32(p+4, x.val[1]);
+}
+
+static DRFLAC_INLINE void drflac__vst2q_s16(drflac_int16* p, int16x4x2_t x)
+{
+    vst1q_s16(p, vcombine_s16(x.val[0], x.val[1]));
+}
+
+static DRFLAC_INLINE void drflac__vst2q_u16(drflac_uint16* p, uint16x4x2_t x)
+{
+    vst1q_u16(p, vcombine_u16(x.val[0], x.val[1]));
+}
+
+static DRFLAC_INLINE int32x4_t drflac__vdupq_n_s32x4(drflac_int32 x3, drflac_int32 x2, drflac_int32 x1, drflac_int32 x0)
+{
+    drflac_int32 x[4];
+    x[3] = x3;
+    x[2] = x2;
+    x[1] = x1;
+    x[0] = x0;
+    return vld1q_s32(x);
+}
+
+static DRFLAC_INLINE int32x4_t drflac__valignrq_s32_1(int32x4_t a, int32x4_t b)
+{
+    /* Equivalent to SSE's _mm_alignr_epi8(a, b, 4) */
+
+    /* Reference */
+    /*return drflac__vdupq_n_s32x4(
+        vgetq_lane_s32(a, 0),
+        vgetq_lane_s32(b, 3),
+        vgetq_lane_s32(b, 2),
+        vgetq_lane_s32(b, 1)
+    );*/
+
+    return vextq_s32(b, a, 1);
+}
+
+static DRFLAC_INLINE uint32x4_t drflac__valignrq_u32_1(uint32x4_t a, uint32x4_t b)
+{
+    /* Equivalent to SSE's _mm_alignr_epi8(a, b, 4) */
+
+    /* Reference */
+    /*return drflac__vdupq_n_s32x4(
+        vgetq_lane_s32(a, 0),
+        vgetq_lane_s32(b, 3),
+        vgetq_lane_s32(b, 2),
+        vgetq_lane_s32(b, 1)
+    );*/
+
+    return vextq_u32(b, a, 1);
+}
+
+static DRFLAC_INLINE int32x2_t drflac__vhaddq_s32(int32x4_t x)
+{
+    /* The sum must end up in position 0. */
+
+    /* Reference */
+    /*return vdupq_n_s32(
+        vgetq_lane_s32(x, 3) +
+        vgetq_lane_s32(x, 2) +
+        vgetq_lane_s32(x, 1) +
+        vgetq_lane_s32(x, 0)
+    );*/
+
+    int32x2_t r = vadd_s32(vget_high_s32(x), vget_low_s32(x));
+    return vpadd_s32(r, r);
+}
+
+static DRFLAC_INLINE int64x1_t drflac__vhaddq_s64(int64x2_t x)
+{
+    return vadd_s64(vget_high_s64(x), vget_low_s64(x));
+}
+
+static DRFLAC_INLINE int32x4_t drflac__vrevq_s32(int32x4_t x)
+{
+    /* Reference */
+    /*return drflac__vdupq_n_s32x4(
+        vgetq_lane_s32(x, 0),
+        vgetq_lane_s32(x, 1),
+        vgetq_lane_s32(x, 2),
+        vgetq_lane_s32(x, 3)
+    );*/
+
+    return vrev64q_s32(vcombine_s32(vget_high_s32(x), vget_low_s32(x)));
+}
+
+static DRFLAC_INLINE int32x4_t drflac__vnotq_s32(int32x4_t x)
+{
+    return veorq_s32(x, vdupq_n_s32(0xFFFFFFFF));
+}
+
+static DRFLAC_INLINE uint32x4_t drflac__vnotq_u32(uint32x4_t x)
+{
+    return veorq_u32(x, vdupq_n_u32(0xFFFFFFFF));
+}
+
+static drflac_bool32 drflac__decode_samples_with_residual__rice__neon_32(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+{
+    int i;
+    drflac_uint32 riceParamMask;
+    drflac_int32* pDecodedSamples    = pSamplesOut;
+    drflac_int32* pDecodedSamplesEnd = pSamplesOut + (count & ~3);
+    drflac_uint32 zeroCountParts[4];
+    drflac_uint32 riceParamParts[4];
+    int32x4_t coefficients128_0;
+    int32x4_t coefficients128_4;
+    int32x4_t coefficients128_8;
+    int32x4_t samples128_0;
+    int32x4_t samples128_4;
+    int32x4_t samples128_8;
+    uint32x4_t riceParamMask128;
+    int32x4_t riceParam128;
+    int32x2_t shift64;
+    uint32x4_t one128;
+
+    const drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF};
+
+    riceParamMask    = ~((~0UL) << riceParam);
+    riceParamMask128 = vdupq_n_u32(riceParamMask);
+
+    riceParam128 = vdupq_n_s32(riceParam);
+    shift64 = vdup_n_s32(-shift); /* Negate the shift because we'll be doing a variable shift using vshlq_s32(). */
+    one128 = vdupq_n_u32(1);
+
+    /*
+    Pre-loading the coefficients and prior samples is annoying because we need to ensure we don't try reading more than
+    what's available in the input buffers. It would be conenient to use a fall-through switch to do this, but this results
+    in strict aliasing warnings with GCC. To work around this I'm just doing something hacky. This feels a bit convoluted
+    so I think there's opportunity for this to be simplified.
+    */
+    {
+        int runningOrder = order;
+        drflac_int32 tempC[4] = {0, 0, 0, 0};
+        drflac_int32 tempS[4] = {0, 0, 0, 0};
+
+        /* 0 - 3. */
+        if (runningOrder >= 4) {
+            coefficients128_0 = vld1q_s32(coefficients + 0);
+            samples128_0      = vld1q_s32(pSamplesOut  - 4);
+            runningOrder -= 4;
+        } else {
+            switch (runningOrder) {
+                case 3: tempC[2] = coefficients[2]; tempS[1] = pSamplesOut[-3]; /* fallthrough */
+                case 2: tempC[1] = coefficients[1]; tempS[2] = pSamplesOut[-2]; /* fallthrough */
+                case 1: tempC[0] = coefficients[0]; tempS[3] = pSamplesOut[-1]; /* fallthrough */
+            }
+
+            coefficients128_0 = vld1q_s32(tempC);
+            samples128_0      = vld1q_s32(tempS);
+            runningOrder = 0;
+        }
+
+        /* 4 - 7 */
+        if (runningOrder >= 4) {
+            coefficients128_4 = vld1q_s32(coefficients + 4);
+            samples128_4      = vld1q_s32(pSamplesOut  - 8);
+            runningOrder -= 4;
+        } else {
+            switch (runningOrder) {
+                case 3: tempC[2] = coefficients[6]; tempS[1] = pSamplesOut[-7]; /* fallthrough */
+                case 2: tempC[1] = coefficients[5]; tempS[2] = pSamplesOut[-6]; /* fallthrough */
+                case 1: tempC[0] = coefficients[4]; tempS[3] = pSamplesOut[-5]; /* fallthrough */
+            }
+
+            coefficients128_4 = vld1q_s32(tempC);
+            samples128_4      = vld1q_s32(tempS);
+            runningOrder = 0;
+        }
+
+        /* 8 - 11 */
+        if (runningOrder == 4) {
+            coefficients128_8 = vld1q_s32(coefficients + 8);
+            samples128_8      = vld1q_s32(pSamplesOut  - 12);
+            runningOrder -= 4;
+        } else {
+            switch (runningOrder) {
+                case 3: tempC[2] = coefficients[10]; tempS[1] = pSamplesOut[-11]; /* fallthrough */
+                case 2: tempC[1] = coefficients[ 9]; tempS[2] = pSamplesOut[-10]; /* fallthrough */
+                case 1: tempC[0] = coefficients[ 8]; tempS[3] = pSamplesOut[- 9]; /* fallthrough */
+            }
+
+            coefficients128_8 = vld1q_s32(tempC);
+            samples128_8      = vld1q_s32(tempS);
+            runningOrder = 0;
+        }
+
+        /* Coefficients need to be shuffled for our streaming algorithm below to work. Samples are already in the correct order from the loading routine above. */
+        coefficients128_0 = drflac__vrevq_s32(coefficients128_0);
+        coefficients128_4 = drflac__vrevq_s32(coefficients128_4);
+        coefficients128_8 = drflac__vrevq_s32(coefficients128_8);
+    }
+
+    /* For this version we are doing one sample at a time. */
+    while (pDecodedSamples < pDecodedSamplesEnd) {
+        int32x4_t prediction128;
+        int32x2_t prediction64;
+        uint32x4_t zeroCountPart128;
+        uint32x4_t riceParamPart128;
+
+        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[0], &riceParamParts[0]) ||
+            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[1], &riceParamParts[1]) ||
+            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[2], &riceParamParts[2]) ||
+            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[3], &riceParamParts[3])) {
+            return DRFLAC_FALSE;
+        }
+
+        zeroCountPart128 = vld1q_u32(zeroCountParts);
+        riceParamPart128 = vld1q_u32(riceParamParts);
+
+        riceParamPart128 = vandq_u32(riceParamPart128, riceParamMask128);
+        riceParamPart128 = vorrq_u32(riceParamPart128, vshlq_u32(zeroCountPart128, riceParam128));
+        riceParamPart128 = veorq_u32(vshrq_n_u32(riceParamPart128, 1), vaddq_u32(drflac__vnotq_u32(vandq_u32(riceParamPart128, one128)), one128));
+
+        if (order <= 4) {
+            for (i = 0; i < 4; i += 1) {
+                prediction128 = vmulq_s32(coefficients128_0, samples128_0);
+
+                /* Horizontal add and shift. */
+                prediction64 = drflac__vhaddq_s32(prediction128);
+                prediction64 = vshl_s32(prediction64, shift64);
+                prediction64 = vadd_s32(prediction64, vget_low_s32(vreinterpretq_s32_u32(riceParamPart128)));
+
+                samples128_0 = drflac__valignrq_s32_1(vcombine_s32(prediction64, vdup_n_s32(0)), samples128_0);
+                riceParamPart128 = drflac__valignrq_u32_1(vdupq_n_u32(0), riceParamPart128);
+            }
+        } else if (order <= 8) {
+            for (i = 0; i < 4; i += 1) {
+                prediction128 =                vmulq_s32(coefficients128_4, samples128_4);
+                prediction128 = vmlaq_s32(prediction128, coefficients128_0, samples128_0);
+
+                /* Horizontal add and shift. */
+                prediction64 = drflac__vhaddq_s32(prediction128);
+                prediction64 = vshl_s32(prediction64, shift64);
+                prediction64 = vadd_s32(prediction64, vget_low_s32(vreinterpretq_s32_u32(riceParamPart128)));
+
+                samples128_4 = drflac__valignrq_s32_1(samples128_0, samples128_4);
+                samples128_0 = drflac__valignrq_s32_1(vcombine_s32(prediction64, vdup_n_s32(0)), samples128_0);
+                riceParamPart128 = drflac__valignrq_u32_1(vdupq_n_u32(0), riceParamPart128);
+            }
+        } else {
+            for (i = 0; i < 4; i += 1) {
+                prediction128 =                vmulq_s32(coefficients128_8, samples128_8);
+                prediction128 = vmlaq_s32(prediction128, coefficients128_4, samples128_4);
+                prediction128 = vmlaq_s32(prediction128, coefficients128_0, samples128_0);
+
+                /* Horizontal add and shift. */
+                prediction64 = drflac__vhaddq_s32(prediction128);
+                prediction64 = vshl_s32(prediction64, shift64);
+                prediction64 = vadd_s32(prediction64, vget_low_s32(vreinterpretq_s32_u32(riceParamPart128)));
+
+                samples128_8 = drflac__valignrq_s32_1(samples128_4, samples128_8);
+                samples128_4 = drflac__valignrq_s32_1(samples128_0, samples128_4);
+                samples128_0 = drflac__valignrq_s32_1(vcombine_s32(prediction64, vdup_n_s32(0)), samples128_0);
+                riceParamPart128 = drflac__valignrq_u32_1(vdupq_n_u32(0), riceParamPart128);
+            }
+        }
+
+        /* We store samples in groups of 4. */
+        vst1q_s32(pDecodedSamples, samples128_0);
+        pDecodedSamples += 4;
+    }
+
+    /* Make sure we process the last few samples. */
+    i = (count & ~3);
+    while (i < (int)count) {
+        /* Rice extraction. */
+        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[0], &riceParamParts[0])) {
+            return DRFLAC_FALSE;
+        }
+
+        /* Rice reconstruction. */
+        riceParamParts[0] &= riceParamMask;
+        riceParamParts[0] |= (zeroCountParts[0] << riceParam);
+        riceParamParts[0]  = (riceParamParts[0] >> 1) ^ t[riceParamParts[0] & 0x01];
+
+        /* Sample reconstruction. */
+        pDecodedSamples[0] = riceParamParts[0] + drflac__calculate_prediction_32(order, shift, coefficients, pDecodedSamples);
+
+        i += 1;
+        pDecodedSamples += 1;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__decode_samples_with_residual__rice__neon_64(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+{
+    int i;
+    drflac_uint32 riceParamMask;
+    drflac_int32* pDecodedSamples    = pSamplesOut;
+    drflac_int32* pDecodedSamplesEnd = pSamplesOut + (count & ~3);
+    drflac_uint32 zeroCountParts[4];
+    drflac_uint32 riceParamParts[4];
+    int32x4_t coefficients128_0;
+    int32x4_t coefficients128_4;
+    int32x4_t coefficients128_8;
+    int32x4_t samples128_0;
+    int32x4_t samples128_4;
+    int32x4_t samples128_8;
+    uint32x4_t riceParamMask128;
+    int32x4_t riceParam128;
+    int64x1_t shift64;
+    uint32x4_t one128;
+
+    const drflac_uint32 t[2] = {0x00000000, 0xFFFFFFFF};
+
+    riceParamMask    = ~((~0UL) << riceParam);
+    riceParamMask128 = vdupq_n_u32(riceParamMask);
+
+    riceParam128 = vdupq_n_s32(riceParam);
+    shift64 = vdup_n_s64(-shift); /* Negate the shift because we'll be doing a variable shift using vshlq_s32(). */
+    one128 = vdupq_n_u32(1);
+
+    /*
+    Pre-loading the coefficients and prior samples is annoying because we need to ensure we don't try reading more than
+    what's available in the input buffers. It would be conenient to use a fall-through switch to do this, but this results
+    in strict aliasing warnings with GCC. To work around this I'm just doing something hacky. This feels a bit convoluted
+    so I think there's opportunity for this to be simplified.
+    */
+    {
+        int runningOrder = order;
+        drflac_int32 tempC[4] = {0, 0, 0, 0};
+        drflac_int32 tempS[4] = {0, 0, 0, 0};
+
+        /* 0 - 3. */
+        if (runningOrder >= 4) {
+            coefficients128_0 = vld1q_s32(coefficients + 0);
+            samples128_0      = vld1q_s32(pSamplesOut  - 4);
+            runningOrder -= 4;
+        } else {
+            switch (runningOrder) {
+                case 3: tempC[2] = coefficients[2]; tempS[1] = pSamplesOut[-3]; /* fallthrough */
+                case 2: tempC[1] = coefficients[1]; tempS[2] = pSamplesOut[-2]; /* fallthrough */
+                case 1: tempC[0] = coefficients[0]; tempS[3] = pSamplesOut[-1]; /* fallthrough */
+            }
+
+            coefficients128_0 = vld1q_s32(tempC);
+            samples128_0      = vld1q_s32(tempS);
+            runningOrder = 0;
+        }
+
+        /* 4 - 7 */
+        if (runningOrder >= 4) {
+            coefficients128_4 = vld1q_s32(coefficients + 4);
+            samples128_4      = vld1q_s32(pSamplesOut  - 8);
+            runningOrder -= 4;
+        } else {
+            switch (runningOrder) {
+                case 3: tempC[2] = coefficients[6]; tempS[1] = pSamplesOut[-7]; /* fallthrough */
+                case 2: tempC[1] = coefficients[5]; tempS[2] = pSamplesOut[-6]; /* fallthrough */
+                case 1: tempC[0] = coefficients[4]; tempS[3] = pSamplesOut[-5]; /* fallthrough */
+            }
+
+            coefficients128_4 = vld1q_s32(tempC);
+            samples128_4      = vld1q_s32(tempS);
+            runningOrder = 0;
+        }
+
+        /* 8 - 11 */
+        if (runningOrder == 4) {
+            coefficients128_8 = vld1q_s32(coefficients + 8);
+            samples128_8      = vld1q_s32(pSamplesOut  - 12);
+            runningOrder -= 4;
+        } else {
+            switch (runningOrder) {
+                case 3: tempC[2] = coefficients[10]; tempS[1] = pSamplesOut[-11]; /* fallthrough */
+                case 2: tempC[1] = coefficients[ 9]; tempS[2] = pSamplesOut[-10]; /* fallthrough */
+                case 1: tempC[0] = coefficients[ 8]; tempS[3] = pSamplesOut[- 9]; /* fallthrough */
+            }
+
+            coefficients128_8 = vld1q_s32(tempC);
+            samples128_8      = vld1q_s32(tempS);
+            runningOrder = 0;
+        }
+
+        /* Coefficients need to be shuffled for our streaming algorithm below to work. Samples are already in the correct order from the loading routine above. */
+        coefficients128_0 = drflac__vrevq_s32(coefficients128_0);
+        coefficients128_4 = drflac__vrevq_s32(coefficients128_4);
+        coefficients128_8 = drflac__vrevq_s32(coefficients128_8);
+    }
+
+    /* For this version we are doing one sample at a time. */
+    while (pDecodedSamples < pDecodedSamplesEnd) {
+        int64x2_t prediction128;
+        uint32x4_t zeroCountPart128;
+        uint32x4_t riceParamPart128;
+
+        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[0], &riceParamParts[0]) ||
+            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[1], &riceParamParts[1]) ||
+            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[2], &riceParamParts[2]) ||
+            !drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[3], &riceParamParts[3])) {
+            return DRFLAC_FALSE;
+        }
+
+        zeroCountPart128 = vld1q_u32(zeroCountParts);
+        riceParamPart128 = vld1q_u32(riceParamParts);
+
+        riceParamPart128 = vandq_u32(riceParamPart128, riceParamMask128);
+        riceParamPart128 = vorrq_u32(riceParamPart128, vshlq_u32(zeroCountPart128, riceParam128));
+        riceParamPart128 = veorq_u32(vshrq_n_u32(riceParamPart128, 1), vaddq_u32(drflac__vnotq_u32(vandq_u32(riceParamPart128, one128)), one128));
+
+        for (i = 0; i < 4; i += 1) {
+            int64x1_t prediction64;
+
+            prediction128 = veorq_s64(prediction128, prediction128);    /* Reset to 0. */
+            switch (order)
+            {
+            case 12:
+            case 11: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_low_s32(coefficients128_8), vget_low_s32(samples128_8)));
+            case 10:
+            case  9: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_high_s32(coefficients128_8), vget_high_s32(samples128_8)));
+            case  8:
+            case  7: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_low_s32(coefficients128_4), vget_low_s32(samples128_4)));
+            case  6:
+            case  5: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_high_s32(coefficients128_4), vget_high_s32(samples128_4)));
+            case  4:
+            case  3: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_low_s32(coefficients128_0), vget_low_s32(samples128_0)));
+            case  2:
+            case  1: prediction128 = vaddq_s64(prediction128, vmull_s32(vget_high_s32(coefficients128_0), vget_high_s32(samples128_0)));
+            }
+
+            /* Horizontal add and shift. */
+            prediction64 = drflac__vhaddq_s64(prediction128);
+            prediction64 = vshl_s64(prediction64, shift64);
+            prediction64 = vadd_s64(prediction64, vdup_n_s64(vgetq_lane_u32(riceParamPart128, 0)));
+
+            /* Our value should be sitting in prediction64[0]. We need to combine this with our SSE samples. */
+            samples128_8 = drflac__valignrq_s32_1(samples128_4, samples128_8);
+            samples128_4 = drflac__valignrq_s32_1(samples128_0, samples128_4);
+            samples128_0 = drflac__valignrq_s32_1(vcombine_s32(vreinterpret_s32_s64(prediction64), vdup_n_s32(0)), samples128_0);
+
+            /* Slide our rice parameter down so that the value in position 0 contains the next one to process. */
+            riceParamPart128 = drflac__valignrq_u32_1(vdupq_n_u32(0), riceParamPart128);
+        }
+
+        /* We store samples in groups of 4. */
+        vst1q_s32(pDecodedSamples, samples128_0);
+        pDecodedSamples += 4;
+    }
+
+    /* Make sure we process the last few samples. */
+    i = (count & ~3);
+    while (i < (int)count) {
+        /* Rice extraction. */
+        if (!drflac__read_rice_parts_x1(bs, riceParam, &zeroCountParts[0], &riceParamParts[0])) {
+            return DRFLAC_FALSE;
+        }
+
+        /* Rice reconstruction. */
+        riceParamParts[0] &= riceParamMask;
+        riceParamParts[0] |= (zeroCountParts[0] << riceParam);
+        riceParamParts[0]  = (riceParamParts[0] >> 1) ^ t[riceParamParts[0] & 0x01];
+
+        /* Sample reconstruction. */
+        pDecodedSamples[0] = riceParamParts[0] + drflac__calculate_prediction_64(order, shift, coefficients, pDecodedSamples);
+
+        i += 1;
+        pDecodedSamples += 1;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__decode_samples_with_residual__rice__neon(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+{
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(count > 0);
+    DRFLAC_ASSERT(pSamplesOut != NULL);
+
+    /* In my testing the order is rarely > 12, so in this case I'm going to simplify the NEON implementation by only handling order <= 12. */
+    if (order > 0 && order <= 12) {
+        if (bitsPerSample+shift > 32) {
+            return drflac__decode_samples_with_residual__rice__neon_64(bs, count, riceParam, order, shift, coefficients, pSamplesOut);
+        } else {
+            return drflac__decode_samples_with_residual__rice__neon_32(bs, count, riceParam, order, shift, coefficients, pSamplesOut);
+        }
+    } else {
+        return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
+    }
+}
+#endif
+
+static drflac_bool32 drflac__decode_samples_with_residual__rice(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 riceParam, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+{
+#if defined(DRFLAC_SUPPORT_SSE41)
+    if (drflac__gIsSSE41Supported) {
+        return drflac__decode_samples_with_residual__rice__sse41(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported) {
+        return drflac__decode_samples_with_residual__rice__neon(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+    #if 0
+        return drflac__decode_samples_with_residual__rice__reference(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
+    #else
+        return drflac__decode_samples_with_residual__rice__scalar(bs, bitsPerSample, count, riceParam, order, shift, coefficients, pSamplesOut);
+    #endif
+    }
+}
+
+/* Reads and seeks past a string of residual values as Rice codes. The decoder should be sitting on the first bit of the Rice codes. */
+static drflac_bool32 drflac__read_and_seek_residual__rice(drflac_bs* bs, drflac_uint32 count, drflac_uint8 riceParam)
+{
+    drflac_uint32 i;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(count > 0);
+
+    for (i = 0; i < count; ++i) {
+        if (!drflac__seek_rice_parts(bs, riceParam)) {
+            return DRFLAC_FALSE;
+        }
+    }
+
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__decode_samples_with_residual__unencoded(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 count, drflac_uint8 unencodedBitsPerSample, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pSamplesOut)
+{
+    drflac_uint32 i;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(count > 0);
+    DRFLAC_ASSERT(unencodedBitsPerSample <= 31);    /* <-- unencodedBitsPerSample is a 5 bit number, so cannot exceed 31. */
+    DRFLAC_ASSERT(pSamplesOut != NULL);
+
+    for (i = 0; i < count; ++i) {
+        if (unencodedBitsPerSample > 0) {
+            if (!drflac__read_int32(bs, unencodedBitsPerSample, pSamplesOut + i)) {
+                return DRFLAC_FALSE;
+            }
+        } else {
+            pSamplesOut[i] = 0;
+        }
+
+        if (bitsPerSample >= 24) {
+            pSamplesOut[i] += drflac__calculate_prediction_64(order, shift, coefficients, pSamplesOut + i);
+        } else {
+            pSamplesOut[i] += drflac__calculate_prediction_32(order, shift, coefficients, pSamplesOut + i);
+        }
+    }
+
+    return DRFLAC_TRUE;
+}
+
+
+/*
+Reads and decodes the residual for the sub-frame the decoder is currently sitting on. This function should be called
+when the decoder is sitting at the very start of the RESIDUAL block. The first <order> residuals will be ignored. The
+<blockSize> and <order> parameters are used to determine how many residual values need to be decoded.
+*/
+static drflac_bool32 drflac__decode_samples_with_residual(drflac_bs* bs, drflac_uint32 bitsPerSample, drflac_uint32 blockSize, drflac_uint32 order, drflac_int32 shift, const drflac_int32* coefficients, drflac_int32* pDecodedSamples)
+{
+    drflac_uint8 residualMethod;
+    drflac_uint8 partitionOrder;
+    drflac_uint32 samplesInPartition;
+    drflac_uint32 partitionsRemaining;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(blockSize != 0);
+    DRFLAC_ASSERT(pDecodedSamples != NULL);       /* <-- Should we allow NULL, in which case we just seek past the residual rather than do a full decode? */
+
+    if (!drflac__read_uint8(bs, 2, &residualMethod)) {
+        return DRFLAC_FALSE;
+    }
+
+    if (residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE && residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) {
+        return DRFLAC_FALSE;    /* Unknown or unsupported residual coding method. */
+    }
+
+    /* Ignore the first <order> values. */
+    pDecodedSamples += order;
+
+    if (!drflac__read_uint8(bs, 4, &partitionOrder)) {
+        return DRFLAC_FALSE;
+    }
+
+    /*
+    From the FLAC spec:
+      The Rice partition order in a Rice-coded residual section must be less than or equal to 8.
+    */
+    if (partitionOrder > 8) {
+        return DRFLAC_FALSE;
+    }
+
+    /* Validation check. */
+    if ((blockSize / (1 << partitionOrder)) <= order) {
+        return DRFLAC_FALSE;
+    }
+
+    samplesInPartition = (blockSize / (1 << partitionOrder)) - order;
+    partitionsRemaining = (1 << partitionOrder);
+    for (;;) {
+        drflac_uint8 riceParam = 0;
+        if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE) {
+            if (!drflac__read_uint8(bs, 4, &riceParam)) {
+                return DRFLAC_FALSE;
+            }
+            if (riceParam == 15) {
+                riceParam = 0xFF;
+            }
+        } else if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) {
+            if (!drflac__read_uint8(bs, 5, &riceParam)) {
+                return DRFLAC_FALSE;
+            }
+            if (riceParam == 31) {
+                riceParam = 0xFF;
+            }
+        }
+
+        if (riceParam != 0xFF) {
+            if (!drflac__decode_samples_with_residual__rice(bs, bitsPerSample, samplesInPartition, riceParam, order, shift, coefficients, pDecodedSamples)) {
+                return DRFLAC_FALSE;
+            }
+        } else {
+            drflac_uint8 unencodedBitsPerSample = 0;
+            if (!drflac__read_uint8(bs, 5, &unencodedBitsPerSample)) {
+                return DRFLAC_FALSE;
+            }
+
+            if (!drflac__decode_samples_with_residual__unencoded(bs, bitsPerSample, samplesInPartition, unencodedBitsPerSample, order, shift, coefficients, pDecodedSamples)) {
+                return DRFLAC_FALSE;
+            }
+        }
+
+        pDecodedSamples += samplesInPartition;
+
+        if (partitionsRemaining == 1) {
+            break;
+        }
+
+        partitionsRemaining -= 1;
+
+        if (partitionOrder != 0) {
+            samplesInPartition = blockSize / (1 << partitionOrder);
+        }
+    }
+
+    return DRFLAC_TRUE;
+}
+
+/*
+Reads and seeks past the residual for the sub-frame the decoder is currently sitting on. This function should be called
+when the decoder is sitting at the very start of the RESIDUAL block. The first <order> residuals will be set to 0. The
+<blockSize> and <order> parameters are used to determine how many residual values need to be decoded.
+*/
+static drflac_bool32 drflac__read_and_seek_residual(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 order)
+{
+    drflac_uint8 residualMethod;
+    drflac_uint8 partitionOrder;
+    drflac_uint32 samplesInPartition;
+    drflac_uint32 partitionsRemaining;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(blockSize != 0);
+
+    if (!drflac__read_uint8(bs, 2, &residualMethod)) {
+        return DRFLAC_FALSE;
+    }
+
+    if (residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE && residualMethod != DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) {
+        return DRFLAC_FALSE;    /* Unknown or unsupported residual coding method. */
+    }
+
+    if (!drflac__read_uint8(bs, 4, &partitionOrder)) {
+        return DRFLAC_FALSE;
+    }
+
+    /*
+    From the FLAC spec:
+      The Rice partition order in a Rice-coded residual section must be less than or equal to 8.
+    */
+    if (partitionOrder > 8) {
+        return DRFLAC_FALSE;
+    }
+
+    /* Validation check. */
+    if ((blockSize / (1 << partitionOrder)) <= order) {
+        return DRFLAC_FALSE;
+    }
+
+    samplesInPartition = (blockSize / (1 << partitionOrder)) - order;
+    partitionsRemaining = (1 << partitionOrder);
+    for (;;)
+    {
+        drflac_uint8 riceParam = 0;
+        if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE) {
+            if (!drflac__read_uint8(bs, 4, &riceParam)) {
+                return DRFLAC_FALSE;
+            }
+            if (riceParam == 15) {
+                riceParam = 0xFF;
+            }
+        } else if (residualMethod == DRFLAC_RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) {
+            if (!drflac__read_uint8(bs, 5, &riceParam)) {
+                return DRFLAC_FALSE;
+            }
+            if (riceParam == 31) {
+                riceParam = 0xFF;
+            }
+        }
+
+        if (riceParam != 0xFF) {
+            if (!drflac__read_and_seek_residual__rice(bs, samplesInPartition, riceParam)) {
+                return DRFLAC_FALSE;
+            }
+        } else {
+            drflac_uint8 unencodedBitsPerSample = 0;
+            if (!drflac__read_uint8(bs, 5, &unencodedBitsPerSample)) {
+                return DRFLAC_FALSE;
+            }
+
+            if (!drflac__seek_bits(bs, unencodedBitsPerSample * samplesInPartition)) {
+                return DRFLAC_FALSE;
+            }
+        }
+
+
+        if (partitionsRemaining == 1) {
+            break;
+        }
+
+        partitionsRemaining -= 1;
+        samplesInPartition = blockSize / (1 << partitionOrder);
+    }
+
+    return DRFLAC_TRUE;
+}
+
+
+static drflac_bool32 drflac__decode_samples__constant(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 subframeBitsPerSample, drflac_int32* pDecodedSamples)
+{
+    drflac_uint32 i;
+
+    /* Only a single sample needs to be decoded here. */
+    drflac_int32 sample;
+    if (!drflac__read_int32(bs, subframeBitsPerSample, &sample)) {
+        return DRFLAC_FALSE;
+    }
+
+    /*
+    We don't really need to expand this, but it does simplify the process of reading samples. If this becomes a performance issue (unlikely)
+    we'll want to look at a more efficient way.
+    */
+    for (i = 0; i < blockSize; ++i) {
+        pDecodedSamples[i] = sample;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__decode_samples__verbatim(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 subframeBitsPerSample, drflac_int32* pDecodedSamples)
+{
+    drflac_uint32 i;
+
+    for (i = 0; i < blockSize; ++i) {
+        drflac_int32 sample;
+        if (!drflac__read_int32(bs, subframeBitsPerSample, &sample)) {
+            return DRFLAC_FALSE;
+        }
+
+        pDecodedSamples[i] = sample;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__decode_samples__fixed(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 subframeBitsPerSample, drflac_uint8 lpcOrder, drflac_int32* pDecodedSamples)
+{
+    drflac_uint32 i;
+
+    static drflac_int32 lpcCoefficientsTable[5][4] = {
+        {0,  0, 0,  0},
+        {1,  0, 0,  0},
+        {2, -1, 0,  0},
+        {3, -3, 1,  0},
+        {4, -6, 4, -1}
+    };
+
+    /* Warm up samples and coefficients. */
+    for (i = 0; i < lpcOrder; ++i) {
+        drflac_int32 sample;
+        if (!drflac__read_int32(bs, subframeBitsPerSample, &sample)) {
+            return DRFLAC_FALSE;
+        }
+
+        pDecodedSamples[i] = sample;
+    }
+
+    if (!drflac__decode_samples_with_residual(bs, subframeBitsPerSample, blockSize, lpcOrder, 0, lpcCoefficientsTable[lpcOrder], pDecodedSamples)) {
+        return DRFLAC_FALSE;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__decode_samples__lpc(drflac_bs* bs, drflac_uint32 blockSize, drflac_uint32 bitsPerSample, drflac_uint8 lpcOrder, drflac_int32* pDecodedSamples)
+{
+    drflac_uint8 i;
+    drflac_uint8 lpcPrecision;
+    drflac_int8 lpcShift;
+    drflac_int32 coefficients[32];
+
+    /* Warm up samples. */
+    for (i = 0; i < lpcOrder; ++i) {
+        drflac_int32 sample;
+        if (!drflac__read_int32(bs, bitsPerSample, &sample)) {
+            return DRFLAC_FALSE;
+        }
+
+        pDecodedSamples[i] = sample;
+    }
+
+    if (!drflac__read_uint8(bs, 4, &lpcPrecision)) {
+        return DRFLAC_FALSE;
+    }
+    if (lpcPrecision == 15) {
+        return DRFLAC_FALSE;    /* Invalid. */
+    }
+    lpcPrecision += 1;
+
+    if (!drflac__read_int8(bs, 5, &lpcShift)) {
+        return DRFLAC_FALSE;
+    }
+
+    DRFLAC_ZERO_MEMORY(coefficients, sizeof(coefficients));
+    for (i = 0; i < lpcOrder; ++i) {
+        if (!drflac__read_int32(bs, lpcPrecision, coefficients + i)) {
+            return DRFLAC_FALSE;
+        }
+    }
+
+    if (!drflac__decode_samples_with_residual(bs, bitsPerSample, blockSize, lpcOrder, lpcShift, coefficients, pDecodedSamples)) {
+        return DRFLAC_FALSE;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+
+static drflac_bool32 drflac__read_next_flac_frame_header(drflac_bs* bs, drflac_uint8 streaminfoBitsPerSample, drflac_frame_header* header)
+{
+    const drflac_uint32 sampleRateTable[12]  = {0, 88200, 176400, 192000, 8000, 16000, 22050, 24000, 32000, 44100, 48000, 96000};
+    const drflac_uint8 bitsPerSampleTable[8] = {0, 8, 12, (drflac_uint8)-1, 16, 20, 24, (drflac_uint8)-1};   /* -1 = reserved. */
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(header != NULL);
+
+    /* Keep looping until we find a valid sync code. */
+    for (;;) {
+        drflac_uint8 crc8 = 0xCE; /* 0xCE = drflac_crc8(0, 0x3FFE, 14); */
+        drflac_uint8 reserved = 0;
+        drflac_uint8 blockingStrategy = 0;
+        drflac_uint8 blockSize = 0;
+        drflac_uint8 sampleRate = 0;
+        drflac_uint8 channelAssignment = 0;
+        drflac_uint8 bitsPerSample = 0;
+        drflac_bool32 isVariableBlockSize;
+
+        if (!drflac__find_and_seek_to_next_sync_code(bs)) {
+            return DRFLAC_FALSE;
+        }
+
+        if (!drflac__read_uint8(bs, 1, &reserved)) {
+            return DRFLAC_FALSE;
+        }
+        if (reserved == 1) {
+            continue;
+        }
+        crc8 = drflac_crc8(crc8, reserved, 1);
+
+        if (!drflac__read_uint8(bs, 1, &blockingStrategy)) {
+            return DRFLAC_FALSE;
+        }
+        crc8 = drflac_crc8(crc8, blockingStrategy, 1);
+
+        if (!drflac__read_uint8(bs, 4, &blockSize)) {
+            return DRFLAC_FALSE;
+        }
+        if (blockSize == 0) {
+            continue;
+        }
+        crc8 = drflac_crc8(crc8, blockSize, 4);
+
+        if (!drflac__read_uint8(bs, 4, &sampleRate)) {
+            return DRFLAC_FALSE;
+        }
+        crc8 = drflac_crc8(crc8, sampleRate, 4);
+
+        if (!drflac__read_uint8(bs, 4, &channelAssignment)) {
+            return DRFLAC_FALSE;
+        }
+        if (channelAssignment > 10) {
+            continue;
+        }
+        crc8 = drflac_crc8(crc8, channelAssignment, 4);
+
+        if (!drflac__read_uint8(bs, 3, &bitsPerSample)) {
+            return DRFLAC_FALSE;
+        }
+        if (bitsPerSample == 3 || bitsPerSample == 7) {
+            continue;
+        }
+        crc8 = drflac_crc8(crc8, bitsPerSample, 3);
+
+
+        if (!drflac__read_uint8(bs, 1, &reserved)) {
+            return DRFLAC_FALSE;
+        }
+        if (reserved == 1) {
+            continue;
+        }
+        crc8 = drflac_crc8(crc8, reserved, 1);
+
+
+        isVariableBlockSize = blockingStrategy == 1;
+        if (isVariableBlockSize) {
+            drflac_uint64 pcmFrameNumber;
+            drflac_result result = drflac__read_utf8_coded_number(bs, &pcmFrameNumber, &crc8);
+            if (result != DRFLAC_SUCCESS) {
+                if (result == DRFLAC_AT_END) {
+                    return DRFLAC_FALSE;
+                } else {
+                    continue;
+                }
+            }
+            header->flacFrameNumber  = 0;
+            header->pcmFrameNumber = pcmFrameNumber;
+        } else {
+            drflac_uint64 flacFrameNumber = 0;
+            drflac_result result = drflac__read_utf8_coded_number(bs, &flacFrameNumber, &crc8);
+            if (result != DRFLAC_SUCCESS) {
+                if (result == DRFLAC_AT_END) {
+                    return DRFLAC_FALSE;
+                } else {
+                    continue;
+                }
+            }
+            header->flacFrameNumber  = (drflac_uint32)flacFrameNumber;   /* <-- Safe cast. */
+            header->pcmFrameNumber = 0;
+        }
+
+
+        DRFLAC_ASSERT(blockSize > 0);
+        if (blockSize == 1) {
+            header->blockSizeInPCMFrames = 192;
+        } else if (blockSize >= 2 && blockSize <= 5) {
+            header->blockSizeInPCMFrames = 576 * (1 << (blockSize - 2));
+        } else if (blockSize == 6) {
+            if (!drflac__read_uint16(bs, 8, &header->blockSizeInPCMFrames)) {
+                return DRFLAC_FALSE;
+            }
+            crc8 = drflac_crc8(crc8, header->blockSizeInPCMFrames, 8);
+            header->blockSizeInPCMFrames += 1;
+        } else if (blockSize == 7) {
+            if (!drflac__read_uint16(bs, 16, &header->blockSizeInPCMFrames)) {
+                return DRFLAC_FALSE;
+            }
+            crc8 = drflac_crc8(crc8, header->blockSizeInPCMFrames, 16);
+            header->blockSizeInPCMFrames += 1;
+        } else {
+            DRFLAC_ASSERT(blockSize >= 8);
+            header->blockSizeInPCMFrames = 256 * (1 << (blockSize - 8));
+        }
+
+
+        if (sampleRate <= 11) {
+            header->sampleRate = sampleRateTable[sampleRate];
+        } else if (sampleRate == 12) {
+            if (!drflac__read_uint32(bs, 8, &header->sampleRate)) {
+                return DRFLAC_FALSE;
+            }
+            crc8 = drflac_crc8(crc8, header->sampleRate, 8);
+            header->sampleRate *= 1000;
+        } else if (sampleRate == 13) {
+            if (!drflac__read_uint32(bs, 16, &header->sampleRate)) {
+                return DRFLAC_FALSE;
+            }
+            crc8 = drflac_crc8(crc8, header->sampleRate, 16);
+        } else if (sampleRate == 14) {
+            if (!drflac__read_uint32(bs, 16, &header->sampleRate)) {
+                return DRFLAC_FALSE;
+            }
+            crc8 = drflac_crc8(crc8, header->sampleRate, 16);
+            header->sampleRate *= 10;
+        } else {
+            continue;  /* Invalid. Assume an invalid block. */
+        }
+
+
+        header->channelAssignment = channelAssignment;
+
+        header->bitsPerSample = bitsPerSampleTable[bitsPerSample];
+        if (header->bitsPerSample == 0) {
+            header->bitsPerSample = streaminfoBitsPerSample;
+        }
+
+        if (!drflac__read_uint8(bs, 8, &header->crc8)) {
+            return DRFLAC_FALSE;
+        }
+
+#ifndef DR_FLAC_NO_CRC
+        if (header->crc8 != crc8) {
+            continue;    /* CRC mismatch. Loop back to the top and find the next sync code. */
+        }
+#endif
+        return DRFLAC_TRUE;
+    }
+}
+
+static drflac_bool32 drflac__read_subframe_header(drflac_bs* bs, drflac_subframe* pSubframe)
+{
+    drflac_uint8 header;
+    int type;
+
+    if (!drflac__read_uint8(bs, 8, &header)) {
+        return DRFLAC_FALSE;
+    }
+
+    /* First bit should always be 0. */
+    if ((header & 0x80) != 0) {
+        return DRFLAC_FALSE;
+    }
+
+    type = (header & 0x7E) >> 1;
+    if (type == 0) {
+        pSubframe->subframeType = DRFLAC_SUBFRAME_CONSTANT;
+    } else if (type == 1) {
+        pSubframe->subframeType = DRFLAC_SUBFRAME_VERBATIM;
+    } else {
+        if ((type & 0x20) != 0) {
+            pSubframe->subframeType = DRFLAC_SUBFRAME_LPC;
+            pSubframe->lpcOrder = (drflac_uint8)(type & 0x1F) + 1;
+        } else if ((type & 0x08) != 0) {
+            pSubframe->subframeType = DRFLAC_SUBFRAME_FIXED;
+            pSubframe->lpcOrder = (drflac_uint8)(type & 0x07);
+            if (pSubframe->lpcOrder > 4) {
+                pSubframe->subframeType = DRFLAC_SUBFRAME_RESERVED;
+                pSubframe->lpcOrder = 0;
+            }
+        } else {
+            pSubframe->subframeType = DRFLAC_SUBFRAME_RESERVED;
+        }
+    }
+
+    if (pSubframe->subframeType == DRFLAC_SUBFRAME_RESERVED) {
+        return DRFLAC_FALSE;
+    }
+
+    /* Wasted bits per sample. */
+    pSubframe->wastedBitsPerSample = 0;
+    if ((header & 0x01) == 1) {
+        unsigned int wastedBitsPerSample;
+        if (!drflac__seek_past_next_set_bit(bs, &wastedBitsPerSample)) {
+            return DRFLAC_FALSE;
+        }
+        pSubframe->wastedBitsPerSample = (drflac_uint8)wastedBitsPerSample + 1;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__decode_subframe(drflac_bs* bs, drflac_frame* frame, int subframeIndex, drflac_int32* pDecodedSamplesOut)
+{
+    drflac_subframe* pSubframe;
+    drflac_uint32 subframeBitsPerSample;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(frame != NULL);
+
+    pSubframe = frame->subframes + subframeIndex;
+    if (!drflac__read_subframe_header(bs, pSubframe)) {
+        return DRFLAC_FALSE;
+    }
+
+    /* Side channels require an extra bit per sample. Took a while to figure that one out... */
+    subframeBitsPerSample = frame->header.bitsPerSample;
+    if ((frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE || frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE) && subframeIndex == 1) {
+        subframeBitsPerSample += 1;
+    } else if (frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE && subframeIndex == 0) {
+        subframeBitsPerSample += 1;
+    }
+
+    /* Need to handle wasted bits per sample. */
+    if (pSubframe->wastedBitsPerSample >= subframeBitsPerSample) {
+        return DRFLAC_FALSE;
+    }
+    subframeBitsPerSample -= pSubframe->wastedBitsPerSample;
+
+    pSubframe->pSamplesS32 = pDecodedSamplesOut;
+
+    switch (pSubframe->subframeType)
+    {
+        case DRFLAC_SUBFRAME_CONSTANT:
+        {
+            drflac__decode_samples__constant(bs, frame->header.blockSizeInPCMFrames, subframeBitsPerSample, pSubframe->pSamplesS32);
+        } break;
+
+        case DRFLAC_SUBFRAME_VERBATIM:
+        {
+            drflac__decode_samples__verbatim(bs, frame->header.blockSizeInPCMFrames, subframeBitsPerSample, pSubframe->pSamplesS32);
+        } break;
+
+        case DRFLAC_SUBFRAME_FIXED:
+        {
+            drflac__decode_samples__fixed(bs, frame->header.blockSizeInPCMFrames, subframeBitsPerSample, pSubframe->lpcOrder, pSubframe->pSamplesS32);
+        } break;
+
+        case DRFLAC_SUBFRAME_LPC:
+        {
+            drflac__decode_samples__lpc(bs, frame->header.blockSizeInPCMFrames, subframeBitsPerSample, pSubframe->lpcOrder, pSubframe->pSamplesS32);
+        } break;
+
+        default: return DRFLAC_FALSE;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__seek_subframe(drflac_bs* bs, drflac_frame* frame, int subframeIndex)
+{
+    drflac_subframe* pSubframe;
+    drflac_uint32 subframeBitsPerSample;
+
+    DRFLAC_ASSERT(bs != NULL);
+    DRFLAC_ASSERT(frame != NULL);
+
+    pSubframe = frame->subframes + subframeIndex;
+    if (!drflac__read_subframe_header(bs, pSubframe)) {
+        return DRFLAC_FALSE;
+    }
+
+    /* Side channels require an extra bit per sample. Took a while to figure that one out... */
+    subframeBitsPerSample = frame->header.bitsPerSample;
+    if ((frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE || frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE) && subframeIndex == 1) {
+        subframeBitsPerSample += 1;
+    } else if (frame->header.channelAssignment == DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE && subframeIndex == 0) {
+        subframeBitsPerSample += 1;
+    }
+
+    /* Need to handle wasted bits per sample. */
+    if (pSubframe->wastedBitsPerSample >= subframeBitsPerSample) {
+        return DRFLAC_FALSE;
+    }
+    subframeBitsPerSample -= pSubframe->wastedBitsPerSample;
+
+    pSubframe->pSamplesS32 = NULL;
+
+    switch (pSubframe->subframeType)
+    {
+        case DRFLAC_SUBFRAME_CONSTANT:
+        {
+            if (!drflac__seek_bits(bs, subframeBitsPerSample)) {
+                return DRFLAC_FALSE;
+            }
+        } break;
+
+        case DRFLAC_SUBFRAME_VERBATIM:
+        {
+            unsigned int bitsToSeek = frame->header.blockSizeInPCMFrames * subframeBitsPerSample;
+            if (!drflac__seek_bits(bs, bitsToSeek)) {
+                return DRFLAC_FALSE;
+            }
+        } break;
+
+        case DRFLAC_SUBFRAME_FIXED:
+        {
+            unsigned int bitsToSeek = pSubframe->lpcOrder * subframeBitsPerSample;
+            if (!drflac__seek_bits(bs, bitsToSeek)) {
+                return DRFLAC_FALSE;
+            }
+
+            if (!drflac__read_and_seek_residual(bs, frame->header.blockSizeInPCMFrames, pSubframe->lpcOrder)) {
+                return DRFLAC_FALSE;
+            }
+        } break;
+
+        case DRFLAC_SUBFRAME_LPC:
+        {
+            drflac_uint8 lpcPrecision;
+
+            unsigned int bitsToSeek = pSubframe->lpcOrder * subframeBitsPerSample;
+            if (!drflac__seek_bits(bs, bitsToSeek)) {
+                return DRFLAC_FALSE;
+            }
+
+            if (!drflac__read_uint8(bs, 4, &lpcPrecision)) {
+                return DRFLAC_FALSE;
+            }
+            if (lpcPrecision == 15) {
+                return DRFLAC_FALSE;    /* Invalid. */
+            }
+            lpcPrecision += 1;
+
+
+            bitsToSeek = (pSubframe->lpcOrder * lpcPrecision) + 5;    /* +5 for shift. */
+            if (!drflac__seek_bits(bs, bitsToSeek)) {
+                return DRFLAC_FALSE;
+            }
+
+            if (!drflac__read_and_seek_residual(bs, frame->header.blockSizeInPCMFrames, pSubframe->lpcOrder)) {
+                return DRFLAC_FALSE;
+            }
+        } break;
+
+        default: return DRFLAC_FALSE;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+
+static DRFLAC_INLINE drflac_uint8 drflac__get_channel_count_from_channel_assignment(drflac_int8 channelAssignment)
+{
+    drflac_uint8 lookup[] = {1, 2, 3, 4, 5, 6, 7, 8, 2, 2, 2};
+
+    DRFLAC_ASSERT(channelAssignment <= 10);
+    return lookup[channelAssignment];
+}
+
+static drflac_result drflac__decode_flac_frame(drflac* pFlac)
+{
+    int channelCount;
+    int i;
+    drflac_uint8 paddingSizeInBits;
+    drflac_uint16 desiredCRC16;
+#ifndef DR_FLAC_NO_CRC
+    drflac_uint16 actualCRC16;
+#endif
+
+    /* This function should be called while the stream is sitting on the first byte after the frame header. */
+    DRFLAC_ZERO_MEMORY(pFlac->currentFLACFrame.subframes, sizeof(pFlac->currentFLACFrame.subframes));
+
+    /* The frame block size must never be larger than the maximum block size defined by the FLAC stream. */
+    if (pFlac->currentFLACFrame.header.blockSizeInPCMFrames > pFlac->maxBlockSizeInPCMFrames) {
+        return DRFLAC_ERROR;
+    }
+
+    /* The number of channels in the frame must match the channel count from the STREAMINFO block. */
+    channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFLACFrame.header.channelAssignment);
+    if (channelCount != (int)pFlac->channels) {
+        return DRFLAC_ERROR;
+    }
+
+    for (i = 0; i < channelCount; ++i) {
+        if (!drflac__decode_subframe(&pFlac->bs, &pFlac->currentFLACFrame, i, pFlac->pDecodedSamples + (pFlac->currentFLACFrame.header.blockSizeInPCMFrames * i))) {
+            return DRFLAC_ERROR;
+        }
+    }
+
+    paddingSizeInBits = (drflac_uint8)(DRFLAC_CACHE_L1_BITS_REMAINING(&pFlac->bs) & 7);
+    if (paddingSizeInBits > 0) {
+        drflac_uint8 padding = 0;
+        if (!drflac__read_uint8(&pFlac->bs, paddingSizeInBits, &padding)) {
+            return DRFLAC_AT_END;
+        }
+    }
+
+#ifndef DR_FLAC_NO_CRC
+    actualCRC16 = drflac__flush_crc16(&pFlac->bs);
+#endif
+    if (!drflac__read_uint16(&pFlac->bs, 16, &desiredCRC16)) {
+        return DRFLAC_AT_END;
+    }
+
+#ifndef DR_FLAC_NO_CRC
+    if (actualCRC16 != desiredCRC16) {
+        return DRFLAC_CRC_MISMATCH;    /* CRC mismatch. */
+    }
+#endif
+
+    pFlac->currentFLACFrame.pcmFramesRemaining = pFlac->currentFLACFrame.header.blockSizeInPCMFrames;
+
+    return DRFLAC_SUCCESS;
+}
+
+static drflac_result drflac__seek_flac_frame(drflac* pFlac)
+{
+    int channelCount;
+    int i;
+    drflac_uint16 desiredCRC16;
+#ifndef DR_FLAC_NO_CRC
+    drflac_uint16 actualCRC16;
+#endif
+
+    channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFLACFrame.header.channelAssignment);
+    for (i = 0; i < channelCount; ++i) {
+        if (!drflac__seek_subframe(&pFlac->bs, &pFlac->currentFLACFrame, i)) {
+            return DRFLAC_ERROR;
+        }
+    }
+
+    /* Padding. */
+    if (!drflac__seek_bits(&pFlac->bs, DRFLAC_CACHE_L1_BITS_REMAINING(&pFlac->bs) & 7)) {
+        return DRFLAC_ERROR;
+    }
+
+    /* CRC. */
+#ifndef DR_FLAC_NO_CRC
+    actualCRC16 = drflac__flush_crc16(&pFlac->bs);
+#endif
+    if (!drflac__read_uint16(&pFlac->bs, 16, &desiredCRC16)) {
+        return DRFLAC_AT_END;
+    }
+
+#ifndef DR_FLAC_NO_CRC
+    if (actualCRC16 != desiredCRC16) {
+        return DRFLAC_CRC_MISMATCH;    /* CRC mismatch. */
+    }
+#endif
+
+    return DRFLAC_SUCCESS;
+}
+
+static drflac_bool32 drflac__read_and_decode_next_flac_frame(drflac* pFlac)
+{
+    DRFLAC_ASSERT(pFlac != NULL);
+
+    for (;;) {
+        drflac_result result;
+
+        if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
+            return DRFLAC_FALSE;
+        }
+
+        result = drflac__decode_flac_frame(pFlac);
+        if (result != DRFLAC_SUCCESS) {
+            if (result == DRFLAC_CRC_MISMATCH) {
+                continue;   /* CRC mismatch. Skip to the next frame. */
+            } else {
+                return DRFLAC_FALSE;
+            }
+        }
+
+        return DRFLAC_TRUE;
+    }
+}
+
+static void drflac__get_pcm_frame_range_of_current_flac_frame(drflac* pFlac, drflac_uint64* pFirstPCMFrame, drflac_uint64* pLastPCMFrame)
+{
+    drflac_uint64 firstPCMFrame;
+    drflac_uint64 lastPCMFrame;
+
+    DRFLAC_ASSERT(pFlac != NULL);
+
+    firstPCMFrame = pFlac->currentFLACFrame.header.pcmFrameNumber;
+    if (firstPCMFrame == 0) {
+        firstPCMFrame = ((drflac_uint64)pFlac->currentFLACFrame.header.flacFrameNumber) * pFlac->maxBlockSizeInPCMFrames;
+    }
+
+    lastPCMFrame = firstPCMFrame + pFlac->currentFLACFrame.header.blockSizeInPCMFrames;
+    if (lastPCMFrame > 0) {
+        lastPCMFrame -= 1; /* Needs to be zero based. */
+    }
+
+    if (pFirstPCMFrame) {
+        *pFirstPCMFrame = firstPCMFrame;
+    }
+    if (pLastPCMFrame) {
+        *pLastPCMFrame = lastPCMFrame;
+    }
+}
+
+static drflac_bool32 drflac__seek_to_first_frame(drflac* pFlac)
+{
+    drflac_bool32 result;
+
+    DRFLAC_ASSERT(pFlac != NULL);
+
+    result = drflac__seek_to_byte(&pFlac->bs, pFlac->firstFLACFramePosInBytes);
+
+    DRFLAC_ZERO_MEMORY(&pFlac->currentFLACFrame, sizeof(pFlac->currentFLACFrame));
+    pFlac->currentPCMFrame = 0;
+
+    return result;
+}
+
+static DRFLAC_INLINE drflac_result drflac__seek_to_next_flac_frame(drflac* pFlac)
+{
+    /* This function should only ever be called while the decoder is sitting on the first byte past the FRAME_HEADER section. */
+    DRFLAC_ASSERT(pFlac != NULL);
+    return drflac__seek_flac_frame(pFlac);
+}
+
+
+static drflac_uint64 drflac__seek_forward_by_pcm_frames(drflac* pFlac, drflac_uint64 pcmFramesToSeek)
+{
+    drflac_uint64 pcmFramesRead = 0;
+    while (pcmFramesToSeek > 0) {
+        if (pFlac->currentFLACFrame.pcmFramesRemaining == 0) {
+            if (!drflac__read_and_decode_next_flac_frame(pFlac)) {
+                break;  /* Couldn't read the next frame, so just break from the loop and return. */
+            }
+        } else {
+            if (pFlac->currentFLACFrame.pcmFramesRemaining > pcmFramesToSeek) {
+                pcmFramesRead   += pcmFramesToSeek;
+                pFlac->currentFLACFrame.pcmFramesRemaining -= (drflac_uint32)pcmFramesToSeek;   /* <-- Safe cast. Will always be < currentFrame.pcmFramesRemaining < 65536. */
+                pcmFramesToSeek  = 0;
+            } else {
+                pcmFramesRead   += pFlac->currentFLACFrame.pcmFramesRemaining;
+                pcmFramesToSeek -= pFlac->currentFLACFrame.pcmFramesRemaining;
+                pFlac->currentFLACFrame.pcmFramesRemaining = 0;
+            }
+        }
+    }
+
+    pFlac->currentPCMFrame += pcmFramesRead;
+    return pcmFramesRead;
+}
+
+
+static drflac_bool32 drflac__seek_to_pcm_frame__brute_force(drflac* pFlac, drflac_uint64 pcmFrameIndex)
+{
+    drflac_bool32 isMidFrame = DRFLAC_FALSE;
+    drflac_uint64 runningPCMFrameCount;
+
+    DRFLAC_ASSERT(pFlac != NULL);
+
+    /* If we are seeking forward we start from the current position. Otherwise we need to start all the way from the start of the file. */
+    if (pcmFrameIndex >= pFlac->currentPCMFrame) {
+        /* Seeking forward. Need to seek from the current position. */
+        runningPCMFrameCount = pFlac->currentPCMFrame;
+
+        /* The frame header for the first frame may not yet have been read. We need to do that if necessary. */
+        if (pFlac->currentPCMFrame == 0 && pFlac->currentFLACFrame.pcmFramesRemaining == 0) {
+            if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
+                return DRFLAC_FALSE;
+            }
+        } else {
+            isMidFrame = DRFLAC_TRUE;
+        }
+    } else {
+        /* Seeking backwards. Need to seek from the start of the file. */
+        runningPCMFrameCount = 0;
+
+        /* Move back to the start. */
+        if (!drflac__seek_to_first_frame(pFlac)) {
+            return DRFLAC_FALSE;
+        }
+
+        /* Decode the first frame in preparation for sample-exact seeking below. */
+        if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
+            return DRFLAC_FALSE;
+        }
+    }
+
+    /*
+    We need to as quickly as possible find the frame that contains the target sample. To do this, we iterate over each frame and inspect its
+    header. If based on the header we can determine that the frame contains the sample, we do a full decode of that frame.
+    */
+    for (;;) {
+        drflac_uint64 pcmFrameCountInThisFLACFrame;
+        drflac_uint64 firstPCMFrameInFLACFrame = 0;
+        drflac_uint64 lastPCMFrameInFLACFrame = 0;
+
+        drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &firstPCMFrameInFLACFrame, &lastPCMFrameInFLACFrame);
+
+        pcmFrameCountInThisFLACFrame = (lastPCMFrameInFLACFrame - firstPCMFrameInFLACFrame) + 1;
+        if (pcmFrameIndex < (runningPCMFrameCount + pcmFrameCountInThisFLACFrame)) {
+            /*
+            The sample should be in this frame. We need to fully decode it, however if it's an invalid frame (a CRC mismatch), we need to pretend
+            it never existed and keep iterating.
+            */
+            drflac_uint64 pcmFramesToDecode = pcmFrameIndex - runningPCMFrameCount;
+
+            if (!isMidFrame) {
+                drflac_result result = drflac__decode_flac_frame(pFlac);
+                if (result == DRFLAC_SUCCESS) {
+                    /* The frame is valid. We just need to skip over some samples to ensure it's sample-exact. */
+                    return drflac__seek_forward_by_pcm_frames(pFlac, pcmFramesToDecode) == pcmFramesToDecode;  /* <-- If this fails, something bad has happened (it should never fail). */
+                } else {
+                    if (result == DRFLAC_CRC_MISMATCH) {
+                        goto next_iteration;   /* CRC mismatch. Pretend this frame never existed. */
+                    } else {
+                        return DRFLAC_FALSE;
+                    }
+                }
+            } else {
+                /* We started seeking mid-frame which means we need to skip the frame decoding part. */
+                return drflac__seek_forward_by_pcm_frames(pFlac, pcmFramesToDecode) == pcmFramesToDecode;
+            }
+        } else {
+            /*
+            It's not in this frame. We need to seek past the frame, but check if there was a CRC mismatch. If so, we pretend this
+            frame never existed and leave the running sample count untouched.
+            */
+            if (!isMidFrame) {
+                drflac_result result = drflac__seek_to_next_flac_frame(pFlac);
+                if (result == DRFLAC_SUCCESS) {
+                    runningPCMFrameCount += pcmFrameCountInThisFLACFrame;
+                } else {
+                    if (result == DRFLAC_CRC_MISMATCH) {
+                        goto next_iteration;   /* CRC mismatch. Pretend this frame never existed. */
+                    } else {
+                        return DRFLAC_FALSE;
+                    }
+                }
+            } else {
+                /*
+                We started seeking mid-frame which means we need to seek by reading to the end of the frame instead of with
+                drflac__seek_to_next_flac_frame() which only works if the decoder is sitting on the byte just after the frame header.
+                */
+                runningPCMFrameCount += pFlac->currentFLACFrame.pcmFramesRemaining;
+                pFlac->currentFLACFrame.pcmFramesRemaining = 0;
+                isMidFrame = DRFLAC_FALSE;
+            }
+
+            /* If we are seeking to the end of the file and we've just hit it, we're done. */
+            if (pcmFrameIndex == pFlac->totalPCMFrameCount && runningPCMFrameCount == pFlac->totalPCMFrameCount) {
+                return DRFLAC_TRUE;
+            }
+        }
+
+    next_iteration:
+        /* Grab the next frame in preparation for the next iteration. */
+        if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
+            return DRFLAC_FALSE;
+        }
+    }
+}
+
+
+#if !defined(DR_FLAC_NO_CRC)
+/*
+We use an average compression ratio to determine our approximate start location. FLAC files are generally about 50%-70% the size of their
+uncompressed counterparts so we'll use this as a basis. I'm going to split the middle and use a factor of 0.6 to determine the starting
+location.
+*/
+#define DRFLAC_BINARY_SEARCH_APPROX_COMPRESSION_RATIO 0.6f
+
+static drflac_bool32 drflac__seek_to_approximate_flac_frame_to_byte(drflac* pFlac, drflac_uint64 targetByte, drflac_uint64 rangeLo, drflac_uint64 rangeHi, drflac_uint64* pLastSuccessfulSeekOffset)
+{
+    DRFLAC_ASSERT(pFlac != NULL);
+    DRFLAC_ASSERT(pLastSuccessfulSeekOffset != NULL);
+    DRFLAC_ASSERT(targetByte >= rangeLo);
+    DRFLAC_ASSERT(targetByte <= rangeHi);
+
+    *pLastSuccessfulSeekOffset = pFlac->firstFLACFramePosInBytes;
+
+    for (;;) {
+        /* When seeking to a byte, failure probably means we've attempted to seek beyond the end of the stream. To counter this we just halve it each attempt. */
+        if (!drflac__seek_to_byte(&pFlac->bs, targetByte)) {
+            /* If we couldn't even seek to the first byte in the stream we have a problem. Just abandon the whole thing. */
+            if (targetByte == 0) {
+                drflac__seek_to_first_frame(pFlac); /* Try to recover. */
+                return DRFLAC_FALSE;
+            }
+
+            /* Halve the byte location and continue. */
+            targetByte = rangeLo + ((rangeHi - rangeLo)/2);
+            rangeHi = targetByte;
+        } else {
+            /* Getting here should mean that we have seeked to an appropriate byte. */
+
+            /* Clear the details of the FLAC frame so we don't misreport data. */
+            DRFLAC_ZERO_MEMORY(&pFlac->currentFLACFrame, sizeof(pFlac->currentFLACFrame));
+
+            /*
+            Now seek to the next FLAC frame. We need to decode the entire frame (not just the header) because it's possible for the header to incorrectly pass the
+            CRC check and return bad data. We need to decode the entire frame to be more certain. Although this seems unlikely, this has happened to me in testing
+            so it needs to stay this way for now.
+            */
+#if 1
+            if (!drflac__read_and_decode_next_flac_frame(pFlac)) {
+                /* Halve the byte location and continue. */
+                targetByte = rangeLo + ((rangeHi - rangeLo)/2);
+                rangeHi = targetByte;
+            } else {
+                break;
+            }
+#else
+            if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
+                /* Halve the byte location and continue. */
+                targetByte = rangeLo + ((rangeHi - rangeLo)/2);
+                rangeHi = targetByte;
+            } else {
+                break;
+            }
+#endif
+        }
+    }
+
+    /* The current PCM frame needs to be updated based on the frame we just seeked to. */
+    drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &pFlac->currentPCMFrame, NULL);
+
+    DRFLAC_ASSERT(targetByte <= rangeHi);
+
+    *pLastSuccessfulSeekOffset = targetByte;
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__decode_flac_frame_and_seek_forward_by_pcm_frames(drflac* pFlac, drflac_uint64 offset)
+{
+    /* This section of code would be used if we were only decoding the FLAC frame header when calling drflac__seek_to_approximate_flac_frame_to_byte(). */
+#if 0
+    if (drflac__decode_flac_frame(pFlac) != DRFLAC_SUCCESS) {
+        /* We failed to decode this frame which may be due to it being corrupt. We'll just use the next valid FLAC frame. */
+        if (drflac__read_and_decode_next_flac_frame(pFlac) == DRFLAC_FALSE) {
+            return DRFLAC_FALSE;
+        }
+    }
+#endif
+
+    return drflac__seek_forward_by_pcm_frames(pFlac, offset) == offset;
+}
+
+
+static drflac_bool32 drflac__seek_to_pcm_frame__binary_search_internal(drflac* pFlac, drflac_uint64 pcmFrameIndex, drflac_uint64 byteRangeLo, drflac_uint64 byteRangeHi)
+{
+    /* This assumes pFlac->currentPCMFrame is sitting on byteRangeLo upon entry. */
+
+    drflac_uint64 targetByte;
+    drflac_uint64 pcmRangeLo = pFlac->totalPCMFrameCount;
+    drflac_uint64 pcmRangeHi = 0;
+    drflac_uint64 lastSuccessfulSeekOffset = (drflac_uint64)-1;
+    drflac_uint64 closestSeekOffsetBeforeTargetPCMFrame = byteRangeLo;
+    drflac_uint32 seekForwardThreshold = (pFlac->maxBlockSizeInPCMFrames != 0) ? pFlac->maxBlockSizeInPCMFrames*2 : 4096;
+
+    targetByte = byteRangeLo + (drflac_uint64)(((drflac_int64)((pcmFrameIndex - pFlac->currentPCMFrame) * pFlac->channels * pFlac->bitsPerSample)/8.0f) * DRFLAC_BINARY_SEARCH_APPROX_COMPRESSION_RATIO);
+    if (targetByte > byteRangeHi) {
+        targetByte = byteRangeHi;
+    }
+
+    for (;;) {
+        if (drflac__seek_to_approximate_flac_frame_to_byte(pFlac, targetByte, byteRangeLo, byteRangeHi, &lastSuccessfulSeekOffset)) {
+            /* We found a FLAC frame. We need to check if it contains the sample we're looking for. */
+            drflac_uint64 newPCMRangeLo;
+            drflac_uint64 newPCMRangeHi;
+            drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &newPCMRangeLo, &newPCMRangeHi);
+
+            /* If we selected the same frame, it means we should be pretty close. Just decode the rest. */
+            if (pcmRangeLo == newPCMRangeLo) {
+                if (!drflac__seek_to_approximate_flac_frame_to_byte(pFlac, closestSeekOffsetBeforeTargetPCMFrame, closestSeekOffsetBeforeTargetPCMFrame, byteRangeHi, &lastSuccessfulSeekOffset)) {
+                    break;  /* Failed to seek to closest frame. */
+                }
+
+                if (drflac__decode_flac_frame_and_seek_forward_by_pcm_frames(pFlac, pcmFrameIndex - pFlac->currentPCMFrame)) {
+                    return DRFLAC_TRUE;
+                } else {
+                    break;  /* Failed to seek forward. */
+                }
+            }
+
+            pcmRangeLo = newPCMRangeLo;
+            pcmRangeHi = newPCMRangeHi;
+
+            if (pcmRangeLo <= pcmFrameIndex && pcmRangeHi >= pcmFrameIndex) {
+                /* The target PCM frame is in this FLAC frame. */
+                if (drflac__decode_flac_frame_and_seek_forward_by_pcm_frames(pFlac, pcmFrameIndex - pFlac->currentPCMFrame) ) {
+                    return DRFLAC_TRUE;
+                } else {
+                    break;  /* Failed to seek to FLAC frame. */
+                }
+            } else {
+                const float approxCompressionRatio = (drflac_int64)(lastSuccessfulSeekOffset - pFlac->firstFLACFramePosInBytes) / ((drflac_int64)(pcmRangeLo * pFlac->channels * pFlac->bitsPerSample)/8.0f);
+
+                if (pcmRangeLo > pcmFrameIndex) {
+                    /* We seeked too far forward. We need to move our target byte backward and try again. */
+                    byteRangeHi = lastSuccessfulSeekOffset;
+                    if (byteRangeLo > byteRangeHi) {
+                        byteRangeLo = byteRangeHi;
+                    }
+
+                    targetByte = byteRangeLo + ((byteRangeHi - byteRangeLo) / 2);
+                    if (targetByte < byteRangeLo) {
+                        targetByte = byteRangeLo;
+                    }
+                } else /*if (pcmRangeHi < pcmFrameIndex)*/ {
+                    /* We didn't seek far enough. We need to move our target byte forward and try again. */
+
+                    /* If we're close enough we can just seek forward. */
+                    if ((pcmFrameIndex - pcmRangeLo) < seekForwardThreshold) {
+                        if (drflac__decode_flac_frame_and_seek_forward_by_pcm_frames(pFlac, pcmFrameIndex - pFlac->currentPCMFrame)) {
+                            return DRFLAC_TRUE;
+                        } else {
+                            break;  /* Failed to seek to FLAC frame. */
+                        }
+                    } else {
+                        byteRangeLo = lastSuccessfulSeekOffset;
+                        if (byteRangeHi < byteRangeLo) {
+                            byteRangeHi = byteRangeLo;
+                        }
+
+                        targetByte = lastSuccessfulSeekOffset + (drflac_uint64)(((drflac_int64)((pcmFrameIndex-pcmRangeLo) * pFlac->channels * pFlac->bitsPerSample)/8.0f) * approxCompressionRatio);
+                        if (targetByte > byteRangeHi) {
+                            targetByte = byteRangeHi;
+                        }
+
+                        if (closestSeekOffsetBeforeTargetPCMFrame < lastSuccessfulSeekOffset) {
+                            closestSeekOffsetBeforeTargetPCMFrame = lastSuccessfulSeekOffset;
+                        }
+                    }
+                }
+            }
+        } else {
+            /* Getting here is really bad. We just recover as best we can, but moving to the first frame in the stream, and then abort. */
+            break;
+        }
+    }
+
+    drflac__seek_to_first_frame(pFlac); /* <-- Try to recover. */
+    return DRFLAC_FALSE;
+}
+
+static drflac_bool32 drflac__seek_to_pcm_frame__binary_search(drflac* pFlac, drflac_uint64 pcmFrameIndex)
+{
+    drflac_uint64 byteRangeLo;
+    drflac_uint64 byteRangeHi;
+    drflac_uint32 seekForwardThreshold = (pFlac->maxBlockSizeInPCMFrames != 0) ? pFlac->maxBlockSizeInPCMFrames*2 : 4096;
+
+    /* Our algorithm currently assumes the FLAC stream is currently sitting at the start. */
+    if (drflac__seek_to_first_frame(pFlac) == DRFLAC_FALSE) {
+        return DRFLAC_FALSE;
+    }
+
+    /* If we're close enough to the start, just move to the start and seek forward. */
+    if (pcmFrameIndex < seekForwardThreshold) {
+        return drflac__seek_forward_by_pcm_frames(pFlac, pcmFrameIndex) == pcmFrameIndex;
+    }
+
+    /*
+    Our starting byte range is the byte position of the first FLAC frame and the approximate end of the file as if it were completely uncompressed. This ensures
+    the entire file is included, even though most of the time it'll exceed the end of the actual stream. This is OK as the frame searching logic will handle it.
+    */
+    byteRangeLo = pFlac->firstFLACFramePosInBytes;
+    byteRangeHi = pFlac->firstFLACFramePosInBytes + (drflac_uint64)((drflac_int64)(pFlac->totalPCMFrameCount * pFlac->channels * pFlac->bitsPerSample)/8.0f);
+
+    return drflac__seek_to_pcm_frame__binary_search_internal(pFlac, pcmFrameIndex, byteRangeLo, byteRangeHi);
+}
+#endif  /* !DR_FLAC_NO_CRC */
+
+static drflac_bool32 drflac__seek_to_pcm_frame__seek_table(drflac* pFlac, drflac_uint64 pcmFrameIndex)
+{
+    drflac_uint32 iClosestSeekpoint = 0;
+    drflac_bool32 isMidFrame = DRFLAC_FALSE;
+    drflac_uint64 runningPCMFrameCount;
+    drflac_uint32 iSeekpoint;
+
+
+    DRFLAC_ASSERT(pFlac != NULL);
+
+    if (pFlac->pSeekpoints == NULL || pFlac->seekpointCount == 0) {
+        return DRFLAC_FALSE;
+    }
+
+    for (iSeekpoint = 0; iSeekpoint < pFlac->seekpointCount; ++iSeekpoint) {
+        if (pFlac->pSeekpoints[iSeekpoint].firstPCMFrame >= pcmFrameIndex) {
+            break;
+        }
+
+        iClosestSeekpoint = iSeekpoint;
+    }
+
+    /* There's been cases where the seek table contains only zeros. We need to do some basic validation on the closest seekpoint. */
+    if (pFlac->pSeekpoints[iClosestSeekpoint].pcmFrameCount == 0 || pFlac->pSeekpoints[iClosestSeekpoint].pcmFrameCount > pFlac->maxBlockSizeInPCMFrames) {
+        return DRFLAC_FALSE;
+    }
+    if (pFlac->pSeekpoints[iClosestSeekpoint].firstPCMFrame > pFlac->totalPCMFrameCount && pFlac->totalPCMFrameCount > 0) {
+        return DRFLAC_FALSE;
+    }
+
+#if !defined(DR_FLAC_NO_CRC)
+    /* At this point we should know the closest seek point. We can use a binary search for this. We need to know the total sample count for this. */
+    if (pFlac->totalPCMFrameCount > 0) {
+        drflac_uint64 byteRangeLo;
+        drflac_uint64 byteRangeHi;
+
+        byteRangeHi = pFlac->firstFLACFramePosInBytes + (drflac_uint64)((drflac_int64)(pFlac->totalPCMFrameCount * pFlac->channels * pFlac->bitsPerSample)/8.0f);
+        byteRangeLo = pFlac->firstFLACFramePosInBytes + pFlac->pSeekpoints[iClosestSeekpoint].flacFrameOffset;
+
+        /*
+        If our closest seek point is not the last one, we only need to search between it and the next one. The section below calculates an appropriate starting
+        value for byteRangeHi which will clamp it appropriately.
+
+        Note that the next seekpoint must have an offset greater than the closest seekpoint because otherwise our binary search algorithm will break down. There
+        have been cases where a seektable consists of seek points where every byte offset is set to 0 which causes problems. If this happens we need to abort.
+        */
+        if (iClosestSeekpoint < pFlac->seekpointCount-1) {
+            drflac_uint32 iNextSeekpoint = iClosestSeekpoint + 1;
+
+            /* Basic validation on the seekpoints to ensure they're usable. */
+            if (pFlac->pSeekpoints[iClosestSeekpoint].flacFrameOffset >= pFlac->pSeekpoints[iNextSeekpoint].flacFrameOffset || pFlac->pSeekpoints[iNextSeekpoint].pcmFrameCount == 0) {
+                return DRFLAC_FALSE;    /* The next seekpoint doesn't look right. The seek table cannot be trusted from here. Abort. */
+            }
+
+            if (pFlac->pSeekpoints[iNextSeekpoint].firstPCMFrame != (((drflac_uint64)0xFFFFFFFF << 32) | 0xFFFFFFFF)) { /* Make sure it's not a placeholder seekpoint. */
+                byteRangeHi = pFlac->firstFLACFramePosInBytes + pFlac->pSeekpoints[iNextSeekpoint].flacFrameOffset - 1; /* byteRangeHi must be zero based. */
+            }
+        }
+
+        if (drflac__seek_to_byte(&pFlac->bs, pFlac->firstFLACFramePosInBytes + pFlac->pSeekpoints[iClosestSeekpoint].flacFrameOffset)) {
+            if (drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
+                drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &pFlac->currentPCMFrame, NULL);
+
+                if (drflac__seek_to_pcm_frame__binary_search_internal(pFlac, pcmFrameIndex, byteRangeLo, byteRangeHi)) {
+                    return DRFLAC_TRUE;
+                }
+            }
+        }
+    }
+#endif  /* !DR_FLAC_NO_CRC */
+
+    /* Getting here means we need to use a slower algorithm because the binary search method failed or cannot be used. */
+
+    /*
+    If we are seeking forward and the closest seekpoint is _before_ the current sample, we just seek forward from where we are. Otherwise we start seeking
+    from the seekpoint's first sample.
+    */
+    if (pcmFrameIndex >= pFlac->currentPCMFrame && pFlac->pSeekpoints[iClosestSeekpoint].firstPCMFrame <= pFlac->currentPCMFrame) {
+        /* Optimized case. Just seek forward from where we are. */
+        runningPCMFrameCount = pFlac->currentPCMFrame;
+
+        /* The frame header for the first frame may not yet have been read. We need to do that if necessary. */
+        if (pFlac->currentPCMFrame == 0 && pFlac->currentFLACFrame.pcmFramesRemaining == 0) {
+            if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
+                return DRFLAC_FALSE;
+            }
+        } else {
+            isMidFrame = DRFLAC_TRUE;
+        }
+    } else {
+        /* Slower case. Seek to the start of the seekpoint and then seek forward from there. */
+        runningPCMFrameCount = pFlac->pSeekpoints[iClosestSeekpoint].firstPCMFrame;
+
+        if (!drflac__seek_to_byte(&pFlac->bs, pFlac->firstFLACFramePosInBytes + pFlac->pSeekpoints[iClosestSeekpoint].flacFrameOffset)) {
+            return DRFLAC_FALSE;
+        }
+
+        /* Grab the frame the seekpoint is sitting on in preparation for the sample-exact seeking below. */
+        if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
+            return DRFLAC_FALSE;
+        }
+    }
+
+    for (;;) {
+        drflac_uint64 pcmFrameCountInThisFLACFrame;
+        drflac_uint64 firstPCMFrameInFLACFrame = 0;
+        drflac_uint64 lastPCMFrameInFLACFrame = 0;
+
+        drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &firstPCMFrameInFLACFrame, &lastPCMFrameInFLACFrame);
+
+        pcmFrameCountInThisFLACFrame = (lastPCMFrameInFLACFrame - firstPCMFrameInFLACFrame) + 1;
+        if (pcmFrameIndex < (runningPCMFrameCount + pcmFrameCountInThisFLACFrame)) {
+            /*
+            The sample should be in this frame. We need to fully decode it, but if it's an invalid frame (a CRC mismatch) we need to pretend
+            it never existed and keep iterating.
+            */
+            drflac_uint64 pcmFramesToDecode = pcmFrameIndex - runningPCMFrameCount;
+
+            if (!isMidFrame) {
+                drflac_result result = drflac__decode_flac_frame(pFlac);
+                if (result == DRFLAC_SUCCESS) {
+                    /* The frame is valid. We just need to skip over some samples to ensure it's sample-exact. */
+                    return drflac__seek_forward_by_pcm_frames(pFlac, pcmFramesToDecode) == pcmFramesToDecode;  /* <-- If this fails, something bad has happened (it should never fail). */
+                } else {
+                    if (result == DRFLAC_CRC_MISMATCH) {
+                        goto next_iteration;   /* CRC mismatch. Pretend this frame never existed. */
+                    } else {
+                        return DRFLAC_FALSE;
+                    }
+                }
+            } else {
+                /* We started seeking mid-frame which means we need to skip the frame decoding part. */
+                return drflac__seek_forward_by_pcm_frames(pFlac, pcmFramesToDecode) == pcmFramesToDecode;
+            }
+        } else {
+            /*
+            It's not in this frame. We need to seek past the frame, but check if there was a CRC mismatch. If so, we pretend this
+            frame never existed and leave the running sample count untouched.
+            */
+            if (!isMidFrame) {
+                drflac_result result = drflac__seek_to_next_flac_frame(pFlac);
+                if (result == DRFLAC_SUCCESS) {
+                    runningPCMFrameCount += pcmFrameCountInThisFLACFrame;
+                } else {
+                    if (result == DRFLAC_CRC_MISMATCH) {
+                        goto next_iteration;   /* CRC mismatch. Pretend this frame never existed. */
+                    } else {
+                        return DRFLAC_FALSE;
+                    }
+                }
+            } else {
+                /*
+                We started seeking mid-frame which means we need to seek by reading to the end of the frame instead of with
+                drflac__seek_to_next_flac_frame() which only works if the decoder is sitting on the byte just after the frame header.
+                */
+                runningPCMFrameCount += pFlac->currentFLACFrame.pcmFramesRemaining;
+                pFlac->currentFLACFrame.pcmFramesRemaining = 0;
+                isMidFrame = DRFLAC_FALSE;
+            }
+
+            /* If we are seeking to the end of the file and we've just hit it, we're done. */
+            if (pcmFrameIndex == pFlac->totalPCMFrameCount && runningPCMFrameCount == pFlac->totalPCMFrameCount) {
+                return DRFLAC_TRUE;
+            }
+        }
+
+    next_iteration:
+        /* Grab the next frame in preparation for the next iteration. */
+        if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
+            return DRFLAC_FALSE;
+        }
+    }
+}
+
+
+#ifndef DR_FLAC_NO_OGG
+typedef struct
+{
+    drflac_uint8 capturePattern[4];  /* Should be "OggS" */
+    drflac_uint8 structureVersion;   /* Always 0. */
+    drflac_uint8 headerType;
+    drflac_uint64 granulePosition;
+    drflac_uint32 serialNumber;
+    drflac_uint32 sequenceNumber;
+    drflac_uint32 checksum;
+    drflac_uint8 segmentCount;
+    drflac_uint8 segmentTable[255];
+} drflac_ogg_page_header;
+#endif
+
+typedef struct
+{
+    drflac_read_proc onRead;
+    drflac_seek_proc onSeek;
+    drflac_meta_proc onMeta;
+    drflac_container container;
+    void* pUserData;
+    void* pUserDataMD;
+    drflac_uint32 sampleRate;
+    drflac_uint8  channels;
+    drflac_uint8  bitsPerSample;
+    drflac_uint64 totalPCMFrameCount;
+    drflac_uint16 maxBlockSizeInPCMFrames;
+    drflac_uint64 runningFilePos;
+    drflac_bool32 hasStreamInfoBlock;
+    drflac_bool32 hasMetadataBlocks;
+    drflac_bs bs;                           /* <-- A bit streamer is required for loading data during initialization. */
+    drflac_frame_header firstFrameHeader;   /* <-- The header of the first frame that was read during relaxed initalization. Only set if there is no STREAMINFO block. */
+
+#ifndef DR_FLAC_NO_OGG
+    drflac_uint32 oggSerial;
+    drflac_uint64 oggFirstBytePos;
+    drflac_ogg_page_header oggBosHeader;
+#endif
+} drflac_init_info;
+
+static DRFLAC_INLINE void drflac__decode_block_header(drflac_uint32 blockHeader, drflac_uint8* isLastBlock, drflac_uint8* blockType, drflac_uint32* blockSize)
+{
+    blockHeader = drflac__be2host_32(blockHeader);
+    *isLastBlock = (drflac_uint8)((blockHeader & 0x80000000UL) >> 31);
+    *blockType   = (drflac_uint8)((blockHeader & 0x7F000000UL) >> 24);
+    *blockSize   =                (blockHeader & 0x00FFFFFFUL);
+}
+
+static DRFLAC_INLINE drflac_bool32 drflac__read_and_decode_block_header(drflac_read_proc onRead, void* pUserData, drflac_uint8* isLastBlock, drflac_uint8* blockType, drflac_uint32* blockSize)
+{
+    drflac_uint32 blockHeader;
+
+    *blockSize = 0;
+    if (onRead(pUserData, &blockHeader, 4) != 4) {
+        return DRFLAC_FALSE;
+    }
+
+    drflac__decode_block_header(blockHeader, isLastBlock, blockType, blockSize);
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__read_streaminfo(drflac_read_proc onRead, void* pUserData, drflac_streaminfo* pStreamInfo)
+{
+    drflac_uint32 blockSizes;
+    drflac_uint64 frameSizes = 0;
+    drflac_uint64 importantProps;
+    drflac_uint8 md5[16];
+
+    /* min/max block size. */
+    if (onRead(pUserData, &blockSizes, 4) != 4) {
+        return DRFLAC_FALSE;
+    }
+
+    /* min/max frame size. */
+    if (onRead(pUserData, &frameSizes, 6) != 6) {
+        return DRFLAC_FALSE;
+    }
+
+    /* Sample rate, channels, bits per sample and total sample count. */
+    if (onRead(pUserData, &importantProps, 8) != 8) {
+        return DRFLAC_FALSE;
+    }
+
+    /* MD5 */
+    if (onRead(pUserData, md5, sizeof(md5)) != sizeof(md5)) {
+        return DRFLAC_FALSE;
+    }
+
+    blockSizes     = drflac__be2host_32(blockSizes);
+    frameSizes     = drflac__be2host_64(frameSizes);
+    importantProps = drflac__be2host_64(importantProps);
+
+    pStreamInfo->minBlockSizeInPCMFrames = (drflac_uint16)((blockSizes & 0xFFFF0000) >> 16);
+    pStreamInfo->maxBlockSizeInPCMFrames = (drflac_uint16) (blockSizes & 0x0000FFFF);
+    pStreamInfo->minFrameSizeInPCMFrames = (drflac_uint32)((frameSizes     &  (((drflac_uint64)0x00FFFFFF << 16) << 24)) >> 40);
+    pStreamInfo->maxFrameSizeInPCMFrames = (drflac_uint32)((frameSizes     &  (((drflac_uint64)0x00FFFFFF << 16) <<  0)) >> 16);
+    pStreamInfo->sampleRate              = (drflac_uint32)((importantProps &  (((drflac_uint64)0x000FFFFF << 16) << 28)) >> 44);
+    pStreamInfo->channels                = (drflac_uint8 )((importantProps &  (((drflac_uint64)0x0000000E << 16) << 24)) >> 41) + 1;
+    pStreamInfo->bitsPerSample           = (drflac_uint8 )((importantProps &  (((drflac_uint64)0x0000001F << 16) << 20)) >> 36) + 1;
+    pStreamInfo->totalPCMFrameCount      =                ((importantProps & ((((drflac_uint64)0x0000000F << 16) << 16) | 0xFFFFFFFF)));
+    DRFLAC_COPY_MEMORY(pStreamInfo->md5, md5, sizeof(md5));
+
+    return DRFLAC_TRUE;
+}
+
+
+static void* drflac__malloc_default(size_t sz, void* pUserData)
+{
+    (void)pUserData;
+    return DRFLAC_MALLOC(sz);
+}
+
+static void* drflac__realloc_default(void* p, size_t sz, void* pUserData)
+{
+    (void)pUserData;
+    return DRFLAC_REALLOC(p, sz);
+}
+
+static void drflac__free_default(void* p, void* pUserData)
+{
+    (void)pUserData;
+    DRFLAC_FREE(p);
+}
+
+
+static void* drflac__malloc_from_callbacks(size_t sz, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pAllocationCallbacks == NULL) {
+        return NULL;
+    }
+
+    if (pAllocationCallbacks->onMalloc != NULL) {
+        return pAllocationCallbacks->onMalloc(sz, pAllocationCallbacks->pUserData);
+    }
+
+    /* Try using realloc(). */
+    if (pAllocationCallbacks->onRealloc != NULL) {
+        return pAllocationCallbacks->onRealloc(NULL, sz, pAllocationCallbacks->pUserData);
+    }
+
+    return NULL;
+}
+
+static void* drflac__realloc_from_callbacks(void* p, size_t szNew, size_t szOld, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pAllocationCallbacks == NULL) {
+        return NULL;
+    }
+
+    if (pAllocationCallbacks->onRealloc != NULL) {
+        return pAllocationCallbacks->onRealloc(p, szNew, pAllocationCallbacks->pUserData);
+    }
+
+    /* Try emulating realloc() in terms of malloc()/free(). */
+    if (pAllocationCallbacks->onMalloc != NULL && pAllocationCallbacks->onFree != NULL) {
+        void* p2;
+
+        p2 = pAllocationCallbacks->onMalloc(szNew, pAllocationCallbacks->pUserData);
+        if (p2 == NULL) {
+            return NULL;
+        }
+
+        if (p != NULL) {
+            DRFLAC_COPY_MEMORY(p2, p, szOld);
+            pAllocationCallbacks->onFree(p, pAllocationCallbacks->pUserData);
+        }
+
+        return p2;
+    }
+
+    return NULL;
+}
+
+static void drflac__free_from_callbacks(void* p, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    if (p == NULL || pAllocationCallbacks == NULL) {
+        return;
+    }
+
+    if (pAllocationCallbacks->onFree != NULL) {
+        pAllocationCallbacks->onFree(p, pAllocationCallbacks->pUserData);
+    }
+}
+
+
+static drflac_bool32 drflac__read_and_decode_metadata(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, void* pUserDataMD, drflac_uint64* pFirstFramePos, drflac_uint64* pSeektablePos, drflac_uint32* pSeektableSize, drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    /*
+    We want to keep track of the byte position in the stream of the seektable. At the time of calling this function we know that
+    we'll be sitting on byte 42.
+    */
+    drflac_uint64 runningFilePos = 42;
+    drflac_uint64 seektablePos   = 0;
+    drflac_uint32 seektableSize  = 0;
+
+    for (;;) {
+        drflac_metadata metadata;
+        drflac_uint8 isLastBlock = 0;
+        drflac_uint8 blockType;
+        drflac_uint32 blockSize;
+        if (drflac__read_and_decode_block_header(onRead, pUserData, &isLastBlock, &blockType, &blockSize) == DRFLAC_FALSE) {
+            return DRFLAC_FALSE;
+        }
+        runningFilePos += 4;
+
+        metadata.type = blockType;
+        metadata.pRawData = NULL;
+        metadata.rawDataSize = 0;
+
+        switch (blockType)
+        {
+            case DRFLAC_METADATA_BLOCK_TYPE_APPLICATION:
+            {
+                if (blockSize < 4) {
+                    return DRFLAC_FALSE;
+                }
+
+                if (onMeta) {
+                    void* pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks);
+                    if (pRawData == NULL) {
+                        return DRFLAC_FALSE;
+                    }
+
+                    if (onRead(pUserData, pRawData, blockSize) != blockSize) {
+                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                        return DRFLAC_FALSE;
+                    }
+
+                    metadata.pRawData = pRawData;
+                    metadata.rawDataSize = blockSize;
+                    metadata.data.application.id       = drflac__be2host_32(*(drflac_uint32*)pRawData);
+                    metadata.data.application.pData    = (const void*)((drflac_uint8*)pRawData + sizeof(drflac_uint32));
+                    metadata.data.application.dataSize = blockSize - sizeof(drflac_uint32);
+                    onMeta(pUserDataMD, &metadata);
+
+                    drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                }
+            } break;
+
+            case DRFLAC_METADATA_BLOCK_TYPE_SEEKTABLE:
+            {
+                seektablePos  = runningFilePos;
+                seektableSize = blockSize;
+
+                if (onMeta) {
+                    drflac_uint32 iSeekpoint;
+                    void* pRawData;
+
+                    pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks);
+                    if (pRawData == NULL) {
+                        return DRFLAC_FALSE;
+                    }
+
+                    if (onRead(pUserData, pRawData, blockSize) != blockSize) {
+                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                        return DRFLAC_FALSE;
+                    }
+
+                    metadata.pRawData = pRawData;
+                    metadata.rawDataSize = blockSize;
+                    metadata.data.seektable.seekpointCount = blockSize/sizeof(drflac_seekpoint);
+                    metadata.data.seektable.pSeekpoints = (const drflac_seekpoint*)pRawData;
+
+                    /* Endian swap. */
+                    for (iSeekpoint = 0; iSeekpoint < metadata.data.seektable.seekpointCount; ++iSeekpoint) {
+                        drflac_seekpoint* pSeekpoint = (drflac_seekpoint*)pRawData + iSeekpoint;
+                        pSeekpoint->firstPCMFrame   = drflac__be2host_64(pSeekpoint->firstPCMFrame);
+                        pSeekpoint->flacFrameOffset = drflac__be2host_64(pSeekpoint->flacFrameOffset);
+                        pSeekpoint->pcmFrameCount   = drflac__be2host_16(pSeekpoint->pcmFrameCount);
+                    }
+
+                    onMeta(pUserDataMD, &metadata);
+
+                    drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                }
+            } break;
+
+            case DRFLAC_METADATA_BLOCK_TYPE_VORBIS_COMMENT:
+            {
+                if (blockSize < 8) {
+                    return DRFLAC_FALSE;
+                }
+
+                if (onMeta) {
+                    void* pRawData;
+                    const char* pRunningData;
+                    const char* pRunningDataEnd;
+                    drflac_uint32 i;
+
+                    pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks);
+                    if (pRawData == NULL) {
+                        return DRFLAC_FALSE;
+                    }
+
+                    if (onRead(pUserData, pRawData, blockSize) != blockSize) {
+                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                        return DRFLAC_FALSE;
+                    }
+
+                    metadata.pRawData = pRawData;
+                    metadata.rawDataSize = blockSize;
+
+                    pRunningData    = (const char*)pRawData;
+                    pRunningDataEnd = (const char*)pRawData + blockSize;
+
+                    metadata.data.vorbis_comment.vendorLength = drflac__le2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+
+                    /* Need space for the rest of the block */
+                    if ((pRunningDataEnd - pRunningData) - 4 < (drflac_int64)metadata.data.vorbis_comment.vendorLength) { /* <-- Note the order of operations to avoid overflow to a valid value */
+                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                        return DRFLAC_FALSE;
+                    }
+                    metadata.data.vorbis_comment.vendor       = pRunningData;                                            pRunningData += metadata.data.vorbis_comment.vendorLength;
+                    metadata.data.vorbis_comment.commentCount = drflac__le2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+
+                    /* Need space for 'commentCount' comments after the block, which at minimum is a drflac_uint32 per comment */
+                    if ((pRunningDataEnd - pRunningData) / sizeof(drflac_uint32) < metadata.data.vorbis_comment.commentCount) { /* <-- Note the order of operations to avoid overflow to a valid value */
+                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                        return DRFLAC_FALSE;
+                    }
+                    metadata.data.vorbis_comment.pComments    = pRunningData;
+
+                    /* Check that the comments section is valid before passing it to the callback */
+                    for (i = 0; i < metadata.data.vorbis_comment.commentCount; ++i) {
+                        drflac_uint32 commentLength;
+
+                        if (pRunningDataEnd - pRunningData < 4) {
+                            drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                            return DRFLAC_FALSE;
+                        }
+
+                        commentLength = drflac__le2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+                        if (pRunningDataEnd - pRunningData < (drflac_int64)commentLength) { /* <-- Note the order of operations to avoid overflow to a valid value */
+                            drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                            return DRFLAC_FALSE;
+                        }
+                        pRunningData += commentLength;
+                    }
+
+                    onMeta(pUserDataMD, &metadata);
+
+                    drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                }
+            } break;
+
+            case DRFLAC_METADATA_BLOCK_TYPE_CUESHEET:
+            {
+                if (blockSize < 396) {
+                    return DRFLAC_FALSE;
+                }
+
+                if (onMeta) {
+                    void* pRawData;
+                    const char* pRunningData;
+                    const char* pRunningDataEnd;
+                    drflac_uint8 iTrack;
+                    drflac_uint8 iIndex;
+
+                    pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks);
+                    if (pRawData == NULL) {
+                        return DRFLAC_FALSE;
+                    }
+
+                    if (onRead(pUserData, pRawData, blockSize) != blockSize) {
+                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                        return DRFLAC_FALSE;
+                    }
+
+                    metadata.pRawData = pRawData;
+                    metadata.rawDataSize = blockSize;
+
+                    pRunningData    = (const char*)pRawData;
+                    pRunningDataEnd = (const char*)pRawData + blockSize;
+
+                    DRFLAC_COPY_MEMORY(metadata.data.cuesheet.catalog, pRunningData, 128);                              pRunningData += 128;
+                    metadata.data.cuesheet.leadInSampleCount = drflac__be2host_64(*(const drflac_uint64*)pRunningData); pRunningData += 8;
+                    metadata.data.cuesheet.isCD              = (pRunningData[0] & 0x80) != 0;                           pRunningData += 259;
+                    metadata.data.cuesheet.trackCount        = pRunningData[0];                                         pRunningData += 1;
+                    metadata.data.cuesheet.pTrackData        = pRunningData;
+
+                    /* Check that the cuesheet tracks are valid before passing it to the callback */
+                    for (iTrack = 0; iTrack < metadata.data.cuesheet.trackCount; ++iTrack) {
+                        drflac_uint8 indexCount;
+                        drflac_uint32 indexPointSize;
+
+                        if (pRunningDataEnd - pRunningData < 36) {
+                            drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                            return DRFLAC_FALSE;
+                        }
+
+                        /* Skip to the index point count */
+                        pRunningData += 35;
+                        indexCount = pRunningData[0]; pRunningData += 1;
+                        indexPointSize = indexCount * sizeof(drflac_cuesheet_track_index);
+                        if (pRunningDataEnd - pRunningData < (drflac_int64)indexPointSize) {
+                            drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                            return DRFLAC_FALSE;
+                        }
+
+                        /* Endian swap. */
+                        for (iIndex = 0; iIndex < indexCount; ++iIndex) {
+                            drflac_cuesheet_track_index* pTrack = (drflac_cuesheet_track_index*)pRunningData;
+                            pRunningData += sizeof(drflac_cuesheet_track_index);
+                            pTrack->offset = drflac__be2host_64(pTrack->offset);
+                        }
+                    }
+
+                    onMeta(pUserDataMD, &metadata);
+
+                    drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                }
+            } break;
+
+            case DRFLAC_METADATA_BLOCK_TYPE_PICTURE:
+            {
+                if (blockSize < 32) {
+                    return DRFLAC_FALSE;
+                }
+
+                if (onMeta) {
+                    void* pRawData;
+                    const char* pRunningData;
+                    const char* pRunningDataEnd;
+
+                    pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks);
+                    if (pRawData == NULL) {
+                        return DRFLAC_FALSE;
+                    }
+
+                    if (onRead(pUserData, pRawData, blockSize) != blockSize) {
+                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                        return DRFLAC_FALSE;
+                    }
+
+                    metadata.pRawData = pRawData;
+                    metadata.rawDataSize = blockSize;
+
+                    pRunningData    = (const char*)pRawData;
+                    pRunningDataEnd = (const char*)pRawData + blockSize;
+
+                    metadata.data.picture.type       = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+                    metadata.data.picture.mimeLength = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+
+                    /* Need space for the rest of the block */
+                    if ((pRunningDataEnd - pRunningData) - 24 < (drflac_int64)metadata.data.picture.mimeLength) { /* <-- Note the order of operations to avoid overflow to a valid value */
+                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                        return DRFLAC_FALSE;
+                    }
+                    metadata.data.picture.mime              = pRunningData;                                            pRunningData += metadata.data.picture.mimeLength;
+                    metadata.data.picture.descriptionLength = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+
+                    /* Need space for the rest of the block */
+                    if ((pRunningDataEnd - pRunningData) - 20 < (drflac_int64)metadata.data.picture.descriptionLength) { /* <-- Note the order of operations to avoid overflow to a valid value */
+                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                        return DRFLAC_FALSE;
+                    }
+                    metadata.data.picture.description     = pRunningData;                                            pRunningData += metadata.data.picture.descriptionLength;
+                    metadata.data.picture.width           = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+                    metadata.data.picture.height          = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+                    metadata.data.picture.colorDepth      = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+                    metadata.data.picture.indexColorCount = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+                    metadata.data.picture.pictureDataSize = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+                    metadata.data.picture.pPictureData    = (const drflac_uint8*)pRunningData;
+
+                    /* Need space for the picture after the block */
+                    if (pRunningDataEnd - pRunningData < (drflac_int64)metadata.data.picture.pictureDataSize) { /* <-- Note the order of operations to avoid overflow to a valid value */
+                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                        return DRFLAC_FALSE;
+                    }
+
+                    onMeta(pUserDataMD, &metadata);
+
+                    drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                }
+            } break;
+
+            case DRFLAC_METADATA_BLOCK_TYPE_PADDING:
+            {
+                if (onMeta) {
+                    metadata.data.padding.unused = 0;
+
+                    /* Padding doesn't have anything meaningful in it, so just skip over it, but make sure the caller is aware of it by firing the callback. */
+                    if (!onSeek(pUserData, blockSize, drflac_seek_origin_current)) {
+                        isLastBlock = DRFLAC_TRUE;  /* An error occurred while seeking. Attempt to recover by treating this as the last block which will in turn terminate the loop. */
+                    } else {
+                        onMeta(pUserDataMD, &metadata);
+                    }
+                }
+            } break;
+
+            case DRFLAC_METADATA_BLOCK_TYPE_INVALID:
+            {
+                /* Invalid chunk. Just skip over this one. */
+                if (onMeta) {
+                    if (!onSeek(pUserData, blockSize, drflac_seek_origin_current)) {
+                        isLastBlock = DRFLAC_TRUE;  /* An error occurred while seeking. Attempt to recover by treating this as the last block which will in turn terminate the loop. */
+                    }
+                }
+            } break;
+
+            default:
+            {
+                /*
+                It's an unknown chunk, but not necessarily invalid. There's a chance more metadata blocks might be defined later on, so we
+                can at the very least report the chunk to the application and let it look at the raw data.
+                */
+                if (onMeta) {
+                    void* pRawData = drflac__malloc_from_callbacks(blockSize, pAllocationCallbacks);
+                    if (pRawData == NULL) {
+                        return DRFLAC_FALSE;
+                    }
+
+                    if (onRead(pUserData, pRawData, blockSize) != blockSize) {
+                        drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                        return DRFLAC_FALSE;
+                    }
+
+                    metadata.pRawData = pRawData;
+                    metadata.rawDataSize = blockSize;
+                    onMeta(pUserDataMD, &metadata);
+
+                    drflac__free_from_callbacks(pRawData, pAllocationCallbacks);
+                }
+            } break;
+        }
+
+        /* If we're not handling metadata, just skip over the block. If we are, it will have been handled earlier in the switch statement above. */
+        if (onMeta == NULL && blockSize > 0) {
+            if (!onSeek(pUserData, blockSize, drflac_seek_origin_current)) {
+                isLastBlock = DRFLAC_TRUE;
+            }
+        }
+
+        runningFilePos += blockSize;
+        if (isLastBlock) {
+            break;
+        }
+    }
+
+    *pSeektablePos = seektablePos;
+    *pSeektableSize = seektableSize;
+    *pFirstFramePos = runningFilePos;
+
+    return DRFLAC_TRUE;
+}
+
+static drflac_bool32 drflac__init_private__native(drflac_init_info* pInit, drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, void* pUserDataMD, drflac_bool32 relaxed)
+{
+    /* Pre Condition: The bit stream should be sitting just past the 4-byte id header. */
+
+    drflac_uint8 isLastBlock;
+    drflac_uint8 blockType;
+    drflac_uint32 blockSize;
+
+    (void)onSeek;
+
+    pInit->container = drflac_container_native;
+
+    /* The first metadata block should be the STREAMINFO block. */
+    if (!drflac__read_and_decode_block_header(onRead, pUserData, &isLastBlock, &blockType, &blockSize)) {
+        return DRFLAC_FALSE;
+    }
+
+    if (blockType != DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO || blockSize != 34) {
+        if (!relaxed) {
+            /* We're opening in strict mode and the first block is not the STREAMINFO block. Error. */
+            return DRFLAC_FALSE;
+        } else {
+            /*
+            Relaxed mode. To open from here we need to just find the first frame and set the sample rate, etc. to whatever is defined
+            for that frame.
+            */
+            pInit->hasStreamInfoBlock = DRFLAC_FALSE;
+            pInit->hasMetadataBlocks  = DRFLAC_FALSE;
+
+            if (!drflac__read_next_flac_frame_header(&pInit->bs, 0, &pInit->firstFrameHeader)) {
+                return DRFLAC_FALSE;    /* Couldn't find a frame. */
+            }
+
+            if (pInit->firstFrameHeader.bitsPerSample == 0) {
+                return DRFLAC_FALSE;    /* Failed to initialize because the first frame depends on the STREAMINFO block, which does not exist. */
+            }
+
+            pInit->sampleRate              = pInit->firstFrameHeader.sampleRate;
+            pInit->channels                = drflac__get_channel_count_from_channel_assignment(pInit->firstFrameHeader.channelAssignment);
+            pInit->bitsPerSample           = pInit->firstFrameHeader.bitsPerSample;
+            pInit->maxBlockSizeInPCMFrames = 65535;   /* <-- See notes here: https://xiph.org/flac/format.html#metadata_block_streaminfo */
+            return DRFLAC_TRUE;
+        }
+    } else {
+        drflac_streaminfo streaminfo;
+        if (!drflac__read_streaminfo(onRead, pUserData, &streaminfo)) {
+            return DRFLAC_FALSE;
+        }
+
+        pInit->hasStreamInfoBlock      = DRFLAC_TRUE;
+        pInit->sampleRate              = streaminfo.sampleRate;
+        pInit->channels                = streaminfo.channels;
+        pInit->bitsPerSample           = streaminfo.bitsPerSample;
+        pInit->totalPCMFrameCount      = streaminfo.totalPCMFrameCount;
+        pInit->maxBlockSizeInPCMFrames = streaminfo.maxBlockSizeInPCMFrames;    /* Don't care about the min block size - only the max (used for determining the size of the memory allocation). */
+        pInit->hasMetadataBlocks       = !isLastBlock;
+
+        if (onMeta) {
+            drflac_metadata metadata;
+            metadata.type = DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO;
+            metadata.pRawData = NULL;
+            metadata.rawDataSize = 0;
+            metadata.data.streaminfo = streaminfo;
+            onMeta(pUserDataMD, &metadata);
+        }
+
+        return DRFLAC_TRUE;
+    }
+}
+
+#ifndef DR_FLAC_NO_OGG
+#define DRFLAC_OGG_MAX_PAGE_SIZE            65307
+#define DRFLAC_OGG_CAPTURE_PATTERN_CRC32    1605413199  /* CRC-32 of "OggS". */
+
+typedef enum
+{
+    drflac_ogg_recover_on_crc_mismatch,
+    drflac_ogg_fail_on_crc_mismatch
+} drflac_ogg_crc_mismatch_recovery;
+
+#ifndef DR_FLAC_NO_CRC
+static drflac_uint32 drflac__crc32_table[] = {
+    0x00000000L, 0x04C11DB7L, 0x09823B6EL, 0x0D4326D9L,
+    0x130476DCL, 0x17C56B6BL, 0x1A864DB2L, 0x1E475005L,
+    0x2608EDB8L, 0x22C9F00FL, 0x2F8AD6D6L, 0x2B4BCB61L,
+    0x350C9B64L, 0x31CD86D3L, 0x3C8EA00AL, 0x384FBDBDL,
+    0x4C11DB70L, 0x48D0C6C7L, 0x4593E01EL, 0x4152FDA9L,
+    0x5F15ADACL, 0x5BD4B01BL, 0x569796C2L, 0x52568B75L,
+    0x6A1936C8L, 0x6ED82B7FL, 0x639B0DA6L, 0x675A1011L,
+    0x791D4014L, 0x7DDC5DA3L, 0x709F7B7AL, 0x745E66CDL,
+    0x9823B6E0L, 0x9CE2AB57L, 0x91A18D8EL, 0x95609039L,
+    0x8B27C03CL, 0x8FE6DD8BL, 0x82A5FB52L, 0x8664E6E5L,
+    0xBE2B5B58L, 0xBAEA46EFL, 0xB7A96036L, 0xB3687D81L,
+    0xAD2F2D84L, 0xA9EE3033L, 0xA4AD16EAL, 0xA06C0B5DL,
+    0xD4326D90L, 0xD0F37027L, 0xDDB056FEL, 0xD9714B49L,
+    0xC7361B4CL, 0xC3F706FBL, 0xCEB42022L, 0xCA753D95L,
+    0xF23A8028L, 0xF6FB9D9FL, 0xFBB8BB46L, 0xFF79A6F1L,
+    0xE13EF6F4L, 0xE5FFEB43L, 0xE8BCCD9AL, 0xEC7DD02DL,
+    0x34867077L, 0x30476DC0L, 0x3D044B19L, 0x39C556AEL,
+    0x278206ABL, 0x23431B1CL, 0x2E003DC5L, 0x2AC12072L,
+    0x128E9DCFL, 0x164F8078L, 0x1B0CA6A1L, 0x1FCDBB16L,
+    0x018AEB13L, 0x054BF6A4L, 0x0808D07DL, 0x0CC9CDCAL,
+    0x7897AB07L, 0x7C56B6B0L, 0x71159069L, 0x75D48DDEL,
+    0x6B93DDDBL, 0x6F52C06CL, 0x6211E6B5L, 0x66D0FB02L,
+    0x5E9F46BFL, 0x5A5E5B08L, 0x571D7DD1L, 0x53DC6066L,
+    0x4D9B3063L, 0x495A2DD4L, 0x44190B0DL, 0x40D816BAL,
+    0xACA5C697L, 0xA864DB20L, 0xA527FDF9L, 0xA1E6E04EL,
+    0xBFA1B04BL, 0xBB60ADFCL, 0xB6238B25L, 0xB2E29692L,
+    0x8AAD2B2FL, 0x8E6C3698L, 0x832F1041L, 0x87EE0DF6L,
+    0x99A95DF3L, 0x9D684044L, 0x902B669DL, 0x94EA7B2AL,
+    0xE0B41DE7L, 0xE4750050L, 0xE9362689L, 0xEDF73B3EL,
+    0xF3B06B3BL, 0xF771768CL, 0xFA325055L, 0xFEF34DE2L,
+    0xC6BCF05FL, 0xC27DEDE8L, 0xCF3ECB31L, 0xCBFFD686L,
+    0xD5B88683L, 0xD1799B34L, 0xDC3ABDEDL, 0xD8FBA05AL,
+    0x690CE0EEL, 0x6DCDFD59L, 0x608EDB80L, 0x644FC637L,
+    0x7A089632L, 0x7EC98B85L, 0x738AAD5CL, 0x774BB0EBL,
+    0x4F040D56L, 0x4BC510E1L, 0x46863638L, 0x42472B8FL,
+    0x5C007B8AL, 0x58C1663DL, 0x558240E4L, 0x51435D53L,
+    0x251D3B9EL, 0x21DC2629L, 0x2C9F00F0L, 0x285E1D47L,
+    0x36194D42L, 0x32D850F5L, 0x3F9B762CL, 0x3B5A6B9BL,
+    0x0315D626L, 0x07D4CB91L, 0x0A97ED48L, 0x0E56F0FFL,
+    0x1011A0FAL, 0x14D0BD4DL, 0x19939B94L, 0x1D528623L,
+    0xF12F560EL, 0xF5EE4BB9L, 0xF8AD6D60L, 0xFC6C70D7L,
+    0xE22B20D2L, 0xE6EA3D65L, 0xEBA91BBCL, 0xEF68060BL,
+    0xD727BBB6L, 0xD3E6A601L, 0xDEA580D8L, 0xDA649D6FL,
+    0xC423CD6AL, 0xC0E2D0DDL, 0xCDA1F604L, 0xC960EBB3L,
+    0xBD3E8D7EL, 0xB9FF90C9L, 0xB4BCB610L, 0xB07DABA7L,
+    0xAE3AFBA2L, 0xAAFBE615L, 0xA7B8C0CCL, 0xA379DD7BL,
+    0x9B3660C6L, 0x9FF77D71L, 0x92B45BA8L, 0x9675461FL,
+    0x8832161AL, 0x8CF30BADL, 0x81B02D74L, 0x857130C3L,
+    0x5D8A9099L, 0x594B8D2EL, 0x5408ABF7L, 0x50C9B640L,
+    0x4E8EE645L, 0x4A4FFBF2L, 0x470CDD2BL, 0x43CDC09CL,
+    0x7B827D21L, 0x7F436096L, 0x7200464FL, 0x76C15BF8L,
+    0x68860BFDL, 0x6C47164AL, 0x61043093L, 0x65C52D24L,
+    0x119B4BE9L, 0x155A565EL, 0x18197087L, 0x1CD86D30L,
+    0x029F3D35L, 0x065E2082L, 0x0B1D065BL, 0x0FDC1BECL,
+    0x3793A651L, 0x3352BBE6L, 0x3E119D3FL, 0x3AD08088L,
+    0x2497D08DL, 0x2056CD3AL, 0x2D15EBE3L, 0x29D4F654L,
+    0xC5A92679L, 0xC1683BCEL, 0xCC2B1D17L, 0xC8EA00A0L,
+    0xD6AD50A5L, 0xD26C4D12L, 0xDF2F6BCBL, 0xDBEE767CL,
+    0xE3A1CBC1L, 0xE760D676L, 0xEA23F0AFL, 0xEEE2ED18L,
+    0xF0A5BD1DL, 0xF464A0AAL, 0xF9278673L, 0xFDE69BC4L,
+    0x89B8FD09L, 0x8D79E0BEL, 0x803AC667L, 0x84FBDBD0L,
+    0x9ABC8BD5L, 0x9E7D9662L, 0x933EB0BBL, 0x97FFAD0CL,
+    0xAFB010B1L, 0xAB710D06L, 0xA6322BDFL, 0xA2F33668L,
+    0xBCB4666DL, 0xB8757BDAL, 0xB5365D03L, 0xB1F740B4L
+};
+#endif
+
+static DRFLAC_INLINE drflac_uint32 drflac_crc32_byte(drflac_uint32 crc32, drflac_uint8 data)
+{
+#ifndef DR_FLAC_NO_CRC
+    return (crc32 << 8) ^ drflac__crc32_table[(drflac_uint8)((crc32 >> 24) & 0xFF) ^ data];
+#else
+    (void)data;
+    return crc32;
+#endif
+}
+
+#if 0
+static DRFLAC_INLINE drflac_uint32 drflac_crc32_uint32(drflac_uint32 crc32, drflac_uint32 data)
+{
+    crc32 = drflac_crc32_byte(crc32, (drflac_uint8)((data >> 24) & 0xFF));
+    crc32 = drflac_crc32_byte(crc32, (drflac_uint8)((data >> 16) & 0xFF));
+    crc32 = drflac_crc32_byte(crc32, (drflac_uint8)((data >>  8) & 0xFF));
+    crc32 = drflac_crc32_byte(crc32, (drflac_uint8)((data >>  0) & 0xFF));
+    return crc32;
+}
+
+static DRFLAC_INLINE drflac_uint32 drflac_crc32_uint64(drflac_uint32 crc32, drflac_uint64 data)
+{
+    crc32 = drflac_crc32_uint32(crc32, (drflac_uint32)((data >> 32) & 0xFFFFFFFF));
+    crc32 = drflac_crc32_uint32(crc32, (drflac_uint32)((data >>  0) & 0xFFFFFFFF));
+    return crc32;
+}
+#endif
+
+static DRFLAC_INLINE drflac_uint32 drflac_crc32_buffer(drflac_uint32 crc32, drflac_uint8* pData, drflac_uint32 dataSize)
+{
+    /* This can be optimized. */
+    drflac_uint32 i;
+    for (i = 0; i < dataSize; ++i) {
+        crc32 = drflac_crc32_byte(crc32, pData[i]);
+    }
+    return crc32;
+}
+
+
+static DRFLAC_INLINE drflac_bool32 drflac_ogg__is_capture_pattern(drflac_uint8 pattern[4])
+{
+    return pattern[0] == 'O' && pattern[1] == 'g' && pattern[2] == 'g' && pattern[3] == 'S';
+}
+
+static DRFLAC_INLINE drflac_uint32 drflac_ogg__get_page_header_size(drflac_ogg_page_header* pHeader)
+{
+    return 27 + pHeader->segmentCount;
+}
+
+static DRFLAC_INLINE drflac_uint32 drflac_ogg__get_page_body_size(drflac_ogg_page_header* pHeader)
+{
+    drflac_uint32 pageBodySize = 0;
+    int i;
+
+    for (i = 0; i < pHeader->segmentCount; ++i) {
+        pageBodySize += pHeader->segmentTable[i];
+    }
+
+    return pageBodySize;
+}
+
+static drflac_result drflac_ogg__read_page_header_after_capture_pattern(drflac_read_proc onRead, void* pUserData, drflac_ogg_page_header* pHeader, drflac_uint32* pBytesRead, drflac_uint32* pCRC32)
+{
+    drflac_uint8 data[23];
+    drflac_uint32 i;
+
+    DRFLAC_ASSERT(*pCRC32 == DRFLAC_OGG_CAPTURE_PATTERN_CRC32);
+
+    if (onRead(pUserData, data, 23) != 23) {
+        return DRFLAC_AT_END;
+    }
+    *pBytesRead += 23;
+
+    /*
+    It's not actually used, but set the capture pattern to 'OggS' for completeness. Not doing this will cause static analysers to complain about
+    us trying to access uninitialized data. We could alternatively just comment out this member of the drflac_ogg_page_header structure, but I
+    like to have it map to the structure of the underlying data.
+    */
+    pHeader->capturePattern[0] = 'O';
+    pHeader->capturePattern[1] = 'g';
+    pHeader->capturePattern[2] = 'g';
+    pHeader->capturePattern[3] = 'S';
+
+    pHeader->structureVersion = data[0];
+    pHeader->headerType       = data[1];
+    DRFLAC_COPY_MEMORY(&pHeader->granulePosition, &data[ 2], 8);
+    DRFLAC_COPY_MEMORY(&pHeader->serialNumber,    &data[10], 4);
+    DRFLAC_COPY_MEMORY(&pHeader->sequenceNumber,  &data[14], 4);
+    DRFLAC_COPY_MEMORY(&pHeader->checksum,        &data[18], 4);
+    pHeader->segmentCount     = data[22];
+
+    /* Calculate the CRC. Note that for the calculation the checksum part of the page needs to be set to 0. */
+    data[18] = 0;
+    data[19] = 0;
+    data[20] = 0;
+    data[21] = 0;
+
+    for (i = 0; i < 23; ++i) {
+        *pCRC32 = drflac_crc32_byte(*pCRC32, data[i]);
+    }
+
+
+    if (onRead(pUserData, pHeader->segmentTable, pHeader->segmentCount) != pHeader->segmentCount) {
+        return DRFLAC_AT_END;
+    }
+    *pBytesRead += pHeader->segmentCount;
+
+    for (i = 0; i < pHeader->segmentCount; ++i) {
+        *pCRC32 = drflac_crc32_byte(*pCRC32, pHeader->segmentTable[i]);
+    }
+
+    return DRFLAC_SUCCESS;
+}
+
+static drflac_result drflac_ogg__read_page_header(drflac_read_proc onRead, void* pUserData, drflac_ogg_page_header* pHeader, drflac_uint32* pBytesRead, drflac_uint32* pCRC32)
+{
+    drflac_uint8 id[4];
+
+    *pBytesRead = 0;
+
+    if (onRead(pUserData, id, 4) != 4) {
+        return DRFLAC_AT_END;
+    }
+    *pBytesRead += 4;
+
+    /* We need to read byte-by-byte until we find the OggS capture pattern. */
+    for (;;) {
+        if (drflac_ogg__is_capture_pattern(id)) {
+            drflac_result result;
+
+            *pCRC32 = DRFLAC_OGG_CAPTURE_PATTERN_CRC32;
+
+            result = drflac_ogg__read_page_header_after_capture_pattern(onRead, pUserData, pHeader, pBytesRead, pCRC32);
+            if (result == DRFLAC_SUCCESS) {
+                return DRFLAC_SUCCESS;
+            } else {
+                if (result == DRFLAC_CRC_MISMATCH) {
+                    continue;
+                } else {
+                    return result;
+                }
+            }
+        } else {
+            /* The first 4 bytes did not equal the capture pattern. Read the next byte and try again. */
+            id[0] = id[1];
+            id[1] = id[2];
+            id[2] = id[3];
+            if (onRead(pUserData, &id[3], 1) != 1) {
+                return DRFLAC_AT_END;
+            }
+            *pBytesRead += 1;
+        }
+    }
+}
+
+
+/*
+The main part of the Ogg encapsulation is the conversion from the physical Ogg bitstream to the native FLAC bitstream. It works
+in three general stages: Ogg Physical Bitstream -> Ogg/FLAC Logical Bitstream -> FLAC Native Bitstream. dr_flac is designed
+in such a way that the core sections assume everything is delivered in native format. Therefore, for each encapsulation type
+dr_flac is supporting there needs to be a layer sitting on top of the onRead and onSeek callbacks that ensures the bits read from
+the physical Ogg bitstream are converted and delivered in native FLAC format.
+*/
+typedef struct
+{
+    drflac_read_proc onRead;                /* The original onRead callback from drflac_open() and family. */
+    drflac_seek_proc onSeek;                /* The original onSeek callback from drflac_open() and family. */
+    void* pUserData;                        /* The user data passed on onRead and onSeek. This is the user data that was passed on drflac_open() and family. */
+    drflac_uint64 currentBytePos;           /* The position of the byte we are sitting on in the physical byte stream. Used for efficient seeking. */
+    drflac_uint64 firstBytePos;             /* The position of the first byte in the physical bitstream. Points to the start of the "OggS" identifier of the FLAC bos page. */
+    drflac_uint32 serialNumber;             /* The serial number of the FLAC audio pages. This is determined by the initial header page that was read during initialization. */
+    drflac_ogg_page_header bosPageHeader;   /* Used for seeking. */
+    drflac_ogg_page_header currentPageHeader;
+    drflac_uint32 bytesRemainingInPage;
+    drflac_uint32 pageDataSize;
+    drflac_uint8 pageData[DRFLAC_OGG_MAX_PAGE_SIZE];
+} drflac_oggbs; /* oggbs = Ogg Bitstream */
+
+static size_t drflac_oggbs__read_physical(drflac_oggbs* oggbs, void* bufferOut, size_t bytesToRead)
+{
+    size_t bytesActuallyRead = oggbs->onRead(oggbs->pUserData, bufferOut, bytesToRead);
+    oggbs->currentBytePos += bytesActuallyRead;
+
+    return bytesActuallyRead;
+}
+
+static drflac_bool32 drflac_oggbs__seek_physical(drflac_oggbs* oggbs, drflac_uint64 offset, drflac_seek_origin origin)
+{
+    if (origin == drflac_seek_origin_start) {
+        if (offset <= 0x7FFFFFFF) {
+            if (!oggbs->onSeek(oggbs->pUserData, (int)offset, drflac_seek_origin_start)) {
+                return DRFLAC_FALSE;
+            }
+            oggbs->currentBytePos = offset;
+
+            return DRFLAC_TRUE;
+        } else {
+            if (!oggbs->onSeek(oggbs->pUserData, 0x7FFFFFFF, drflac_seek_origin_start)) {
+                return DRFLAC_FALSE;
+            }
+            oggbs->currentBytePos = offset;
+
+            return drflac_oggbs__seek_physical(oggbs, offset - 0x7FFFFFFF, drflac_seek_origin_current);
+        }
+    } else {
+        while (offset > 0x7FFFFFFF) {
+            if (!oggbs->onSeek(oggbs->pUserData, 0x7FFFFFFF, drflac_seek_origin_current)) {
+                return DRFLAC_FALSE;
+            }
+            oggbs->currentBytePos += 0x7FFFFFFF;
+            offset -= 0x7FFFFFFF;
+        }
+
+        if (!oggbs->onSeek(oggbs->pUserData, (int)offset, drflac_seek_origin_current)) {    /* <-- Safe cast thanks to the loop above. */
+            return DRFLAC_FALSE;
+        }
+        oggbs->currentBytePos += offset;
+
+        return DRFLAC_TRUE;
+    }
+}
+
+static drflac_bool32 drflac_oggbs__goto_next_page(drflac_oggbs* oggbs, drflac_ogg_crc_mismatch_recovery recoveryMethod)
+{
+    drflac_ogg_page_header header;
+    for (;;) {
+        drflac_uint32 crc32 = 0;
+        drflac_uint32 bytesRead;
+        drflac_uint32 pageBodySize;
+#ifndef DR_FLAC_NO_CRC
+        drflac_uint32 actualCRC32;
+#endif
+
+        if (drflac_ogg__read_page_header(oggbs->onRead, oggbs->pUserData, &header, &bytesRead, &crc32) != DRFLAC_SUCCESS) {
+            return DRFLAC_FALSE;
+        }
+        oggbs->currentBytePos += bytesRead;
+
+        pageBodySize = drflac_ogg__get_page_body_size(&header);
+        if (pageBodySize > DRFLAC_OGG_MAX_PAGE_SIZE) {
+            continue;   /* Invalid page size. Assume it's corrupted and just move to the next page. */
+        }
+
+        if (header.serialNumber != oggbs->serialNumber) {
+            /* It's not a FLAC page. Skip it. */
+            if (pageBodySize > 0 && !drflac_oggbs__seek_physical(oggbs, pageBodySize, drflac_seek_origin_current)) {
+                return DRFLAC_FALSE;
+            }
+            continue;
+        }
+
+
+        /* We need to read the entire page and then do a CRC check on it. If there's a CRC mismatch we need to skip this page. */
+        if (drflac_oggbs__read_physical(oggbs, oggbs->pageData, pageBodySize) != pageBodySize) {
+            return DRFLAC_FALSE;
+        }
+        oggbs->pageDataSize = pageBodySize;
+
+#ifndef DR_FLAC_NO_CRC
+        actualCRC32 = drflac_crc32_buffer(crc32, oggbs->pageData, oggbs->pageDataSize);
+        if (actualCRC32 != header.checksum) {
+            if (recoveryMethod == drflac_ogg_recover_on_crc_mismatch) {
+                continue;   /* CRC mismatch. Skip this page. */
+            } else {
+                /*
+                Even though we are failing on a CRC mismatch, we still want our stream to be in a good state. Therefore we
+                go to the next valid page to ensure we're in a good state, but return false to let the caller know that the
+                seek did not fully complete.
+                */
+                drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch);
+                return DRFLAC_FALSE;
+            }
+        }
+#else
+        (void)recoveryMethod;   /* <-- Silence a warning. */
+#endif
+
+        oggbs->currentPageHeader = header;
+        oggbs->bytesRemainingInPage = pageBodySize;
+        return DRFLAC_TRUE;
+    }
+}
+
+/* Function below is unused at the moment, but I might be re-adding it later. */
+#if 0
+static drflac_uint8 drflac_oggbs__get_current_segment_index(drflac_oggbs* oggbs, drflac_uint8* pBytesRemainingInSeg)
+{
+    drflac_uint32 bytesConsumedInPage = drflac_ogg__get_page_body_size(&oggbs->currentPageHeader) - oggbs->bytesRemainingInPage;
+    drflac_uint8 iSeg = 0;
+    drflac_uint32 iByte = 0;
+    while (iByte < bytesConsumedInPage) {
+        drflac_uint8 segmentSize = oggbs->currentPageHeader.segmentTable[iSeg];
+        if (iByte + segmentSize > bytesConsumedInPage) {
+            break;
+        } else {
+            iSeg += 1;
+            iByte += segmentSize;
+        }
+    }
+
+    *pBytesRemainingInSeg = oggbs->currentPageHeader.segmentTable[iSeg] - (drflac_uint8)(bytesConsumedInPage - iByte);
+    return iSeg;
+}
+
+static drflac_bool32 drflac_oggbs__seek_to_next_packet(drflac_oggbs* oggbs)
+{
+    /* The current packet ends when we get to the segment with a lacing value of < 255 which is not at the end of a page. */
+    for (;;) {
+        drflac_bool32 atEndOfPage = DRFLAC_FALSE;
+
+        drflac_uint8 bytesRemainingInSeg;
+        drflac_uint8 iFirstSeg = drflac_oggbs__get_current_segment_index(oggbs, &bytesRemainingInSeg);
+
+        drflac_uint32 bytesToEndOfPacketOrPage = bytesRemainingInSeg;
+        for (drflac_uint8 iSeg = iFirstSeg; iSeg < oggbs->currentPageHeader.segmentCount; ++iSeg) {
+            drflac_uint8 segmentSize = oggbs->currentPageHeader.segmentTable[iSeg];
+            if (segmentSize < 255) {
+                if (iSeg == oggbs->currentPageHeader.segmentCount-1) {
+                    atEndOfPage = DRFLAC_TRUE;
+                }
+
+                break;
+            }
+
+            bytesToEndOfPacketOrPage += segmentSize;
+        }
+
+        /*
+        At this point we will have found either the packet or the end of the page. If were at the end of the page we'll
+        want to load the next page and keep searching for the end of the packet.
+        */
+        drflac_oggbs__seek_physical(oggbs, bytesToEndOfPacketOrPage, drflac_seek_origin_current);
+        oggbs->bytesRemainingInPage -= bytesToEndOfPacketOrPage;
+
+        if (atEndOfPage) {
+            /*
+            We're potentially at the next packet, but we need to check the next page first to be sure because the packet may
+            straddle pages.
+            */
+            if (!drflac_oggbs__goto_next_page(oggbs)) {
+                return DRFLAC_FALSE;
+            }
+
+            /* If it's a fresh packet it most likely means we're at the next packet. */
+            if ((oggbs->currentPageHeader.headerType & 0x01) == 0) {
+                return DRFLAC_TRUE;
+            }
+        } else {
+            /* We're at the next packet. */
+            return DRFLAC_TRUE;
+        }
+    }
+}
+
+static drflac_bool32 drflac_oggbs__seek_to_next_frame(drflac_oggbs* oggbs)
+{
+    /* The bitstream should be sitting on the first byte just after the header of the frame. */
+
+    /* What we're actually doing here is seeking to the start of the next packet. */
+    return drflac_oggbs__seek_to_next_packet(oggbs);
+}
+#endif
+
+static size_t drflac__on_read_ogg(void* pUserData, void* bufferOut, size_t bytesToRead)
+{
+    drflac_oggbs* oggbs = (drflac_oggbs*)pUserData;
+    drflac_uint8* pRunningBufferOut = (drflac_uint8*)bufferOut;
+    size_t bytesRead = 0;
+
+    DRFLAC_ASSERT(oggbs != NULL);
+    DRFLAC_ASSERT(pRunningBufferOut != NULL);
+
+    /* Reading is done page-by-page. If we've run out of bytes in the page we need to move to the next one. */
+    while (bytesRead < bytesToRead) {
+        size_t bytesRemainingToRead = bytesToRead - bytesRead;
+
+        if (oggbs->bytesRemainingInPage >= bytesRemainingToRead) {
+            DRFLAC_COPY_MEMORY(pRunningBufferOut, oggbs->pageData + (oggbs->pageDataSize - oggbs->bytesRemainingInPage), bytesRemainingToRead);
+            bytesRead += bytesRemainingToRead;
+            oggbs->bytesRemainingInPage -= (drflac_uint32)bytesRemainingToRead;
+            break;
+        }
+
+        /* If we get here it means some of the requested data is contained in the next pages. */
+        if (oggbs->bytesRemainingInPage > 0) {
+            DRFLAC_COPY_MEMORY(pRunningBufferOut, oggbs->pageData + (oggbs->pageDataSize - oggbs->bytesRemainingInPage), oggbs->bytesRemainingInPage);
+            bytesRead += oggbs->bytesRemainingInPage;
+            pRunningBufferOut += oggbs->bytesRemainingInPage;
+            oggbs->bytesRemainingInPage = 0;
+        }
+
+        DRFLAC_ASSERT(bytesRemainingToRead > 0);
+        if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch)) {
+            break;  /* Failed to go to the next page. Might have simply hit the end of the stream. */
+        }
+    }
+
+    return bytesRead;
+}
+
+static drflac_bool32 drflac__on_seek_ogg(void* pUserData, int offset, drflac_seek_origin origin)
+{
+    drflac_oggbs* oggbs = (drflac_oggbs*)pUserData;
+    int bytesSeeked = 0;
+
+    DRFLAC_ASSERT(oggbs != NULL);
+    DRFLAC_ASSERT(offset >= 0);  /* <-- Never seek backwards. */
+
+    /* Seeking is always forward which makes things a lot simpler. */
+    if (origin == drflac_seek_origin_start) {
+        if (!drflac_oggbs__seek_physical(oggbs, (int)oggbs->firstBytePos, drflac_seek_origin_start)) {
+            return DRFLAC_FALSE;
+        }
+
+        if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_fail_on_crc_mismatch)) {
+            return DRFLAC_FALSE;
+        }
+
+        return drflac__on_seek_ogg(pUserData, offset, drflac_seek_origin_current);
+    }
+
+    DRFLAC_ASSERT(origin == drflac_seek_origin_current);
+
+    while (bytesSeeked < offset) {
+        int bytesRemainingToSeek = offset - bytesSeeked;
+        DRFLAC_ASSERT(bytesRemainingToSeek >= 0);
+
+        if (oggbs->bytesRemainingInPage >= (size_t)bytesRemainingToSeek) {
+            bytesSeeked += bytesRemainingToSeek;
+            (void)bytesSeeked;  /* <-- Silence a dead store warning emitted by Clang Static Analyzer. */
+            oggbs->bytesRemainingInPage -= bytesRemainingToSeek;
+            break;
+        }
+
+        /* If we get here it means some of the requested data is contained in the next pages. */
+        if (oggbs->bytesRemainingInPage > 0) {
+            bytesSeeked += (int)oggbs->bytesRemainingInPage;
+            oggbs->bytesRemainingInPage = 0;
+        }
+
+        DRFLAC_ASSERT(bytesRemainingToSeek > 0);
+        if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_fail_on_crc_mismatch)) {
+            /* Failed to go to the next page. We either hit the end of the stream or had a CRC mismatch. */
+            return DRFLAC_FALSE;
+        }
+    }
+
+    return DRFLAC_TRUE;
+}
+
+
+static drflac_bool32 drflac_ogg__seek_to_pcm_frame(drflac* pFlac, drflac_uint64 pcmFrameIndex)
+{
+    drflac_oggbs* oggbs = (drflac_oggbs*)pFlac->_oggbs;
+    drflac_uint64 originalBytePos;
+    drflac_uint64 runningGranulePosition;
+    drflac_uint64 runningFrameBytePos;
+    drflac_uint64 runningPCMFrameCount;
+
+    DRFLAC_ASSERT(oggbs != NULL);
+
+    originalBytePos = oggbs->currentBytePos;   /* For recovery. Points to the OggS identifier. */
+
+    /* First seek to the first frame. */
+    if (!drflac__seek_to_byte(&pFlac->bs, pFlac->firstFLACFramePosInBytes)) {
+        return DRFLAC_FALSE;
+    }
+    oggbs->bytesRemainingInPage = 0;
+
+    runningGranulePosition = 0;
+    for (;;) {
+        if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch)) {
+            drflac_oggbs__seek_physical(oggbs, originalBytePos, drflac_seek_origin_start);
+            return DRFLAC_FALSE;   /* Never did find that sample... */
+        }
+
+        runningFrameBytePos = oggbs->currentBytePos - drflac_ogg__get_page_header_size(&oggbs->currentPageHeader) - oggbs->pageDataSize;
+        if (oggbs->currentPageHeader.granulePosition >= pcmFrameIndex) {
+            break; /* The sample is somewhere in the previous page. */
+        }
+
+        /*
+        At this point we know the sample is not in the previous page. It could possibly be in this page. For simplicity we
+        disregard any pages that do not begin a fresh packet.
+        */
+        if ((oggbs->currentPageHeader.headerType & 0x01) == 0) {    /* <-- Is it a fresh page? */
+            if (oggbs->currentPageHeader.segmentTable[0] >= 2) {
+                drflac_uint8 firstBytesInPage[2];
+                firstBytesInPage[0] = oggbs->pageData[0];
+                firstBytesInPage[1] = oggbs->pageData[1];
+
+                if ((firstBytesInPage[0] == 0xFF) && (firstBytesInPage[1] & 0xFC) == 0xF8) {    /* <-- Does the page begin with a frame's sync code? */
+                    runningGranulePosition = oggbs->currentPageHeader.granulePosition;
+                }
+
+                continue;
+            }
+        }
+    }
+
+    /*
+    We found the page that that is closest to the sample, so now we need to find it. The first thing to do is seek to the
+    start of that page. In the loop above we checked that it was a fresh page which means this page is also the start of
+    a new frame. This property means that after we've seeked to the page we can immediately start looping over frames until
+    we find the one containing the target sample.
+    */
+    if (!drflac_oggbs__seek_physical(oggbs, runningFrameBytePos, drflac_seek_origin_start)) {
+        return DRFLAC_FALSE;
+    }
+    if (!drflac_oggbs__goto_next_page(oggbs, drflac_ogg_recover_on_crc_mismatch)) {
+        return DRFLAC_FALSE;
+    }
+
+    /*
+    At this point we'll be sitting on the first byte of the frame header of the first frame in the page. We just keep
+    looping over these frames until we find the one containing the sample we're after.
+    */
+    runningPCMFrameCount = runningGranulePosition;
+    for (;;) {
+        /*
+        There are two ways to find the sample and seek past irrelevant frames:
+          1) Use the native FLAC decoder.
+          2) Use Ogg's framing system.
+
+        Both of these options have their own pros and cons. Using the native FLAC decoder is slower because it needs to
+        do a full decode of the frame. Using Ogg's framing system is faster, but more complicated and involves some code
+        duplication for the decoding of frame headers.
+
+        Another thing to consider is that using the Ogg framing system will perform direct seeking of the physical Ogg
+        bitstream. This is important to consider because it means we cannot read data from the drflac_bs object using the
+        standard drflac__*() APIs because that will read in extra data for its own internal caching which in turn breaks
+        the positioning of the read pointer of the physical Ogg bitstream. Therefore, anything that would normally be read
+        using the native FLAC decoding APIs, such as drflac__read_next_flac_frame_header(), need to be re-implemented so as to
+        avoid the use of the drflac_bs object.
+
+        Considering these issues, I have decided to use the slower native FLAC decoding method for the following reasons:
+          1) Seeking is already partially accelerated using Ogg's paging system in the code block above.
+          2) Seeking in an Ogg encapsulated FLAC stream is probably quite uncommon.
+          3) Simplicity.
+        */
+        drflac_uint64 firstPCMFrameInFLACFrame = 0;
+        drflac_uint64 lastPCMFrameInFLACFrame = 0;
+        drflac_uint64 pcmFrameCountInThisFrame;
+
+        if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
+            return DRFLAC_FALSE;
+        }
+
+        drflac__get_pcm_frame_range_of_current_flac_frame(pFlac, &firstPCMFrameInFLACFrame, &lastPCMFrameInFLACFrame);
+
+        pcmFrameCountInThisFrame = (lastPCMFrameInFLACFrame - firstPCMFrameInFLACFrame) + 1;
+
+        /* If we are seeking to the end of the file and we've just hit it, we're done. */
+        if (pcmFrameIndex == pFlac->totalPCMFrameCount && (runningPCMFrameCount + pcmFrameCountInThisFrame) == pFlac->totalPCMFrameCount) {
+            drflac_result result = drflac__decode_flac_frame(pFlac);
+            if (result == DRFLAC_SUCCESS) {
+                pFlac->currentPCMFrame = pcmFrameIndex;
+                pFlac->currentFLACFrame.pcmFramesRemaining = 0;
+                return DRFLAC_TRUE;
+            } else {
+                return DRFLAC_FALSE;
+            }
+        }
+
+        if (pcmFrameIndex < (runningPCMFrameCount + pcmFrameCountInThisFrame)) {
+            /*
+            The sample should be in this FLAC frame. We need to fully decode it, however if it's an invalid frame (a CRC mismatch), we need to pretend
+            it never existed and keep iterating.
+            */
+            drflac_result result = drflac__decode_flac_frame(pFlac);
+            if (result == DRFLAC_SUCCESS) {
+                /* The frame is valid. We just need to skip over some samples to ensure it's sample-exact. */
+                drflac_uint64 pcmFramesToDecode = (size_t)(pcmFrameIndex - runningPCMFrameCount);    /* <-- Safe cast because the maximum number of samples in a frame is 65535. */
+                if (pcmFramesToDecode == 0) {
+                    return DRFLAC_TRUE;
+                }
+
+                pFlac->currentPCMFrame = runningPCMFrameCount;
+
+                return drflac__seek_forward_by_pcm_frames(pFlac, pcmFramesToDecode) == pcmFramesToDecode;  /* <-- If this fails, something bad has happened (it should never fail). */
+            } else {
+                if (result == DRFLAC_CRC_MISMATCH) {
+                    continue;   /* CRC mismatch. Pretend this frame never existed. */
+                } else {
+                    return DRFLAC_FALSE;
+                }
+            }
+        } else {
+            /*
+            It's not in this frame. We need to seek past the frame, but check if there was a CRC mismatch. If so, we pretend this
+            frame never existed and leave the running sample count untouched.
+            */
+            drflac_result result = drflac__seek_to_next_flac_frame(pFlac);
+            if (result == DRFLAC_SUCCESS) {
+                runningPCMFrameCount += pcmFrameCountInThisFrame;
+            } else {
+                if (result == DRFLAC_CRC_MISMATCH) {
+                    continue;   /* CRC mismatch. Pretend this frame never existed. */
+                } else {
+                    return DRFLAC_FALSE;
+                }
+            }
+        }
+    }
+}
+
+
+
+static drflac_bool32 drflac__init_private__ogg(drflac_init_info* pInit, drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, void* pUserDataMD, drflac_bool32 relaxed)
+{
+    drflac_ogg_page_header header;
+    drflac_uint32 crc32 = DRFLAC_OGG_CAPTURE_PATTERN_CRC32;
+    drflac_uint32 bytesRead = 0;
+
+    /* Pre Condition: The bit stream should be sitting just past the 4-byte OggS capture pattern. */
+    (void)relaxed;
+
+    pInit->container = drflac_container_ogg;
+    pInit->oggFirstBytePos = 0;
+
+    /*
+    We'll get here if the first 4 bytes of the stream were the OggS capture pattern, however it doesn't necessarily mean the
+    stream includes FLAC encoded audio. To check for this we need to scan the beginning-of-stream page markers and check if
+    any match the FLAC specification. Important to keep in mind that the stream may be multiplexed.
+    */
+    if (drflac_ogg__read_page_header_after_capture_pattern(onRead, pUserData, &header, &bytesRead, &crc32) != DRFLAC_SUCCESS) {
+        return DRFLAC_FALSE;
+    }
+    pInit->runningFilePos += bytesRead;
+
+    for (;;) {
+        int pageBodySize;
+
+        /* Break if we're past the beginning of stream page. */
+        if ((header.headerType & 0x02) == 0) {
+            return DRFLAC_FALSE;
+        }
+
+        /* Check if it's a FLAC header. */
+        pageBodySize = drflac_ogg__get_page_body_size(&header);
+        if (pageBodySize == 51) {   /* 51 = the lacing value of the FLAC header packet. */
+            /* It could be a FLAC page... */
+            drflac_uint32 bytesRemainingInPage = pageBodySize;
+            drflac_uint8 packetType;
+
+            if (onRead(pUserData, &packetType, 1) != 1) {
+                return DRFLAC_FALSE;
+            }
+
+            bytesRemainingInPage -= 1;
+            if (packetType == 0x7F) {
+                /* Increasingly more likely to be a FLAC page... */
+                drflac_uint8 sig[4];
+                if (onRead(pUserData, sig, 4) != 4) {
+                    return DRFLAC_FALSE;
+                }
+
+                bytesRemainingInPage -= 4;
+                if (sig[0] == 'F' && sig[1] == 'L' && sig[2] == 'A' && sig[3] == 'C') {
+                    /* Almost certainly a FLAC page... */
+                    drflac_uint8 mappingVersion[2];
+                    if (onRead(pUserData, mappingVersion, 2) != 2) {
+                        return DRFLAC_FALSE;
+                    }
+
+                    if (mappingVersion[0] != 1) {
+                        return DRFLAC_FALSE;   /* Only supporting version 1.x of the Ogg mapping. */
+                    }
+
+                    /*
+                    The next 2 bytes are the non-audio packets, not including this one. We don't care about this because we're going to
+                    be handling it in a generic way based on the serial number and packet types.
+                    */
+                    if (!onSeek(pUserData, 2, drflac_seek_origin_current)) {
+                        return DRFLAC_FALSE;
+                    }
+
+                    /* Expecting the native FLAC signature "fLaC". */
+                    if (onRead(pUserData, sig, 4) != 4) {
+                        return DRFLAC_FALSE;
+                    }
+
+                    if (sig[0] == 'f' && sig[1] == 'L' && sig[2] == 'a' && sig[3] == 'C') {
+                        /* The remaining data in the page should be the STREAMINFO block. */
+                        drflac_streaminfo streaminfo;
+                        drflac_uint8 isLastBlock;
+                        drflac_uint8 blockType;
+                        drflac_uint32 blockSize;
+                        if (!drflac__read_and_decode_block_header(onRead, pUserData, &isLastBlock, &blockType, &blockSize)) {
+                            return DRFLAC_FALSE;
+                        }
+
+                        if (blockType != DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO || blockSize != 34) {
+                            return DRFLAC_FALSE;    /* Invalid block type. First block must be the STREAMINFO block. */
+                        }
+
+                        if (drflac__read_streaminfo(onRead, pUserData, &streaminfo)) {
+                            /* Success! */
+                            pInit->hasStreamInfoBlock      = DRFLAC_TRUE;
+                            pInit->sampleRate              = streaminfo.sampleRate;
+                            pInit->channels                = streaminfo.channels;
+                            pInit->bitsPerSample           = streaminfo.bitsPerSample;
+                            pInit->totalPCMFrameCount      = streaminfo.totalPCMFrameCount;
+                            pInit->maxBlockSizeInPCMFrames = streaminfo.maxBlockSizeInPCMFrames;
+                            pInit->hasMetadataBlocks       = !isLastBlock;
+
+                            if (onMeta) {
+                                drflac_metadata metadata;
+                                metadata.type = DRFLAC_METADATA_BLOCK_TYPE_STREAMINFO;
+                                metadata.pRawData = NULL;
+                                metadata.rawDataSize = 0;
+                                metadata.data.streaminfo = streaminfo;
+                                onMeta(pUserDataMD, &metadata);
+                            }
+
+                            pInit->runningFilePos  += pageBodySize;
+                            pInit->oggFirstBytePos  = pInit->runningFilePos - 79;   /* Subtracting 79 will place us right on top of the "OggS" identifier of the FLAC bos page. */
+                            pInit->oggSerial        = header.serialNumber;
+                            pInit->oggBosHeader     = header;
+                            break;
+                        } else {
+                            /* Failed to read STREAMINFO block. Aww, so close... */
+                            return DRFLAC_FALSE;
+                        }
+                    } else {
+                        /* Invalid file. */
+                        return DRFLAC_FALSE;
+                    }
+                } else {
+                    /* Not a FLAC header. Skip it. */
+                    if (!onSeek(pUserData, bytesRemainingInPage, drflac_seek_origin_current)) {
+                        return DRFLAC_FALSE;
+                    }
+                }
+            } else {
+                /* Not a FLAC header. Seek past the entire page and move on to the next. */
+                if (!onSeek(pUserData, bytesRemainingInPage, drflac_seek_origin_current)) {
+                    return DRFLAC_FALSE;
+                }
+            }
+        } else {
+            if (!onSeek(pUserData, pageBodySize, drflac_seek_origin_current)) {
+                return DRFLAC_FALSE;
+            }
+        }
+
+        pInit->runningFilePos += pageBodySize;
+
+
+        /* Read the header of the next page. */
+        if (drflac_ogg__read_page_header(onRead, pUserData, &header, &bytesRead, &crc32) != DRFLAC_SUCCESS) {
+            return DRFLAC_FALSE;
+        }
+        pInit->runningFilePos += bytesRead;
+    }
+
+    /*
+    If we get here it means we found a FLAC audio stream. We should be sitting on the first byte of the header of the next page. The next
+    packets in the FLAC logical stream contain the metadata. The only thing left to do in the initialization phase for Ogg is to create the
+    Ogg bistream object.
+    */
+    pInit->hasMetadataBlocks = DRFLAC_TRUE;    /* <-- Always have at least VORBIS_COMMENT metadata block. */
+    return DRFLAC_TRUE;
+}
+#endif
+
+static drflac_bool32 drflac__init_private(drflac_init_info* pInit, drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData, void* pUserDataMD)
+{
+    drflac_bool32 relaxed;
+    drflac_uint8 id[4];
+
+    if (pInit == NULL || onRead == NULL || onSeek == NULL) {
+        return DRFLAC_FALSE;
+    }
+
+    DRFLAC_ZERO_MEMORY(pInit, sizeof(*pInit));
+    pInit->onRead       = onRead;
+    pInit->onSeek       = onSeek;
+    pInit->onMeta       = onMeta;
+    pInit->container    = container;
+    pInit->pUserData    = pUserData;
+    pInit->pUserDataMD  = pUserDataMD;
+
+    pInit->bs.onRead    = onRead;
+    pInit->bs.onSeek    = onSeek;
+    pInit->bs.pUserData = pUserData;
+    drflac__reset_cache(&pInit->bs);
+
+
+    /* If the container is explicitly defined then we can try opening in relaxed mode. */
+    relaxed = container != drflac_container_unknown;
+
+    /* Skip over any ID3 tags. */
+    for (;;) {
+        if (onRead(pUserData, id, 4) != 4) {
+            return DRFLAC_FALSE;    /* Ran out of data. */
+        }
+        pInit->runningFilePos += 4;
+
+        if (id[0] == 'I' && id[1] == 'D' && id[2] == '3') {
+            drflac_uint8 header[6];
+            drflac_uint8 flags;
+            drflac_uint32 headerSize;
+
+            if (onRead(pUserData, header, 6) != 6) {
+                return DRFLAC_FALSE;    /* Ran out of data. */
+            }
+            pInit->runningFilePos += 6;
+
+            flags = header[1];
+
+            DRFLAC_COPY_MEMORY(&headerSize, header+2, 4);
+            headerSize = drflac__unsynchsafe_32(drflac__be2host_32(headerSize));
+            if (flags & 0x10) {
+                headerSize += 10;
+            }
+
+            if (!onSeek(pUserData, headerSize, drflac_seek_origin_current)) {
+                return DRFLAC_FALSE;    /* Failed to seek past the tag. */
+            }
+            pInit->runningFilePos += headerSize;
+        } else {
+            break;
+        }
+    }
+
+    if (id[0] == 'f' && id[1] == 'L' && id[2] == 'a' && id[3] == 'C') {
+        return drflac__init_private__native(pInit, onRead, onSeek, onMeta, pUserData, pUserDataMD, relaxed);
+    }
+#ifndef DR_FLAC_NO_OGG
+    if (id[0] == 'O' && id[1] == 'g' && id[2] == 'g' && id[3] == 'S') {
+        return drflac__init_private__ogg(pInit, onRead, onSeek, onMeta, pUserData, pUserDataMD, relaxed);
+    }
+#endif
+
+    /* If we get here it means we likely don't have a header. Try opening in relaxed mode, if applicable. */
+    if (relaxed) {
+        if (container == drflac_container_native) {
+            return drflac__init_private__native(pInit, onRead, onSeek, onMeta, pUserData, pUserDataMD, relaxed);
+        }
+#ifndef DR_FLAC_NO_OGG
+        if (container == drflac_container_ogg) {
+            return drflac__init_private__ogg(pInit, onRead, onSeek, onMeta, pUserData, pUserDataMD, relaxed);
+        }
+#endif
+    }
+
+    /* Unsupported container. */
+    return DRFLAC_FALSE;
+}
+
+static void drflac__init_from_info(drflac* pFlac, const drflac_init_info* pInit)
+{
+    DRFLAC_ASSERT(pFlac != NULL);
+    DRFLAC_ASSERT(pInit != NULL);
+
+    DRFLAC_ZERO_MEMORY(pFlac, sizeof(*pFlac));
+    pFlac->bs                      = pInit->bs;
+    pFlac->onMeta                  = pInit->onMeta;
+    pFlac->pUserDataMD             = pInit->pUserDataMD;
+    pFlac->maxBlockSizeInPCMFrames = pInit->maxBlockSizeInPCMFrames;
+    pFlac->sampleRate              = pInit->sampleRate;
+    pFlac->channels                = (drflac_uint8)pInit->channels;
+    pFlac->bitsPerSample           = (drflac_uint8)pInit->bitsPerSample;
+    pFlac->totalPCMFrameCount      = pInit->totalPCMFrameCount;
+    pFlac->container               = pInit->container;
+}
+
+
+static drflac* drflac_open_with_metadata_private(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData, void* pUserDataMD, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    static drflac_init_info init;
+    drflac_uint32 allocationSize;
+    drflac_uint32 wholeSIMDVectorCountPerChannel;
+    drflac_uint32 decodedSamplesAllocationSize;
+#ifndef DR_FLAC_NO_OGG
+    static drflac_oggbs oggbs;
+#endif
+    drflac_uint64 firstFramePos;
+    drflac_uint64 seektablePos;
+    drflac_uint32 seektableSize;
+    drflac_allocation_callbacks allocationCallbacks;
+    drflac* pFlac;
+
+    /* CPU support first. */
+    drflac__init_cpu_caps();
+
+    if (!drflac__init_private(&init, onRead, onSeek, onMeta, container, pUserData, pUserDataMD)) {
+        return NULL;
+    }
+
+    if (pAllocationCallbacks != NULL) {
+        allocationCallbacks = *pAllocationCallbacks;
+        if (allocationCallbacks.onFree == NULL || (allocationCallbacks.onMalloc == NULL && allocationCallbacks.onRealloc == NULL)) {
+            return NULL;    /* Invalid allocation callbacks. */
+        }
+    } else {
+        allocationCallbacks.pUserData = NULL;
+        allocationCallbacks.onMalloc  = drflac__malloc_default;
+        allocationCallbacks.onRealloc = drflac__realloc_default;
+        allocationCallbacks.onFree    = drflac__free_default;
+    }
+
+
+    /*
+    The size of the allocation for the drflac object needs to be large enough to fit the following:
+      1) The main members of the drflac structure
+      2) A block of memory large enough to store the decoded samples of the largest frame in the stream
+      3) If the container is Ogg, a drflac_oggbs object
+
+    The complicated part of the allocation is making sure there's enough room the decoded samples, taking into consideration
+    the different SIMD instruction sets.
+    */
+    allocationSize = sizeof(drflac);
+
+    /*
+    The allocation size for decoded frames depends on the number of 32-bit integers that fit inside the largest SIMD vector
+    we are supporting.
+    */
+    if ((init.maxBlockSizeInPCMFrames % (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32))) == 0) {
+        wholeSIMDVectorCountPerChannel = (init.maxBlockSizeInPCMFrames / (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32)));
+    } else {
+        wholeSIMDVectorCountPerChannel = (init.maxBlockSizeInPCMFrames / (DRFLAC_MAX_SIMD_VECTOR_SIZE / sizeof(drflac_int32))) + 1;
+    }
+
+    decodedSamplesAllocationSize = wholeSIMDVectorCountPerChannel * DRFLAC_MAX_SIMD_VECTOR_SIZE * init.channels;
+
+    allocationSize += decodedSamplesAllocationSize;
+    allocationSize += DRFLAC_MAX_SIMD_VECTOR_SIZE;  /* Allocate extra bytes to ensure we have enough for alignment. */
+
+#ifndef DR_FLAC_NO_OGG
+    /* There's additional data required for Ogg streams. */
+    if (init.container == drflac_container_ogg) {
+        allocationSize += sizeof(drflac_oggbs);
+    }
+
+    DRFLAC_ZERO_MEMORY(&oggbs, sizeof(oggbs));
+    if (init.container == drflac_container_ogg) {
+        oggbs.onRead = onRead;
+        oggbs.onSeek = onSeek;
+        oggbs.pUserData = pUserData;
+        oggbs.currentBytePos = init.oggFirstBytePos;
+        oggbs.firstBytePos = init.oggFirstBytePos;
+        oggbs.serialNumber = init.oggSerial;
+        oggbs.bosPageHeader = init.oggBosHeader;
+        oggbs.bytesRemainingInPage = 0;
+    }
+#endif
+
+    /*
+    This part is a bit awkward. We need to load the seektable so that it can be referenced in-memory, but I want the drflac object to
+    consist of only a single heap allocation. To this, the size of the seek table needs to be known, which we determine when reading
+    and decoding the metadata.
+    */
+    firstFramePos = 42;   /* <-- We know we are at byte 42 at this point. */
+    seektablePos  = 0;
+    seektableSize = 0;
+    if (init.hasMetadataBlocks) {
+        drflac_read_proc onReadOverride = onRead;
+        drflac_seek_proc onSeekOverride = onSeek;
+        void* pUserDataOverride = pUserData;
+
+#ifndef DR_FLAC_NO_OGG
+        if (init.container == drflac_container_ogg) {
+            onReadOverride = drflac__on_read_ogg;
+            onSeekOverride = drflac__on_seek_ogg;
+            pUserDataOverride = (void*)&oggbs;
+        }
+#endif
+
+        if (!drflac__read_and_decode_metadata(onReadOverride, onSeekOverride, onMeta, pUserDataOverride, pUserDataMD, &firstFramePos, &seektablePos, &seektableSize, &allocationCallbacks)) {
+            return NULL;
+        }
+
+        allocationSize += seektableSize;
+    }
+
+
+    pFlac = (drflac*)drflac__malloc_from_callbacks(allocationSize, &allocationCallbacks);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    drflac__init_from_info(pFlac, &init);
+    pFlac->allocationCallbacks = allocationCallbacks;
+    pFlac->pDecodedSamples = (drflac_int32*)drflac_align((size_t)pFlac->pExtraData, DRFLAC_MAX_SIMD_VECTOR_SIZE);
+
+#ifndef DR_FLAC_NO_OGG
+    if (init.container == drflac_container_ogg) {
+        drflac_oggbs* pInternalOggbs = (drflac_oggbs*)((drflac_uint8*)pFlac->pDecodedSamples + decodedSamplesAllocationSize + seektableSize);
+        *pInternalOggbs = oggbs;
+
+        /* The Ogg bistream needs to be layered on top of the original bitstream. */
+        pFlac->bs.onRead = drflac__on_read_ogg;
+        pFlac->bs.onSeek = drflac__on_seek_ogg;
+        pFlac->bs.pUserData = (void*)pInternalOggbs;
+        pFlac->_oggbs = (void*)pInternalOggbs;
+    }
+#endif
+
+    pFlac->firstFLACFramePosInBytes = firstFramePos;
+
+    /* NOTE: Seektables are not currently compatible with Ogg encapsulation (Ogg has its own accelerated seeking system). I may change this later, so I'm leaving this here for now. */
+#ifndef DR_FLAC_NO_OGG
+    if (init.container == drflac_container_ogg)
+    {
+        pFlac->pSeekpoints = NULL;
+        pFlac->seekpointCount = 0;
+    }
+    else
+#endif
+    {
+        /* If we have a seektable we need to load it now, making sure we move back to where we were previously. */
+        if (seektablePos != 0) {
+            pFlac->seekpointCount = seektableSize / sizeof(*pFlac->pSeekpoints);
+            pFlac->pSeekpoints = (drflac_seekpoint*)((drflac_uint8*)pFlac->pDecodedSamples + decodedSamplesAllocationSize);
+
+            DRFLAC_ASSERT(pFlac->bs.onSeek != NULL);
+            DRFLAC_ASSERT(pFlac->bs.onRead != NULL);
+
+            /* Seek to the seektable, then just read directly into our seektable buffer. */
+            if (pFlac->bs.onSeek(pFlac->bs.pUserData, (int)seektablePos, drflac_seek_origin_start)) {
+                if (pFlac->bs.onRead(pFlac->bs.pUserData, pFlac->pSeekpoints, seektableSize) == seektableSize) {
+                    /* Endian swap. */
+                    drflac_uint32 iSeekpoint;
+                    for (iSeekpoint = 0; iSeekpoint < pFlac->seekpointCount; ++iSeekpoint) {
+                        pFlac->pSeekpoints[iSeekpoint].firstPCMFrame   = drflac__be2host_64(pFlac->pSeekpoints[iSeekpoint].firstPCMFrame);
+                        pFlac->pSeekpoints[iSeekpoint].flacFrameOffset = drflac__be2host_64(pFlac->pSeekpoints[iSeekpoint].flacFrameOffset);
+                        pFlac->pSeekpoints[iSeekpoint].pcmFrameCount   = drflac__be2host_16(pFlac->pSeekpoints[iSeekpoint].pcmFrameCount);
+                    }
+                } else {
+                    /* Failed to read the seektable. Pretend we don't have one. */
+                    pFlac->pSeekpoints = NULL;
+                    pFlac->seekpointCount = 0;
+                }
+
+                /* We need to seek back to where we were. If this fails it's a critical error. */
+                if (!pFlac->bs.onSeek(pFlac->bs.pUserData, (int)pFlac->firstFLACFramePosInBytes, drflac_seek_origin_start)) {
+                    drflac__free_from_callbacks(pFlac, &allocationCallbacks);
+                    return NULL;
+                }
+            } else {
+                /* Failed to seek to the seektable. Ominous sign, but for now we can just pretend we don't have one. */
+                pFlac->pSeekpoints = NULL;
+                pFlac->seekpointCount = 0;
+            }
+        }
+    }
+
+
+    /*
+    If we get here, but don't have a STREAMINFO block, it means we've opened the stream in relaxed mode and need to decode
+    the first frame.
+    */
+    if (!init.hasStreamInfoBlock) {
+        pFlac->currentFLACFrame.header = init.firstFrameHeader;
+        for (;;) {
+            drflac_result result = drflac__decode_flac_frame(pFlac);
+            if (result == DRFLAC_SUCCESS) {
+                break;
+            } else {
+                if (result == DRFLAC_CRC_MISMATCH) {
+                    if (!drflac__read_next_flac_frame_header(&pFlac->bs, pFlac->bitsPerSample, &pFlac->currentFLACFrame.header)) {
+                        drflac__free_from_callbacks(pFlac, &allocationCallbacks);
+                        return NULL;
+                    }
+                    continue;
+                } else {
+                    drflac__free_from_callbacks(pFlac, &allocationCallbacks);
+                    return NULL;
+                }
+            }
+        }
+    }
+
+    return pFlac;
+}
+
+
+
+#ifndef DR_FLAC_NO_STDIO
+#include <stdio.h>
+#include <wchar.h>      /* For wcslen(), wcsrtombs() */
+
+/* drflac_result_from_errno() is only used for fopen() and wfopen() so putting it inside DR_WAV_NO_STDIO for now. If something else needs this later we can move it out. */
+#include <errno.h>
+static drflac_result drflac_result_from_errno(int e)
+{
+    switch (e)
+    {
+        case 0: return DRFLAC_SUCCESS;
+    #ifdef EPERM
+        case EPERM: return DRFLAC_INVALID_OPERATION;
+    #endif
+    #ifdef ENOENT
+        case ENOENT: return DRFLAC_DOES_NOT_EXIST;
+    #endif
+    #ifdef ESRCH
+        case ESRCH: return DRFLAC_DOES_NOT_EXIST;
+    #endif
+    #ifdef EINTR
+        case EINTR: return DRFLAC_INTERRUPT;
+    #endif
+    #ifdef EIO
+        case EIO: return DRFLAC_IO_ERROR;
+    #endif
+    #ifdef ENXIO
+        case ENXIO: return DRFLAC_DOES_NOT_EXIST;
+    #endif
+    #ifdef E2BIG
+        case E2BIG: return DRFLAC_INVALID_ARGS;
+    #endif
+    #ifdef ENOEXEC
+        case ENOEXEC: return DRFLAC_INVALID_FILE;
+    #endif
+    #ifdef EBADF
+        case EBADF: return DRFLAC_INVALID_FILE;
+    #endif
+    #ifdef ECHILD
+        case ECHILD: return DRFLAC_ERROR;
+    #endif
+    #ifdef EAGAIN
+        case EAGAIN: return DRFLAC_UNAVAILABLE;
+    #endif
+    #ifdef ENOMEM
+        case ENOMEM: return DRFLAC_OUT_OF_MEMORY;
+    #endif
+    #ifdef EACCES
+        case EACCES: return DRFLAC_ACCESS_DENIED;
+    #endif
+    #ifdef EFAULT
+        case EFAULT: return DRFLAC_BAD_ADDRESS;
+    #endif
+    #ifdef ENOTBLK
+        case ENOTBLK: return DRFLAC_ERROR;
+    #endif
+    #ifdef EBUSY
+        case EBUSY: return DRFLAC_BUSY;
+    #endif
+    #ifdef EEXIST
+        case EEXIST: return DRFLAC_ALREADY_EXISTS;
+    #endif
+    #ifdef EXDEV
+        case EXDEV: return DRFLAC_ERROR;
+    #endif
+    #ifdef ENODEV
+        case ENODEV: return DRFLAC_DOES_NOT_EXIST;
+    #endif
+    #ifdef ENOTDIR
+        case ENOTDIR: return DRFLAC_NOT_DIRECTORY;
+    #endif
+    #ifdef EISDIR
+        case EISDIR: return DRFLAC_IS_DIRECTORY;
+    #endif
+    #ifdef EINVAL
+        case EINVAL: return DRFLAC_INVALID_ARGS;
+    #endif
+    #ifdef ENFILE
+        case ENFILE: return DRFLAC_TOO_MANY_OPEN_FILES;
+    #endif
+    #ifdef EMFILE
+        case EMFILE: return DRFLAC_TOO_MANY_OPEN_FILES;
+    #endif
+    #ifdef ENOTTY
+        case ENOTTY: return DRFLAC_INVALID_OPERATION;
+    #endif
+    #ifdef ETXTBSY
+        case ETXTBSY: return DRFLAC_BUSY;
+    #endif
+    #ifdef EFBIG
+        case EFBIG: return DRFLAC_TOO_BIG;
+    #endif
+    #ifdef ENOSPC
+        case ENOSPC: return DRFLAC_NO_SPACE;
+    #endif
+    #ifdef ESPIPE
+        case ESPIPE: return DRFLAC_BAD_SEEK;
+    #endif
+    #ifdef EROFS
+        case EROFS: return DRFLAC_ACCESS_DENIED;
+    #endif
+    #ifdef EMLINK
+        case EMLINK: return DRFLAC_TOO_MANY_LINKS;
+    #endif
+    #ifdef EPIPE
+        case EPIPE: return DRFLAC_BAD_PIPE;
+    #endif
+    #ifdef EDOM
+        case EDOM: return DRFLAC_OUT_OF_RANGE;
+    #endif
+    #ifdef ERANGE
+        case ERANGE: return DRFLAC_OUT_OF_RANGE;
+    #endif
+    #ifdef EDEADLK
+        case EDEADLK: return DRFLAC_DEADLOCK;
+    #endif
+    #ifdef ENAMETOOLONG
+        case ENAMETOOLONG: return DRFLAC_PATH_TOO_LONG;
+    #endif
+    #ifdef ENOLCK
+        case ENOLCK: return DRFLAC_ERROR;
+    #endif
+    #ifdef ENOSYS
+        case ENOSYS: return DRFLAC_NOT_IMPLEMENTED;
+    #endif
+    #ifdef ENOTEMPTY
+        case ENOTEMPTY: return DRFLAC_DIRECTORY_NOT_EMPTY;
+    #endif
+    #ifdef ELOOP
+        case ELOOP: return DRFLAC_TOO_MANY_LINKS;
+    #endif
+    #ifdef ENOMSG
+        case ENOMSG: return DRFLAC_NO_MESSAGE;
+    #endif
+    #ifdef EIDRM
+        case EIDRM: return DRFLAC_ERROR;
+    #endif
+    #ifdef ECHRNG
+        case ECHRNG: return DRFLAC_ERROR;
+    #endif
+    #ifdef EL2NSYNC
+        case EL2NSYNC: return DRFLAC_ERROR;
+    #endif
+    #ifdef EL3HLT
+        case EL3HLT: return DRFLAC_ERROR;
+    #endif
+    #ifdef EL3RST
+        case EL3RST: return DRFLAC_ERROR;
+    #endif
+    #ifdef ELNRNG
+        case ELNRNG: return DRFLAC_OUT_OF_RANGE;
+    #endif
+    #ifdef EUNATCH
+        case EUNATCH: return DRFLAC_ERROR;
+    #endif
+    #ifdef ENOCSI
+        case ENOCSI: return DRFLAC_ERROR;
+    #endif
+    #ifdef EL2HLT
+        case EL2HLT: return DRFLAC_ERROR;
+    #endif
+    #ifdef EBADE
+        case EBADE: return DRFLAC_ERROR;
+    #endif
+    #ifdef EBADR
+        case EBADR: return DRFLAC_ERROR;
+    #endif
+    #ifdef EXFULL
+        case EXFULL: return DRFLAC_ERROR;
+    #endif
+    #ifdef ENOANO
+        case ENOANO: return DRFLAC_ERROR;
+    #endif
+    #ifdef EBADRQC
+        case EBADRQC: return DRFLAC_ERROR;
+    #endif
+    #ifdef EBADSLT
+        case EBADSLT: return DRFLAC_ERROR;
+    #endif
+    #ifdef EBFONT
+        case EBFONT: return DRFLAC_INVALID_FILE;
+    #endif
+    #ifdef ENOSTR
+        case ENOSTR: return DRFLAC_ERROR;
+    #endif
+    #ifdef ENODATA
+        case ENODATA: return DRFLAC_NO_DATA_AVAILABLE;
+    #endif
+    #ifdef ETIME
+        case ETIME: return DRFLAC_TIMEOUT;
+    #endif
+    #ifdef ENOSR
+        case ENOSR: return DRFLAC_NO_DATA_AVAILABLE;
+    #endif
+    #ifdef ENONET
+        case ENONET: return DRFLAC_NO_NETWORK;
+    #endif
+    #ifdef ENOPKG
+        case ENOPKG: return DRFLAC_ERROR;
+    #endif
+    #ifdef EREMOTE
+        case EREMOTE: return DRFLAC_ERROR;
+    #endif
+    #ifdef ENOLINK
+        case ENOLINK: return DRFLAC_ERROR;
+    #endif
+    #ifdef EADV
+        case EADV: return DRFLAC_ERROR;
+    #endif
+    #ifdef ESRMNT
+        case ESRMNT: return DRFLAC_ERROR;
+    #endif
+    #ifdef ECOMM
+        case ECOMM: return DRFLAC_ERROR;
+    #endif
+    #ifdef EPROTO
+        case EPROTO: return DRFLAC_ERROR;
+    #endif
+    #ifdef EMULTIHOP
+        case EMULTIHOP: return DRFLAC_ERROR;
+    #endif
+    #ifdef EDOTDOT
+        case EDOTDOT: return DRFLAC_ERROR;
+    #endif
+    #ifdef EBADMSG
+        case EBADMSG: return DRFLAC_BAD_MESSAGE;
+    #endif
+    #ifdef EOVERFLOW
+        case EOVERFLOW: return DRFLAC_TOO_BIG;
+    #endif
+    #ifdef ENOTUNIQ
+        case ENOTUNIQ: return DRFLAC_NOT_UNIQUE;
+    #endif
+    #ifdef EBADFD
+        case EBADFD: return DRFLAC_ERROR;
+    #endif
+    #ifdef EREMCHG
+        case EREMCHG: return DRFLAC_ERROR;
+    #endif
+    #ifdef ELIBACC
+        case ELIBACC: return DRFLAC_ACCESS_DENIED;
+    #endif
+    #ifdef ELIBBAD
+        case ELIBBAD: return DRFLAC_INVALID_FILE;
+    #endif
+    #ifdef ELIBSCN
+        case ELIBSCN: return DRFLAC_INVALID_FILE;
+    #endif
+    #ifdef ELIBMAX
+        case ELIBMAX: return DRFLAC_ERROR;
+    #endif
+    #ifdef ELIBEXEC
+        case ELIBEXEC: return DRFLAC_ERROR;
+    #endif
+    #ifdef EILSEQ
+        case EILSEQ: return DRFLAC_INVALID_DATA;
+    #endif
+    #ifdef ERESTART
+        case ERESTART: return DRFLAC_ERROR;
+    #endif
+    #ifdef ESTRPIPE
+        case ESTRPIPE: return DRFLAC_ERROR;
+    #endif
+    #ifdef EUSERS
+        case EUSERS: return DRFLAC_ERROR;
+    #endif
+    #ifdef ENOTSOCK
+        case ENOTSOCK: return DRFLAC_NOT_SOCKET;
+    #endif
+    #ifdef EDESTADDRREQ
+        case EDESTADDRREQ: return DRFLAC_NO_ADDRESS;
+    #endif
+    #ifdef EMSGSIZE
+        case EMSGSIZE: return DRFLAC_TOO_BIG;
+    #endif
+    #ifdef EPROTOTYPE
+        case EPROTOTYPE: return DRFLAC_BAD_PROTOCOL;
+    #endif
+    #ifdef ENOPROTOOPT
+        case ENOPROTOOPT: return DRFLAC_PROTOCOL_UNAVAILABLE;
+    #endif
+    #ifdef EPROTONOSUPPORT
+        case EPROTONOSUPPORT: return DRFLAC_PROTOCOL_NOT_SUPPORTED;
+    #endif
+    #ifdef ESOCKTNOSUPPORT
+        case ESOCKTNOSUPPORT: return DRFLAC_SOCKET_NOT_SUPPORTED;
+    #endif
+    #ifdef EOPNOTSUPP
+        case EOPNOTSUPP: return DRFLAC_INVALID_OPERATION;
+    #endif
+    #ifdef EPFNOSUPPORT
+        case EPFNOSUPPORT: return DRFLAC_PROTOCOL_FAMILY_NOT_SUPPORTED;
+    #endif
+    #ifdef EAFNOSUPPORT
+        case EAFNOSUPPORT: return DRFLAC_ADDRESS_FAMILY_NOT_SUPPORTED;
+    #endif
+    #ifdef EADDRINUSE
+        case EADDRINUSE: return DRFLAC_ALREADY_IN_USE;
+    #endif
+    #ifdef EADDRNOTAVAIL
+        case EADDRNOTAVAIL: return DRFLAC_ERROR;
+    #endif
+    #ifdef ENETDOWN
+        case ENETDOWN: return DRFLAC_NO_NETWORK;
+    #endif
+    #ifdef ENETUNREACH
+        case ENETUNREACH: return DRFLAC_NO_NETWORK;
+    #endif
+    #ifdef ENETRESET
+        case ENETRESET: return DRFLAC_NO_NETWORK;
+    #endif
+    #ifdef ECONNABORTED
+        case ECONNABORTED: return DRFLAC_NO_NETWORK;
+    #endif
+    #ifdef ECONNRESET
+        case ECONNRESET: return DRFLAC_CONNECTION_RESET;
+    #endif
+    #ifdef ENOBUFS
+        case ENOBUFS: return DRFLAC_NO_SPACE;
+    #endif
+    #ifdef EISCONN
+        case EISCONN: return DRFLAC_ALREADY_CONNECTED;
+    #endif
+    #ifdef ENOTCONN
+        case ENOTCONN: return DRFLAC_NOT_CONNECTED;
+    #endif
+    #ifdef ESHUTDOWN
+        case ESHUTDOWN: return DRFLAC_ERROR;
+    #endif
+    #ifdef ETOOMANYREFS
+        case ETOOMANYREFS: return DRFLAC_ERROR;
+    #endif
+    #ifdef ETIMEDOUT
+        case ETIMEDOUT: return DRFLAC_TIMEOUT;
+    #endif
+    #ifdef ECONNREFUSED
+        case ECONNREFUSED: return DRFLAC_CONNECTION_REFUSED;
+    #endif
+    #ifdef EHOSTDOWN
+        case EHOSTDOWN: return DRFLAC_NO_HOST;
+    #endif
+    #ifdef EHOSTUNREACH
+        case EHOSTUNREACH: return DRFLAC_NO_HOST;
+    #endif
+    #ifdef EALREADY
+        case EALREADY: return DRFLAC_IN_PROGRESS;
+    #endif
+    #ifdef EINPROGRESS
+        case EINPROGRESS: return DRFLAC_IN_PROGRESS;
+    #endif
+    #ifdef ESTALE
+        case ESTALE: return DRFLAC_INVALID_FILE;
+    #endif
+    #ifdef EUCLEAN
+        case EUCLEAN: return DRFLAC_ERROR;
+    #endif
+    #ifdef ENOTNAM
+        case ENOTNAM: return DRFLAC_ERROR;
+    #endif
+    #ifdef ENAVAIL
+        case ENAVAIL: return DRFLAC_ERROR;
+    #endif
+    #ifdef EISNAM
+        case EISNAM: return DRFLAC_ERROR;
+    #endif
+    #ifdef EREMOTEIO
+        case EREMOTEIO: return DRFLAC_IO_ERROR;
+    #endif
+    #ifdef EDQUOT
+        case EDQUOT: return DRFLAC_NO_SPACE;
+    #endif
+    #ifdef ENOMEDIUM
+        case ENOMEDIUM: return DRFLAC_DOES_NOT_EXIST;
+    #endif
+    #ifdef EMEDIUMTYPE
+        case EMEDIUMTYPE: return DRFLAC_ERROR;
+    #endif
+    #ifdef ECANCELED
+        case ECANCELED: return DRFLAC_CANCELLED;
+    #endif
+    #ifdef ENOKEY
+        case ENOKEY: return DRFLAC_ERROR;
+    #endif
+    #ifdef EKEYEXPIRED
+        case EKEYEXPIRED: return DRFLAC_ERROR;
+    #endif
+    #ifdef EKEYREVOKED
+        case EKEYREVOKED: return DRFLAC_ERROR;
+    #endif
+    #ifdef EKEYREJECTED
+        case EKEYREJECTED: return DRFLAC_ERROR;
+    #endif
+    #ifdef EOWNERDEAD
+        case EOWNERDEAD: return DRFLAC_ERROR;
+    #endif
+    #ifdef ENOTRECOVERABLE
+        case ENOTRECOVERABLE: return DRFLAC_ERROR;
+    #endif
+    #ifdef ERFKILL
+        case ERFKILL: return DRFLAC_ERROR;
+    #endif
+    #ifdef EHWPOISON
+        case EHWPOISON: return DRFLAC_ERROR;
+    #endif
+        default: return DRFLAC_ERROR;
+    }
+}
+
+static drflac_result drflac_fopen(FILE** ppFile, const char* pFilePath, const char* pOpenMode)
+{
+#if _MSC_VER && _MSC_VER >= 1400
+    errno_t err;
+#endif
+
+    if (ppFile != NULL) {
+        *ppFile = NULL;  /* Safety. */
+    }
+
+    if (pFilePath == NULL || pOpenMode == NULL || ppFile == NULL) {
+        return DRFLAC_INVALID_ARGS;
+    }
+
+#if _MSC_VER && _MSC_VER >= 1400
+    err = fopen_s(ppFile, pFilePath, pOpenMode);
+    if (err != 0) {
+        return drflac_result_from_errno(err);
+    }
+#else
+#if defined(_WIN32) || defined(__APPLE__)
+    *ppFile = fopen(pFilePath, pOpenMode);
+#else
+    #if defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS == 64 && defined(_LARGEFILE64_SOURCE)
+        *ppFile = fopen64(pFilePath, pOpenMode);
+    #else
+        *ppFile = fopen(pFilePath, pOpenMode);
+    #endif
+#endif
+    if (*ppFile == NULL) {
+        drflac_result result = drflac_result_from_errno(errno);
+        if (result == DRFLAC_SUCCESS) {
+            result = DRFLAC_ERROR;   /* Just a safety check to make sure we never ever return success when pFile == NULL. */
+        }
+
+        return result;
+    }
+#endif
+
+    return DRFLAC_SUCCESS;
+}
+
+/*
+_wfopen() isn't always available in all compilation environments.
+
+    * Windows only.
+    * MSVC seems to support it universally as far back as VC6 from what I can tell (haven't checked further back).
+    * MinGW-64 (both 32- and 64-bit) seems to support it.
+    * MinGW wraps it in !defined(__STRICT_ANSI__).
+
+This can be reviewed as compatibility issues arise. The preference is to use _wfopen_s() and _wfopen() as opposed to the wcsrtombs()
+fallback, so if you notice your compiler not detecting this properly I'm happy to look at adding support.
+*/
+#if defined(_WIN32)
+    #if defined(_MSC_VER) || defined(__MINGW64__) || !defined(__STRICT_ANSI__)
+        #define DRFLAC_HAS_WFOPEN
+    #endif
+#endif
+
+static drflac_result drflac_wfopen(FILE** ppFile, const wchar_t* pFilePath, const wchar_t* pOpenMode, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    if (ppFile != NULL) {
+        *ppFile = NULL;  /* Safety. */
+    }
+
+    if (pFilePath == NULL || pOpenMode == NULL || ppFile == NULL) {
+        return DRFLAC_INVALID_ARGS;
+    }
+
+#if defined(DRFLAC_HAS_WFOPEN)
+    {
+        /* Use _wfopen() on Windows. */
+    #if defined(_MSC_VER) && _MSC_VER >= 1400
+        errno_t err = _wfopen_s(ppFile, pFilePath, pOpenMode);
+        if (err != 0) {
+            return drflac_result_from_errno(err);
+        }
+    #else
+        *ppFile = _wfopen(pFilePath, pOpenMode);
+        if (*ppFile == NULL) {
+            return drflac_result_from_errno(errno);
+        }
+    #endif
+        (void)pAllocationCallbacks;
+    }
+#else
+    /*
+    Use fopen() on anything other than Windows. Requires a conversion. This is annoying because fopen() is locale specific. The only real way I can
+    think of to do this is with wcsrtombs(). Note that wcstombs() is apparently not thread-safe because it uses a static global mbstate_t object for
+    maintaining state. I've checked this with -std=c89 and it works, but if somebody get's a compiler error I'll look into improving compatibility.
+    */
+    {
+        mbstate_t mbs;
+        size_t lenMB;
+        const wchar_t* pFilePathTemp = pFilePath;
+        char* pFilePathMB = NULL;
+        char pOpenModeMB[32] = {0};
+
+        /* Get the length first. */
+        DRFLAC_ZERO_OBJECT(&mbs);
+        lenMB = wcsrtombs(NULL, &pFilePathTemp, 0, &mbs);
+        if (lenMB == (size_t)-1) {
+            return drflac_result_from_errno(errno);
+        }
+
+        pFilePathMB = (char*)drflac__malloc_from_callbacks(lenMB + 1, pAllocationCallbacks);
+        if (pFilePathMB == NULL) {
+            return DRFLAC_OUT_OF_MEMORY;
+        }
+
+        pFilePathTemp = pFilePath;
+        DRFLAC_ZERO_OBJECT(&mbs);
+        wcsrtombs(pFilePathMB, &pFilePathTemp, lenMB + 1, &mbs);
+
+        /* The open mode should always consist of ASCII characters so we should be able to do a trivial conversion. */
+        {
+            size_t i = 0;
+            for (;;) {
+                if (pOpenMode[i] == 0) {
+                    pOpenModeMB[i] = '\0';
+                    break;
+                }
+
+                pOpenModeMB[i] = (char)pOpenMode[i];
+                i += 1;
+            }
+        }
+
+        *ppFile = fopen(pFilePathMB, pOpenModeMB);
+
+        drflac__free_from_callbacks(pFilePathMB, pAllocationCallbacks);
+    }
+
+    if (*ppFile == NULL) {
+        return DRFLAC_ERROR;
+    }
+#endif
+
+    return DRFLAC_SUCCESS;
+}
+
+static size_t drflac__on_read_stdio(void* pUserData, void* bufferOut, size_t bytesToRead)
+{
+    return fread(bufferOut, 1, bytesToRead, (FILE*)pUserData);
+}
+
+static drflac_bool32 drflac__on_seek_stdio(void* pUserData, int offset, drflac_seek_origin origin)
+{
+    DRFLAC_ASSERT(offset >= 0);  /* <-- Never seek backwards. */
+
+    return fseek((FILE*)pUserData, offset, (origin == drflac_seek_origin_current) ? SEEK_CUR : SEEK_SET) == 0;
+}
+
+
+DRFLAC_API drflac* drflac_open_file(const char* pFileName, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+    FILE* pFile;
+
+    if (drflac_fopen(&pFile, pFileName, "rb") != DRFLAC_SUCCESS) {
+        return NULL;
+    }
+
+    pFlac = drflac_open(drflac__on_read_stdio, drflac__on_seek_stdio, (void*)pFile, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        fclose(pFile);
+        return NULL;
+    }
+
+    return pFlac;
+}
+
+DRFLAC_API drflac* drflac_open_file_w(const wchar_t* pFileName, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+    FILE* pFile;
+
+    if (drflac_wfopen(&pFile, pFileName, L"rb", pAllocationCallbacks) != DRFLAC_SUCCESS) {
+        return NULL;
+    }
+
+    pFlac = drflac_open(drflac__on_read_stdio, drflac__on_seek_stdio, (void*)pFile, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        fclose(pFile);
+        return NULL;
+    }
+
+    return pFlac;
+}
+
+DRFLAC_API drflac* drflac_open_file_with_metadata(const char* pFileName, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+    FILE* pFile;
+
+    if (drflac_fopen(&pFile, pFileName, "rb") != DRFLAC_SUCCESS) {
+        return NULL;
+    }
+
+    pFlac = drflac_open_with_metadata_private(drflac__on_read_stdio, drflac__on_seek_stdio, onMeta, drflac_container_unknown, (void*)pFile, pUserData, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        fclose(pFile);
+        return pFlac;
+    }
+
+    return pFlac;
+}
+
+DRFLAC_API drflac* drflac_open_file_with_metadata_w(const wchar_t* pFileName, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+    FILE* pFile;
+
+    if (drflac_wfopen(&pFile, pFileName, L"rb", pAllocationCallbacks) != DRFLAC_SUCCESS) {
+        return NULL;
+    }
+
+    pFlac = drflac_open_with_metadata_private(drflac__on_read_stdio, drflac__on_seek_stdio, onMeta, drflac_container_unknown, (void*)pFile, pUserData, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        fclose(pFile);
+        return pFlac;
+    }
+
+    return pFlac;
+}
+#endif  /* DR_FLAC_NO_STDIO */
+
+static size_t drflac__on_read_memory(void* pUserData, void* bufferOut, size_t bytesToRead)
+{
+    drflac__memory_stream* memoryStream = (drflac__memory_stream*)pUserData;
+    size_t bytesRemaining;
+
+    DRFLAC_ASSERT(memoryStream != NULL);
+    DRFLAC_ASSERT(memoryStream->dataSize >= memoryStream->currentReadPos);
+
+    bytesRemaining = memoryStream->dataSize - memoryStream->currentReadPos;
+    if (bytesToRead > bytesRemaining) {
+        bytesToRead = bytesRemaining;
+    }
+
+    if (bytesToRead > 0) {
+        DRFLAC_COPY_MEMORY(bufferOut, memoryStream->data + memoryStream->currentReadPos, bytesToRead);
+        memoryStream->currentReadPos += bytesToRead;
+    }
+
+    return bytesToRead;
+}
+
+static drflac_bool32 drflac__on_seek_memory(void* pUserData, int offset, drflac_seek_origin origin)
+{
+    drflac__memory_stream* memoryStream = (drflac__memory_stream*)pUserData;
+
+    DRFLAC_ASSERT(memoryStream != NULL);
+    DRFLAC_ASSERT(offset >= 0); /* <-- Never seek backwards. */
+
+    if (offset > (drflac_int64)memoryStream->dataSize) {
+        return DRFLAC_FALSE;
+    }
+
+    if (origin == drflac_seek_origin_current) {
+        if (memoryStream->currentReadPos + offset <= memoryStream->dataSize) {
+            memoryStream->currentReadPos += offset;
+        } else {
+            return DRFLAC_FALSE;  /* Trying to seek too far forward. */
+        }
+    } else {
+        if ((drflac_uint32)offset <= memoryStream->dataSize) {
+            memoryStream->currentReadPos = offset;
+        } else {
+            return DRFLAC_FALSE;  /* Trying to seek too far forward. */
+        }
+    }
+
+    return DRFLAC_TRUE;
+}
+
+DRFLAC_API drflac* drflac_open_memory(const void* pData, size_t dataSize, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac__memory_stream memoryStream;
+    drflac* pFlac;
+
+    memoryStream.data = (const drflac_uint8*)pData;
+    memoryStream.dataSize = dataSize;
+    memoryStream.currentReadPos = 0;
+    pFlac = drflac_open(drflac__on_read_memory, drflac__on_seek_memory, &memoryStream, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    pFlac->memoryStream = memoryStream;
+
+    /* This is an awful hack... */
+#ifndef DR_FLAC_NO_OGG
+    if (pFlac->container == drflac_container_ogg)
+    {
+        drflac_oggbs* oggbs = (drflac_oggbs*)pFlac->_oggbs;
+        oggbs->pUserData = &pFlac->memoryStream;
+    }
+    else
+#endif
+    {
+        pFlac->bs.pUserData = &pFlac->memoryStream;
+    }
+
+    return pFlac;
+}
+
+DRFLAC_API drflac* drflac_open_memory_with_metadata(const void* pData, size_t dataSize, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac__memory_stream memoryStream;
+    drflac* pFlac;
+
+    memoryStream.data = (const drflac_uint8*)pData;
+    memoryStream.dataSize = dataSize;
+    memoryStream.currentReadPos = 0;
+    pFlac = drflac_open_with_metadata_private(drflac__on_read_memory, drflac__on_seek_memory, onMeta, drflac_container_unknown, &memoryStream, pUserData, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    pFlac->memoryStream = memoryStream;
+
+    /* This is an awful hack... */
+#ifndef DR_FLAC_NO_OGG
+    if (pFlac->container == drflac_container_ogg)
+    {
+        drflac_oggbs* oggbs = (drflac_oggbs*)pFlac->_oggbs;
+        oggbs->pUserData = &pFlac->memoryStream;
+    }
+    else
+#endif
+    {
+        pFlac->bs.pUserData = &pFlac->memoryStream;
+    }
+
+    return pFlac;
+}
+
+
+
+DRFLAC_API drflac* drflac_open(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    return drflac_open_with_metadata_private(onRead, onSeek, NULL, drflac_container_unknown, pUserData, pUserData, pAllocationCallbacks);
+}
+DRFLAC_API drflac* drflac_open_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_container container, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    return drflac_open_with_metadata_private(onRead, onSeek, NULL, container, pUserData, pUserData, pAllocationCallbacks);
+}
+
+DRFLAC_API drflac* drflac_open_with_metadata(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    return drflac_open_with_metadata_private(onRead, onSeek, onMeta, drflac_container_unknown, pUserData, pUserData, pAllocationCallbacks);
+}
+DRFLAC_API drflac* drflac_open_with_metadata_relaxed(drflac_read_proc onRead, drflac_seek_proc onSeek, drflac_meta_proc onMeta, drflac_container container, void* pUserData, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    return drflac_open_with_metadata_private(onRead, onSeek, onMeta, container, pUserData, pUserData, pAllocationCallbacks);
+}
+
+DRFLAC_API void drflac_close(drflac* pFlac)
+{
+    if (pFlac == NULL) {
+        return;
+    }
+
+#ifndef DR_FLAC_NO_STDIO
+    /*
+    If we opened the file with drflac_open_file() we will want to close the file handle. We can know whether or not drflac_open_file()
+    was used by looking at the callbacks.
+    */
+    if (pFlac->bs.onRead == drflac__on_read_stdio) {
+        fclose((FILE*)pFlac->bs.pUserData);
+    }
+
+#ifndef DR_FLAC_NO_OGG
+    /* Need to clean up Ogg streams a bit differently due to the way the bit streaming is chained. */
+    if (pFlac->container == drflac_container_ogg) {
+        drflac_oggbs* oggbs = (drflac_oggbs*)pFlac->_oggbs;
+        DRFLAC_ASSERT(pFlac->bs.onRead == drflac__on_read_ogg);
+
+        if (oggbs->onRead == drflac__on_read_stdio) {
+            fclose((FILE*)oggbs->pUserData);
+        }
+    }
+#endif
+#endif
+
+    drflac__free_from_callbacks(pFlac, &pFlac->allocationCallbacks);
+}
+
+
+#if 0
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_left_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    for (i = 0; i < frameCount; ++i) {
+        drflac_uint32 left  = (drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+        drflac_uint32 side  = (drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+        drflac_uint32 right = left - side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left;
+        pOutputSamples[i*2+1] = (drflac_int32)right;
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_left_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    for (i = 0; i < frameCount4; ++i) {
+        drflac_uint32 left0 = pInputSamples0U32[i*4+0] << shift0;
+        drflac_uint32 left1 = pInputSamples0U32[i*4+1] << shift0;
+        drflac_uint32 left2 = pInputSamples0U32[i*4+2] << shift0;
+        drflac_uint32 left3 = pInputSamples0U32[i*4+3] << shift0;
+
+        drflac_uint32 side0 = pInputSamples1U32[i*4+0] << shift1;
+        drflac_uint32 side1 = pInputSamples1U32[i*4+1] << shift1;
+        drflac_uint32 side2 = pInputSamples1U32[i*4+2] << shift1;
+        drflac_uint32 side3 = pInputSamples1U32[i*4+3] << shift1;
+
+        drflac_uint32 right0 = left0 - side0;
+        drflac_uint32 right1 = left1 - side1;
+        drflac_uint32 right2 = left2 - side2;
+        drflac_uint32 right3 = left3 - side3;
+
+        pOutputSamples[i*8+0] = (drflac_int32)left0;
+        pOutputSamples[i*8+1] = (drflac_int32)right0;
+        pOutputSamples[i*8+2] = (drflac_int32)left1;
+        pOutputSamples[i*8+3] = (drflac_int32)right1;
+        pOutputSamples[i*8+4] = (drflac_int32)left2;
+        pOutputSamples[i*8+5] = (drflac_int32)right2;
+        pOutputSamples[i*8+6] = (drflac_int32)left3;
+        pOutputSamples[i*8+7] = (drflac_int32)right3;
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
+        drflac_uint32 right = left - side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left;
+        pOutputSamples[i*2+1] = (drflac_int32)right;
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_left_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    for (i = 0; i < frameCount4; ++i) {
+        __m128i left  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
+        __m128i side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
+        __m128i right = _mm_sub_epi32(left, side);
+
+        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right));
+        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
+        drflac_uint32 right = left - side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left;
+        pOutputSamples[i*2+1] = (drflac_int32)right;
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_left_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+    int32x4_t shift0_4;
+    int32x4_t shift1_4;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    shift0_4 = vdupq_n_s32(shift0);
+    shift1_4 = vdupq_n_s32(shift1);
+
+    for (i = 0; i < frameCount4; ++i) {
+        uint32x4_t left;
+        uint32x4_t side;
+        uint32x4_t right;
+
+        left  = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4);
+        side  = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4);
+        right = vsubq_u32(left, side);
+
+        drflac__vst2q_u32((drflac_uint32*)pOutputSamples + i*8, vzipq_u32(left, right));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
+        drflac_uint32 right = left - side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left;
+        pOutputSamples[i*2+1] = (drflac_int32)right;
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_left_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s32__decode_left_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s32__decode_left_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+#if 0
+        drflac_read_pcm_frames_s32__decode_left_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+        drflac_read_pcm_frames_s32__decode_left_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+    }
+}
+
+
+#if 0
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_right_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    for (i = 0; i < frameCount; ++i) {
+        drflac_uint32 side  = (drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+        drflac_uint32 right = (drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+        drflac_uint32 left  = right + side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left;
+        pOutputSamples[i*2+1] = (drflac_int32)right;
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_right_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    for (i = 0; i < frameCount4; ++i) {
+        drflac_uint32 side0  = pInputSamples0U32[i*4+0] << shift0;
+        drflac_uint32 side1  = pInputSamples0U32[i*4+1] << shift0;
+        drflac_uint32 side2  = pInputSamples0U32[i*4+2] << shift0;
+        drflac_uint32 side3  = pInputSamples0U32[i*4+3] << shift0;
+
+        drflac_uint32 right0 = pInputSamples1U32[i*4+0] << shift1;
+        drflac_uint32 right1 = pInputSamples1U32[i*4+1] << shift1;
+        drflac_uint32 right2 = pInputSamples1U32[i*4+2] << shift1;
+        drflac_uint32 right3 = pInputSamples1U32[i*4+3] << shift1;
+
+        drflac_uint32 left0 = right0 + side0;
+        drflac_uint32 left1 = right1 + side1;
+        drflac_uint32 left2 = right2 + side2;
+        drflac_uint32 left3 = right3 + side3;
+
+        pOutputSamples[i*8+0] = (drflac_int32)left0;
+        pOutputSamples[i*8+1] = (drflac_int32)right0;
+        pOutputSamples[i*8+2] = (drflac_int32)left1;
+        pOutputSamples[i*8+3] = (drflac_int32)right1;
+        pOutputSamples[i*8+4] = (drflac_int32)left2;
+        pOutputSamples[i*8+5] = (drflac_int32)right2;
+        pOutputSamples[i*8+6] = (drflac_int32)left3;
+        pOutputSamples[i*8+7] = (drflac_int32)right3;
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 right = pInputSamples1U32[i] << shift1;
+        drflac_uint32 left  = right + side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left;
+        pOutputSamples[i*2+1] = (drflac_int32)right;
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_right_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    for (i = 0; i < frameCount4; ++i) {
+        __m128i side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
+        __m128i right = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
+        __m128i left  = _mm_add_epi32(right, side);
+
+        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right));
+        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 right = pInputSamples1U32[i] << shift1;
+        drflac_uint32 left  = right + side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left;
+        pOutputSamples[i*2+1] = (drflac_int32)right;
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_right_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+    int32x4_t shift0_4;
+    int32x4_t shift1_4;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    shift0_4 = vdupq_n_s32(shift0);
+    shift1_4 = vdupq_n_s32(shift1);
+
+    for (i = 0; i < frameCount4; ++i) {
+        uint32x4_t side;
+        uint32x4_t right;
+        uint32x4_t left;
+
+        side  = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4);
+        right = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4);
+        left  = vaddq_u32(right, side);
+
+        drflac__vst2q_u32((drflac_uint32*)pOutputSamples + i*8, vzipq_u32(left, right));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 right = pInputSamples1U32[i] << shift1;
+        drflac_uint32 left  = right + side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left;
+        pOutputSamples[i*2+1] = (drflac_int32)right;
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_right_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s32__decode_right_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s32__decode_right_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+#if 0
+        drflac_read_pcm_frames_s32__decode_right_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+        drflac_read_pcm_frames_s32__decode_right_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+    }
+}
+
+
+#if 0
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_mid_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    for (drflac_uint64 i = 0; i < frameCount; ++i) {
+        drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+        drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+        mid = (mid << 1) | (side & 0x01);
+
+        pOutputSamples[i*2+0] = (drflac_int32)((drflac_uint32)((drflac_int32)(mid + side) >> 1) << unusedBitsPerSample);
+        pOutputSamples[i*2+1] = (drflac_int32)((drflac_uint32)((drflac_int32)(mid - side) >> 1) << unusedBitsPerSample);
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_mid_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_int32 shift = unusedBitsPerSample;
+
+    if (shift > 0) {
+        shift -= 1;
+        for (i = 0; i < frameCount4; ++i) {
+            drflac_uint32 temp0L;
+            drflac_uint32 temp1L;
+            drflac_uint32 temp2L;
+            drflac_uint32 temp3L;
+            drflac_uint32 temp0R;
+            drflac_uint32 temp1R;
+            drflac_uint32 temp2R;
+            drflac_uint32 temp3R;
+
+            drflac_uint32 mid0  = pInputSamples0U32[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid1  = pInputSamples0U32[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid2  = pInputSamples0U32[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid3  = pInputSamples0U32[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+
+            drflac_uint32 side0 = pInputSamples1U32[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side1 = pInputSamples1U32[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side2 = pInputSamples1U32[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side3 = pInputSamples1U32[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid0 = (mid0 << 1) | (side0 & 0x01);
+            mid1 = (mid1 << 1) | (side1 & 0x01);
+            mid2 = (mid2 << 1) | (side2 & 0x01);
+            mid3 = (mid3 << 1) | (side3 & 0x01);
+
+            temp0L = (mid0 + side0) << shift;
+            temp1L = (mid1 + side1) << shift;
+            temp2L = (mid2 + side2) << shift;
+            temp3L = (mid3 + side3) << shift;
+
+            temp0R = (mid0 - side0) << shift;
+            temp1R = (mid1 - side1) << shift;
+            temp2R = (mid2 - side2) << shift;
+            temp3R = (mid3 - side3) << shift;
+
+            pOutputSamples[i*8+0] = (drflac_int32)temp0L;
+            pOutputSamples[i*8+1] = (drflac_int32)temp0R;
+            pOutputSamples[i*8+2] = (drflac_int32)temp1L;
+            pOutputSamples[i*8+3] = (drflac_int32)temp1R;
+            pOutputSamples[i*8+4] = (drflac_int32)temp2L;
+            pOutputSamples[i*8+5] = (drflac_int32)temp2R;
+            pOutputSamples[i*8+6] = (drflac_int32)temp3L;
+            pOutputSamples[i*8+7] = (drflac_int32)temp3R;
+        }
+    } else {
+        for (i = 0; i < frameCount4; ++i) {
+            drflac_uint32 temp0L;
+            drflac_uint32 temp1L;
+            drflac_uint32 temp2L;
+            drflac_uint32 temp3L;
+            drflac_uint32 temp0R;
+            drflac_uint32 temp1R;
+            drflac_uint32 temp2R;
+            drflac_uint32 temp3R;
+
+            drflac_uint32 mid0  = pInputSamples0U32[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid1  = pInputSamples0U32[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid2  = pInputSamples0U32[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid3  = pInputSamples0U32[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+
+            drflac_uint32 side0 = pInputSamples1U32[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side1 = pInputSamples1U32[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side2 = pInputSamples1U32[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side3 = pInputSamples1U32[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid0 = (mid0 << 1) | (side0 & 0x01);
+            mid1 = (mid1 << 1) | (side1 & 0x01);
+            mid2 = (mid2 << 1) | (side2 & 0x01);
+            mid3 = (mid3 << 1) | (side3 & 0x01);
+
+            temp0L = (drflac_uint32)((drflac_int32)(mid0 + side0) >> 1);
+            temp1L = (drflac_uint32)((drflac_int32)(mid1 + side1) >> 1);
+            temp2L = (drflac_uint32)((drflac_int32)(mid2 + side2) >> 1);
+            temp3L = (drflac_uint32)((drflac_int32)(mid3 + side3) >> 1);
+
+            temp0R = (drflac_uint32)((drflac_int32)(mid0 - side0) >> 1);
+            temp1R = (drflac_uint32)((drflac_int32)(mid1 - side1) >> 1);
+            temp2R = (drflac_uint32)((drflac_int32)(mid2 - side2) >> 1);
+            temp3R = (drflac_uint32)((drflac_int32)(mid3 - side3) >> 1);
+
+            pOutputSamples[i*8+0] = (drflac_int32)temp0L;
+            pOutputSamples[i*8+1] = (drflac_int32)temp0R;
+            pOutputSamples[i*8+2] = (drflac_int32)temp1L;
+            pOutputSamples[i*8+3] = (drflac_int32)temp1R;
+            pOutputSamples[i*8+4] = (drflac_int32)temp2L;
+            pOutputSamples[i*8+5] = (drflac_int32)temp2R;
+            pOutputSamples[i*8+6] = (drflac_int32)temp3L;
+            pOutputSamples[i*8+7] = (drflac_int32)temp3R;
+        }
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+        drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+        mid = (mid << 1) | (side & 0x01);
+
+        pOutputSamples[i*2+0] = (drflac_int32)((drflac_uint32)((drflac_int32)(mid + side) >> 1) << unusedBitsPerSample);
+        pOutputSamples[i*2+1] = (drflac_int32)((drflac_uint32)((drflac_int32)(mid - side) >> 1) << unusedBitsPerSample);
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_mid_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_int32 shift = unusedBitsPerSample;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    if (shift == 0) {
+        for (i = 0; i < frameCount4; ++i) {
+            __m128i mid;
+            __m128i side;
+            __m128i left;
+            __m128i right;
+
+            mid   = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+            side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+
+            mid   = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01)));
+
+            left  = _mm_srai_epi32(_mm_add_epi32(mid, side), 1);
+            right = _mm_srai_epi32(_mm_sub_epi32(mid, side), 1);
+
+            _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right));
+            _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right));
+        }
+
+        for (i = (frameCount4 << 2); i < frameCount; ++i) {
+            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid = (mid << 1) | (side & 0x01);
+
+            pOutputSamples[i*2+0] = (drflac_int32)(mid + side) >> 1;
+            pOutputSamples[i*2+1] = (drflac_int32)(mid - side) >> 1;
+        }
+    } else {
+        shift -= 1;
+        for (i = 0; i < frameCount4; ++i) {
+            __m128i mid;
+            __m128i side;
+            __m128i left;
+            __m128i right;
+
+            mid   = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+            side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+
+            mid   = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01)));
+
+            left  = _mm_slli_epi32(_mm_add_epi32(mid, side), shift);
+            right = _mm_slli_epi32(_mm_sub_epi32(mid, side), shift);
+
+            _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right));
+            _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right));
+        }
+
+        for (i = (frameCount4 << 2); i < frameCount; ++i) {
+            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid = (mid << 1) | (side & 0x01);
+
+            pOutputSamples[i*2+0] = (drflac_int32)((mid + side) << shift);
+            pOutputSamples[i*2+1] = (drflac_int32)((mid - side) << shift);
+        }
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_mid_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_int32 shift = unusedBitsPerSample;
+    int32x4_t  wbpsShift0_4; /* wbps = Wasted Bits Per Sample */
+    int32x4_t  wbpsShift1_4; /* wbps = Wasted Bits Per Sample */
+    uint32x4_t one4;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    wbpsShift0_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+    wbpsShift1_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+    one4         = vdupq_n_u32(1);
+
+    if (shift == 0) {
+        for (i = 0; i < frameCount4; ++i) {
+            uint32x4_t mid;
+            uint32x4_t side;
+            int32x4_t left;
+            int32x4_t right;
+
+            mid   = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbpsShift0_4);
+            side  = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbpsShift1_4);
+
+            mid   = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, one4));
+
+            left  = vshrq_n_s32(vreinterpretq_s32_u32(vaddq_u32(mid, side)), 1);
+            right = vshrq_n_s32(vreinterpretq_s32_u32(vsubq_u32(mid, side)), 1);
+
+            drflac__vst2q_s32(pOutputSamples + i*8, vzipq_s32(left, right));
+        }
+
+        for (i = (frameCount4 << 2); i < frameCount; ++i) {
+            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid = (mid << 1) | (side & 0x01);
+
+            pOutputSamples[i*2+0] = (drflac_int32)(mid + side) >> 1;
+            pOutputSamples[i*2+1] = (drflac_int32)(mid - side) >> 1;
+        }
+    } else {
+        int32x4_t shift4;
+
+        shift -= 1;
+        shift4 = vdupq_n_s32(shift);
+
+        for (i = 0; i < frameCount4; ++i) {
+            uint32x4_t mid;
+            uint32x4_t side;
+            int32x4_t left;
+            int32x4_t right;
+
+            mid   = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbpsShift0_4);
+            side  = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbpsShift1_4);
+
+            mid   = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, one4));
+
+            left  = vreinterpretq_s32_u32(vshlq_u32(vaddq_u32(mid, side), shift4));
+            right = vreinterpretq_s32_u32(vshlq_u32(vsubq_u32(mid, side), shift4));
+
+            drflac__vst2q_s32(pOutputSamples + i*8, vzipq_s32(left, right));
+        }
+
+        for (i = (frameCount4 << 2); i < frameCount; ++i) {
+            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid = (mid << 1) | (side & 0x01);
+
+            pOutputSamples[i*2+0] = (drflac_int32)((mid + side) << shift);
+            pOutputSamples[i*2+1] = (drflac_int32)((mid - side) << shift);
+        }
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_mid_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s32__decode_mid_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s32__decode_mid_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+#if 0
+        drflac_read_pcm_frames_s32__decode_mid_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+        drflac_read_pcm_frames_s32__decode_mid_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+    }
+}
+
+
+#if 0
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_independent_stereo__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    for (drflac_uint64 i = 0; i < frameCount; ++i) {
+        pOutputSamples[i*2+0] = (drflac_int32)((drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample));
+        pOutputSamples[i*2+1] = (drflac_int32)((drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample));
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_independent_stereo__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    for (i = 0; i < frameCount4; ++i) {
+        drflac_uint32 tempL0 = pInputSamples0U32[i*4+0] << shift0;
+        drflac_uint32 tempL1 = pInputSamples0U32[i*4+1] << shift0;
+        drflac_uint32 tempL2 = pInputSamples0U32[i*4+2] << shift0;
+        drflac_uint32 tempL3 = pInputSamples0U32[i*4+3] << shift0;
+
+        drflac_uint32 tempR0 = pInputSamples1U32[i*4+0] << shift1;
+        drflac_uint32 tempR1 = pInputSamples1U32[i*4+1] << shift1;
+        drflac_uint32 tempR2 = pInputSamples1U32[i*4+2] << shift1;
+        drflac_uint32 tempR3 = pInputSamples1U32[i*4+3] << shift1;
+
+        pOutputSamples[i*8+0] = (drflac_int32)tempL0;
+        pOutputSamples[i*8+1] = (drflac_int32)tempR0;
+        pOutputSamples[i*8+2] = (drflac_int32)tempL1;
+        pOutputSamples[i*8+3] = (drflac_int32)tempR1;
+        pOutputSamples[i*8+4] = (drflac_int32)tempL2;
+        pOutputSamples[i*8+5] = (drflac_int32)tempR2;
+        pOutputSamples[i*8+6] = (drflac_int32)tempL3;
+        pOutputSamples[i*8+7] = (drflac_int32)tempR3;
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        pOutputSamples[i*2+0] = (drflac_int32)(pInputSamples0U32[i] << shift0);
+        pOutputSamples[i*2+1] = (drflac_int32)(pInputSamples1U32[i] << shift1);
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_independent_stereo__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    for (i = 0; i < frameCount4; ++i) {
+        __m128i left  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
+        __m128i right = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
+
+        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 0), _mm_unpacklo_epi32(left, right));
+        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8 + 4), _mm_unpackhi_epi32(left, right));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        pOutputSamples[i*2+0] = (drflac_int32)(pInputSamples0U32[i] << shift0);
+        pOutputSamples[i*2+1] = (drflac_int32)(pInputSamples1U32[i] << shift1);
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_independent_stereo__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    int32x4_t shift4_0 = vdupq_n_s32(shift0);
+    int32x4_t shift4_1 = vdupq_n_s32(shift1);
+
+    for (i = 0; i < frameCount4; ++i) {
+        int32x4_t left;
+        int32x4_t right;
+
+        left  = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift4_0));
+        right = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift4_1));
+
+        drflac__vst2q_s32(pOutputSamples + i*8, vzipq_s32(left, right));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        pOutputSamples[i*2+0] = (drflac_int32)(pInputSamples0U32[i] << shift0);
+        pOutputSamples[i*2+1] = (drflac_int32)(pInputSamples1U32[i] << shift1);
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s32__decode_independent_stereo(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int32* pOutputSamples)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s32__decode_independent_stereo__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s32__decode_independent_stereo__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+#if 0
+        drflac_read_pcm_frames_s32__decode_independent_stereo__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+        drflac_read_pcm_frames_s32__decode_independent_stereo__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+    }
+}
+
+
+DRFLAC_API drflac_uint64 drflac_read_pcm_frames_s32(drflac* pFlac, drflac_uint64 framesToRead, drflac_int32* pBufferOut)
+{
+    drflac_uint64 framesRead;
+    drflac_uint32 unusedBitsPerSample;
+
+    if (pFlac == NULL || framesToRead == 0) {
+        return 0;
+    }
+
+    if (pBufferOut == NULL) {
+        return drflac__seek_forward_by_pcm_frames(pFlac, framesToRead);
+    }
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 32);
+    unusedBitsPerSample = 32 - pFlac->bitsPerSample;
+
+    framesRead = 0;
+    while (framesToRead > 0) {
+        /* If we've run out of samples in this frame, go to the next. */
+        if (pFlac->currentFLACFrame.pcmFramesRemaining == 0) {
+            if (!drflac__read_and_decode_next_flac_frame(pFlac)) {
+                break;  /* Couldn't read the next frame, so just break from the loop and return. */
+            }
+        } else {
+            unsigned int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFLACFrame.header.channelAssignment);
+            drflac_uint64 iFirstPCMFrame = pFlac->currentFLACFrame.header.blockSizeInPCMFrames - pFlac->currentFLACFrame.pcmFramesRemaining;
+            drflac_uint64 frameCountThisIteration = framesToRead;
+
+            if (frameCountThisIteration > pFlac->currentFLACFrame.pcmFramesRemaining) {
+                frameCountThisIteration = pFlac->currentFLACFrame.pcmFramesRemaining;
+            }
+
+            if (channelCount == 2) {
+                const drflac_int32* pDecodedSamples0 = pFlac->currentFLACFrame.subframes[0].pSamplesS32 + iFirstPCMFrame;
+                const drflac_int32* pDecodedSamples1 = pFlac->currentFLACFrame.subframes[1].pSamplesS32 + iFirstPCMFrame;
+
+                switch (pFlac->currentFLACFrame.header.channelAssignment)
+                {
+                    case DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE:
+                    {
+                        drflac_read_pcm_frames_s32__decode_left_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                    } break;
+
+                    case DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE:
+                    {
+                        drflac_read_pcm_frames_s32__decode_right_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                    } break;
+
+                    case DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE:
+                    {
+                        drflac_read_pcm_frames_s32__decode_mid_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                    } break;
+
+                    case DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT:
+                    default:
+                    {
+                        drflac_read_pcm_frames_s32__decode_independent_stereo(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                    } break;
+                }
+            } else {
+                /* Generic interleaving. */
+                drflac_uint64 i;
+                for (i = 0; i < frameCountThisIteration; ++i) {
+                    unsigned int j;
+                    for (j = 0; j < channelCount; ++j) {
+                        pBufferOut[(i*channelCount)+j] = (drflac_int32)((drflac_uint32)(pFlac->currentFLACFrame.subframes[j].pSamplesS32[iFirstPCMFrame + i]) << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[j].wastedBitsPerSample));
+                    }
+                }
+            }
+
+            framesRead                += frameCountThisIteration;
+            pBufferOut                += frameCountThisIteration * channelCount;
+            framesToRead              -= frameCountThisIteration;
+            pFlac->currentPCMFrame    += frameCountThisIteration;
+            pFlac->currentFLACFrame.pcmFramesRemaining -= (drflac_uint32)frameCountThisIteration;
+        }
+    }
+
+    return framesRead;
+}
+
+
+#if 0
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_left_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    for (i = 0; i < frameCount; ++i) {
+        drflac_uint32 left  = (drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+        drflac_uint32 side  = (drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+        drflac_uint32 right = left - side;
+
+        left  >>= 16;
+        right >>= 16;
+
+        pOutputSamples[i*2+0] = (drflac_int16)left;
+        pOutputSamples[i*2+1] = (drflac_int16)right;
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_left_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    for (i = 0; i < frameCount4; ++i) {
+        drflac_uint32 left0 = pInputSamples0U32[i*4+0] << shift0;
+        drflac_uint32 left1 = pInputSamples0U32[i*4+1] << shift0;
+        drflac_uint32 left2 = pInputSamples0U32[i*4+2] << shift0;
+        drflac_uint32 left3 = pInputSamples0U32[i*4+3] << shift0;
+
+        drflac_uint32 side0 = pInputSamples1U32[i*4+0] << shift1;
+        drflac_uint32 side1 = pInputSamples1U32[i*4+1] << shift1;
+        drflac_uint32 side2 = pInputSamples1U32[i*4+2] << shift1;
+        drflac_uint32 side3 = pInputSamples1U32[i*4+3] << shift1;
+
+        drflac_uint32 right0 = left0 - side0;
+        drflac_uint32 right1 = left1 - side1;
+        drflac_uint32 right2 = left2 - side2;
+        drflac_uint32 right3 = left3 - side3;
+
+        left0  >>= 16;
+        left1  >>= 16;
+        left2  >>= 16;
+        left3  >>= 16;
+
+        right0 >>= 16;
+        right1 >>= 16;
+        right2 >>= 16;
+        right3 >>= 16;
+
+        pOutputSamples[i*8+0] = (drflac_int16)left0;
+        pOutputSamples[i*8+1] = (drflac_int16)right0;
+        pOutputSamples[i*8+2] = (drflac_int16)left1;
+        pOutputSamples[i*8+3] = (drflac_int16)right1;
+        pOutputSamples[i*8+4] = (drflac_int16)left2;
+        pOutputSamples[i*8+5] = (drflac_int16)right2;
+        pOutputSamples[i*8+6] = (drflac_int16)left3;
+        pOutputSamples[i*8+7] = (drflac_int16)right3;
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
+        drflac_uint32 right = left - side;
+
+        left  >>= 16;
+        right >>= 16;
+
+        pOutputSamples[i*2+0] = (drflac_int16)left;
+        pOutputSamples[i*2+1] = (drflac_int16)right;
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_left_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    for (i = 0; i < frameCount4; ++i) {
+        __m128i left  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
+        __m128i side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
+        __m128i right = _mm_sub_epi32(left, side);
+
+        left  = _mm_srai_epi32(left,  16);
+        right = _mm_srai_epi32(right, 16);
+
+        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
+        drflac_uint32 right = left - side;
+
+        left  >>= 16;
+        right >>= 16;
+
+        pOutputSamples[i*2+0] = (drflac_int16)left;
+        pOutputSamples[i*2+1] = (drflac_int16)right;
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_left_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+    int32x4_t shift0_4;
+    int32x4_t shift1_4;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    shift0_4 = vdupq_n_s32(shift0);
+    shift1_4 = vdupq_n_s32(shift1);
+
+    for (i = 0; i < frameCount4; ++i) {
+        uint32x4_t left;
+        uint32x4_t side;
+        uint32x4_t right;
+
+        left  = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4);
+        side  = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4);
+        right = vsubq_u32(left, side);
+
+        left  = vshrq_n_u32(left,  16);
+        right = vshrq_n_u32(right, 16);
+
+        drflac__vst2q_u16((drflac_uint16*)pOutputSamples + i*8, vzip_u16(vmovn_u32(left), vmovn_u32(right)));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
+        drflac_uint32 right = left - side;
+
+        left  >>= 16;
+        right >>= 16;
+
+        pOutputSamples[i*2+0] = (drflac_int16)left;
+        pOutputSamples[i*2+1] = (drflac_int16)right;
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_left_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s16__decode_left_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s16__decode_left_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+#if 0
+        drflac_read_pcm_frames_s16__decode_left_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+        drflac_read_pcm_frames_s16__decode_left_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+    }
+}
+
+
+#if 0
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_right_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    for (i = 0; i < frameCount; ++i) {
+        drflac_uint32 side  = (drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+        drflac_uint32 right = (drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+        drflac_uint32 left  = right + side;
+
+        left  >>= 16;
+        right >>= 16;
+
+        pOutputSamples[i*2+0] = (drflac_int16)left;
+        pOutputSamples[i*2+1] = (drflac_int16)right;
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_right_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    for (i = 0; i < frameCount4; ++i) {
+        drflac_uint32 side0  = pInputSamples0U32[i*4+0] << shift0;
+        drflac_uint32 side1  = pInputSamples0U32[i*4+1] << shift0;
+        drflac_uint32 side2  = pInputSamples0U32[i*4+2] << shift0;
+        drflac_uint32 side3  = pInputSamples0U32[i*4+3] << shift0;
+
+        drflac_uint32 right0 = pInputSamples1U32[i*4+0] << shift1;
+        drflac_uint32 right1 = pInputSamples1U32[i*4+1] << shift1;
+        drflac_uint32 right2 = pInputSamples1U32[i*4+2] << shift1;
+        drflac_uint32 right3 = pInputSamples1U32[i*4+3] << shift1;
+
+        drflac_uint32 left0 = right0 + side0;
+        drflac_uint32 left1 = right1 + side1;
+        drflac_uint32 left2 = right2 + side2;
+        drflac_uint32 left3 = right3 + side3;
+
+        left0  >>= 16;
+        left1  >>= 16;
+        left2  >>= 16;
+        left3  >>= 16;
+
+        right0 >>= 16;
+        right1 >>= 16;
+        right2 >>= 16;
+        right3 >>= 16;
+
+        pOutputSamples[i*8+0] = (drflac_int16)left0;
+        pOutputSamples[i*8+1] = (drflac_int16)right0;
+        pOutputSamples[i*8+2] = (drflac_int16)left1;
+        pOutputSamples[i*8+3] = (drflac_int16)right1;
+        pOutputSamples[i*8+4] = (drflac_int16)left2;
+        pOutputSamples[i*8+5] = (drflac_int16)right2;
+        pOutputSamples[i*8+6] = (drflac_int16)left3;
+        pOutputSamples[i*8+7] = (drflac_int16)right3;
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 right = pInputSamples1U32[i] << shift1;
+        drflac_uint32 left  = right + side;
+
+        left  >>= 16;
+        right >>= 16;
+
+        pOutputSamples[i*2+0] = (drflac_int16)left;
+        pOutputSamples[i*2+1] = (drflac_int16)right;
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_right_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    for (i = 0; i < frameCount4; ++i) {
+        __m128i side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
+        __m128i right = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
+        __m128i left  = _mm_add_epi32(right, side);
+
+        left  = _mm_srai_epi32(left,  16);
+        right = _mm_srai_epi32(right, 16);
+
+        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 right = pInputSamples1U32[i] << shift1;
+        drflac_uint32 left  = right + side;
+
+        left  >>= 16;
+        right >>= 16;
+
+        pOutputSamples[i*2+0] = (drflac_int16)left;
+        pOutputSamples[i*2+1] = (drflac_int16)right;
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_right_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+    int32x4_t shift0_4;
+    int32x4_t shift1_4;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    shift0_4 = vdupq_n_s32(shift0);
+    shift1_4 = vdupq_n_s32(shift1);
+
+    for (i = 0; i < frameCount4; ++i) {
+        uint32x4_t side;
+        uint32x4_t right;
+        uint32x4_t left;
+
+        side  = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4);
+        right = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4);
+        left  = vaddq_u32(right, side);
+
+        left  = vshrq_n_u32(left,  16);
+        right = vshrq_n_u32(right, 16);
+
+        drflac__vst2q_u16((drflac_uint16*)pOutputSamples + i*8, vzip_u16(vmovn_u32(left), vmovn_u32(right)));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 right = pInputSamples1U32[i] << shift1;
+        drflac_uint32 left  = right + side;
+
+        left  >>= 16;
+        right >>= 16;
+
+        pOutputSamples[i*2+0] = (drflac_int16)left;
+        pOutputSamples[i*2+1] = (drflac_int16)right;
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_right_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s16__decode_right_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s16__decode_right_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+#if 0
+        drflac_read_pcm_frames_s16__decode_right_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+        drflac_read_pcm_frames_s16__decode_right_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+    }
+}
+
+
+#if 0
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_mid_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    for (drflac_uint64 i = 0; i < frameCount; ++i) {
+        drflac_uint32 mid  = (drflac_uint32)pInputSamples0[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+        drflac_uint32 side = (drflac_uint32)pInputSamples1[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+        mid = (mid << 1) | (side & 0x01);
+
+        pOutputSamples[i*2+0] = (drflac_int16)(((drflac_uint32)((drflac_int32)(mid + side) >> 1) << unusedBitsPerSample) >> 16);
+        pOutputSamples[i*2+1] = (drflac_int16)(((drflac_uint32)((drflac_int32)(mid - side) >> 1) << unusedBitsPerSample) >> 16);
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_mid_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift = unusedBitsPerSample;
+
+    if (shift > 0) {
+        shift -= 1;
+        for (i = 0; i < frameCount4; ++i) {
+            drflac_uint32 temp0L;
+            drflac_uint32 temp1L;
+            drflac_uint32 temp2L;
+            drflac_uint32 temp3L;
+            drflac_uint32 temp0R;
+            drflac_uint32 temp1R;
+            drflac_uint32 temp2R;
+            drflac_uint32 temp3R;
+
+            drflac_uint32 mid0  = pInputSamples0U32[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid1  = pInputSamples0U32[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid2  = pInputSamples0U32[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid3  = pInputSamples0U32[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+
+            drflac_uint32 side0 = pInputSamples1U32[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side1 = pInputSamples1U32[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side2 = pInputSamples1U32[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side3 = pInputSamples1U32[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid0 = (mid0 << 1) | (side0 & 0x01);
+            mid1 = (mid1 << 1) | (side1 & 0x01);
+            mid2 = (mid2 << 1) | (side2 & 0x01);
+            mid3 = (mid3 << 1) | (side3 & 0x01);
+
+            temp0L = (mid0 + side0) << shift;
+            temp1L = (mid1 + side1) << shift;
+            temp2L = (mid2 + side2) << shift;
+            temp3L = (mid3 + side3) << shift;
+
+            temp0R = (mid0 - side0) << shift;
+            temp1R = (mid1 - side1) << shift;
+            temp2R = (mid2 - side2) << shift;
+            temp3R = (mid3 - side3) << shift;
+
+            temp0L >>= 16;
+            temp1L >>= 16;
+            temp2L >>= 16;
+            temp3L >>= 16;
+
+            temp0R >>= 16;
+            temp1R >>= 16;
+            temp2R >>= 16;
+            temp3R >>= 16;
+
+            pOutputSamples[i*8+0] = (drflac_int16)temp0L;
+            pOutputSamples[i*8+1] = (drflac_int16)temp0R;
+            pOutputSamples[i*8+2] = (drflac_int16)temp1L;
+            pOutputSamples[i*8+3] = (drflac_int16)temp1R;
+            pOutputSamples[i*8+4] = (drflac_int16)temp2L;
+            pOutputSamples[i*8+5] = (drflac_int16)temp2R;
+            pOutputSamples[i*8+6] = (drflac_int16)temp3L;
+            pOutputSamples[i*8+7] = (drflac_int16)temp3R;
+        }
+    } else {
+        for (i = 0; i < frameCount4; ++i) {
+            drflac_uint32 temp0L;
+            drflac_uint32 temp1L;
+            drflac_uint32 temp2L;
+            drflac_uint32 temp3L;
+            drflac_uint32 temp0R;
+            drflac_uint32 temp1R;
+            drflac_uint32 temp2R;
+            drflac_uint32 temp3R;
+
+            drflac_uint32 mid0  = pInputSamples0U32[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid1  = pInputSamples0U32[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid2  = pInputSamples0U32[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid3  = pInputSamples0U32[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+
+            drflac_uint32 side0 = pInputSamples1U32[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side1 = pInputSamples1U32[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side2 = pInputSamples1U32[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side3 = pInputSamples1U32[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid0 = (mid0 << 1) | (side0 & 0x01);
+            mid1 = (mid1 << 1) | (side1 & 0x01);
+            mid2 = (mid2 << 1) | (side2 & 0x01);
+            mid3 = (mid3 << 1) | (side3 & 0x01);
+
+            temp0L = ((drflac_int32)(mid0 + side0) >> 1);
+            temp1L = ((drflac_int32)(mid1 + side1) >> 1);
+            temp2L = ((drflac_int32)(mid2 + side2) >> 1);
+            temp3L = ((drflac_int32)(mid3 + side3) >> 1);
+
+            temp0R = ((drflac_int32)(mid0 - side0) >> 1);
+            temp1R = ((drflac_int32)(mid1 - side1) >> 1);
+            temp2R = ((drflac_int32)(mid2 - side2) >> 1);
+            temp3R = ((drflac_int32)(mid3 - side3) >> 1);
+
+            temp0L >>= 16;
+            temp1L >>= 16;
+            temp2L >>= 16;
+            temp3L >>= 16;
+
+            temp0R >>= 16;
+            temp1R >>= 16;
+            temp2R >>= 16;
+            temp3R >>= 16;
+
+            pOutputSamples[i*8+0] = (drflac_int16)temp0L;
+            pOutputSamples[i*8+1] = (drflac_int16)temp0R;
+            pOutputSamples[i*8+2] = (drflac_int16)temp1L;
+            pOutputSamples[i*8+3] = (drflac_int16)temp1R;
+            pOutputSamples[i*8+4] = (drflac_int16)temp2L;
+            pOutputSamples[i*8+5] = (drflac_int16)temp2R;
+            pOutputSamples[i*8+6] = (drflac_int16)temp3L;
+            pOutputSamples[i*8+7] = (drflac_int16)temp3R;
+        }
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+        drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+        mid = (mid << 1) | (side & 0x01);
+
+        pOutputSamples[i*2+0] = (drflac_int16)(((drflac_uint32)((drflac_int32)(mid + side) >> 1) << unusedBitsPerSample) >> 16);
+        pOutputSamples[i*2+1] = (drflac_int16)(((drflac_uint32)((drflac_int32)(mid - side) >> 1) << unusedBitsPerSample) >> 16);
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_mid_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift = unusedBitsPerSample;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    if (shift == 0) {
+        for (i = 0; i < frameCount4; ++i) {
+            __m128i mid;
+            __m128i side;
+            __m128i left;
+            __m128i right;
+
+            mid   = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+            side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+
+            mid   = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01)));
+
+            left  = _mm_srai_epi32(_mm_add_epi32(mid, side), 1);
+            right = _mm_srai_epi32(_mm_sub_epi32(mid, side), 1);
+
+            left  = _mm_srai_epi32(left,  16);
+            right = _mm_srai_epi32(right, 16);
+
+            _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right));
+        }
+
+        for (i = (frameCount4 << 2); i < frameCount; ++i) {
+            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid = (mid << 1) | (side & 0x01);
+
+            pOutputSamples[i*2+0] = (drflac_int16)(((drflac_int32)(mid + side) >> 1) >> 16);
+            pOutputSamples[i*2+1] = (drflac_int16)(((drflac_int32)(mid - side) >> 1) >> 16);
+        }
+    } else {
+        shift -= 1;
+        for (i = 0; i < frameCount4; ++i) {
+            __m128i mid;
+            __m128i side;
+            __m128i left;
+            __m128i right;
+
+            mid   = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+            side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+
+            mid   = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01)));
+
+            left  = _mm_slli_epi32(_mm_add_epi32(mid, side), shift);
+            right = _mm_slli_epi32(_mm_sub_epi32(mid, side), shift);
+
+            left  = _mm_srai_epi32(left,  16);
+            right = _mm_srai_epi32(right, 16);
+
+            _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right));
+        }
+
+        for (i = (frameCount4 << 2); i < frameCount; ++i) {
+            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid = (mid << 1) | (side & 0x01);
+
+            pOutputSamples[i*2+0] = (drflac_int16)(((mid + side) << shift) >> 16);
+            pOutputSamples[i*2+1] = (drflac_int16)(((mid - side) << shift) >> 16);
+        }
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_mid_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift = unusedBitsPerSample;
+    int32x4_t wbpsShift0_4; /* wbps = Wasted Bits Per Sample */
+    int32x4_t wbpsShift1_4; /* wbps = Wasted Bits Per Sample */
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    wbpsShift0_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+    wbpsShift1_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+
+    if (shift == 0) {
+        for (i = 0; i < frameCount4; ++i) {
+            uint32x4_t mid;
+            uint32x4_t side;
+            int32x4_t left;
+            int32x4_t right;
+
+            mid   = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbpsShift0_4);
+            side  = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbpsShift1_4);
+
+            mid   = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, vdupq_n_u32(1)));
+
+            left  = vshrq_n_s32(vreinterpretq_s32_u32(vaddq_u32(mid, side)), 1);
+            right = vshrq_n_s32(vreinterpretq_s32_u32(vsubq_u32(mid, side)), 1);
+
+            left  = vshrq_n_s32(left,  16);
+            right = vshrq_n_s32(right, 16);
+
+            drflac__vst2q_s16(pOutputSamples + i*8, vzip_s16(vmovn_s32(left), vmovn_s32(right)));
+        }
+
+        for (i = (frameCount4 << 2); i < frameCount; ++i) {
+            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid = (mid << 1) | (side & 0x01);
+
+            pOutputSamples[i*2+0] = (drflac_int16)(((drflac_int32)(mid + side) >> 1) >> 16);
+            pOutputSamples[i*2+1] = (drflac_int16)(((drflac_int32)(mid - side) >> 1) >> 16);
+        }
+    } else {
+        int32x4_t shift4;
+
+        shift -= 1;
+        shift4 = vdupq_n_s32(shift);
+
+        for (i = 0; i < frameCount4; ++i) {
+            uint32x4_t mid;
+            uint32x4_t side;
+            int32x4_t left;
+            int32x4_t right;
+
+            mid   = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbpsShift0_4);
+            side  = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbpsShift1_4);
+
+            mid   = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, vdupq_n_u32(1)));
+
+            left  = vreinterpretq_s32_u32(vshlq_u32(vaddq_u32(mid, side), shift4));
+            right = vreinterpretq_s32_u32(vshlq_u32(vsubq_u32(mid, side), shift4));
+
+            left  = vshrq_n_s32(left,  16);
+            right = vshrq_n_s32(right, 16);
+
+            drflac__vst2q_s16(pOutputSamples + i*8, vzip_s16(vmovn_s32(left), vmovn_s32(right)));
+        }
+
+        for (i = (frameCount4 << 2); i < frameCount; ++i) {
+            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid = (mid << 1) | (side & 0x01);
+
+            pOutputSamples[i*2+0] = (drflac_int16)(((mid + side) << shift) >> 16);
+            pOutputSamples[i*2+1] = (drflac_int16)(((mid - side) << shift) >> 16);
+        }
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_mid_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s16__decode_mid_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s16__decode_mid_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+#if 0
+        drflac_read_pcm_frames_s16__decode_mid_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+        drflac_read_pcm_frames_s16__decode_mid_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+    }
+}
+
+
+#if 0
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_independent_stereo__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    for (drflac_uint64 i = 0; i < frameCount; ++i) {
+        pOutputSamples[i*2+0] = (drflac_int16)((drflac_int32)((drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample)) >> 16);
+        pOutputSamples[i*2+1] = (drflac_int16)((drflac_int32)((drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample)) >> 16);
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_independent_stereo__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    for (i = 0; i < frameCount4; ++i) {
+        drflac_uint32 tempL0 = pInputSamples0U32[i*4+0] << shift0;
+        drflac_uint32 tempL1 = pInputSamples0U32[i*4+1] << shift0;
+        drflac_uint32 tempL2 = pInputSamples0U32[i*4+2] << shift0;
+        drflac_uint32 tempL3 = pInputSamples0U32[i*4+3] << shift0;
+
+        drflac_uint32 tempR0 = pInputSamples1U32[i*4+0] << shift1;
+        drflac_uint32 tempR1 = pInputSamples1U32[i*4+1] << shift1;
+        drflac_uint32 tempR2 = pInputSamples1U32[i*4+2] << shift1;
+        drflac_uint32 tempR3 = pInputSamples1U32[i*4+3] << shift1;
+
+        tempL0 >>= 16;
+        tempL1 >>= 16;
+        tempL2 >>= 16;
+        tempL3 >>= 16;
+
+        tempR0 >>= 16;
+        tempR1 >>= 16;
+        tempR2 >>= 16;
+        tempR3 >>= 16;
+
+        pOutputSamples[i*8+0] = (drflac_int16)tempL0;
+        pOutputSamples[i*8+1] = (drflac_int16)tempR0;
+        pOutputSamples[i*8+2] = (drflac_int16)tempL1;
+        pOutputSamples[i*8+3] = (drflac_int16)tempR1;
+        pOutputSamples[i*8+4] = (drflac_int16)tempL2;
+        pOutputSamples[i*8+5] = (drflac_int16)tempR2;
+        pOutputSamples[i*8+6] = (drflac_int16)tempL3;
+        pOutputSamples[i*8+7] = (drflac_int16)tempR3;
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        pOutputSamples[i*2+0] = (drflac_int16)((pInputSamples0U32[i] << shift0) >> 16);
+        pOutputSamples[i*2+1] = (drflac_int16)((pInputSamples1U32[i] << shift1) >> 16);
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_independent_stereo__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    for (i = 0; i < frameCount4; ++i) {
+        __m128i left  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
+        __m128i right = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
+
+        left  = _mm_srai_epi32(left,  16);
+        right = _mm_srai_epi32(right, 16);
+
+        /* At this point we have results. We can now pack and interleave these into a single __m128i object and then store the in the output buffer. */
+        _mm_storeu_si128((__m128i*)(pOutputSamples + i*8), drflac__mm_packs_interleaved_epi32(left, right));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        pOutputSamples[i*2+0] = (drflac_int16)((pInputSamples0U32[i] << shift0) >> 16);
+        pOutputSamples[i*2+1] = (drflac_int16)((pInputSamples1U32[i] << shift1) >> 16);
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_independent_stereo__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    int32x4_t shift0_4 = vdupq_n_s32(shift0);
+    int32x4_t shift1_4 = vdupq_n_s32(shift1);
+
+    for (i = 0; i < frameCount4; ++i) {
+        int32x4_t left;
+        int32x4_t right;
+
+        left  = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4));
+        right = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4));
+
+        left  = vshrq_n_s32(left,  16);
+        right = vshrq_n_s32(right, 16);
+
+        drflac__vst2q_s16(pOutputSamples + i*8, vzip_s16(vmovn_s32(left), vmovn_s32(right)));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        pOutputSamples[i*2+0] = (drflac_int16)((pInputSamples0U32[i] << shift0) >> 16);
+        pOutputSamples[i*2+1] = (drflac_int16)((pInputSamples1U32[i] << shift1) >> 16);
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_s16__decode_independent_stereo(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, drflac_int16* pOutputSamples)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s16__decode_independent_stereo__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_s16__decode_independent_stereo__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+#if 0
+        drflac_read_pcm_frames_s16__decode_independent_stereo__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+        drflac_read_pcm_frames_s16__decode_independent_stereo__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+    }
+}
+
+DRFLAC_API drflac_uint64 drflac_read_pcm_frames_s16(drflac* pFlac, drflac_uint64 framesToRead, drflac_int16* pBufferOut)
+{
+    drflac_uint64 framesRead;
+    drflac_uint32 unusedBitsPerSample;
+
+    if (pFlac == NULL || framesToRead == 0) {
+        return 0;
+    }
+
+    if (pBufferOut == NULL) {
+        return drflac__seek_forward_by_pcm_frames(pFlac, framesToRead);
+    }
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 32);
+    unusedBitsPerSample = 32 - pFlac->bitsPerSample;
+
+    framesRead = 0;
+    while (framesToRead > 0) {
+        /* If we've run out of samples in this frame, go to the next. */
+        if (pFlac->currentFLACFrame.pcmFramesRemaining == 0) {
+            if (!drflac__read_and_decode_next_flac_frame(pFlac)) {
+                break;  /* Couldn't read the next frame, so just break from the loop and return. */
+            }
+        } else {
+            unsigned int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFLACFrame.header.channelAssignment);
+            drflac_uint64 iFirstPCMFrame = pFlac->currentFLACFrame.header.blockSizeInPCMFrames - pFlac->currentFLACFrame.pcmFramesRemaining;
+            drflac_uint64 frameCountThisIteration = framesToRead;
+
+            if (frameCountThisIteration > pFlac->currentFLACFrame.pcmFramesRemaining) {
+                frameCountThisIteration = pFlac->currentFLACFrame.pcmFramesRemaining;
+            }
+
+            if (channelCount == 2) {
+                const drflac_int32* pDecodedSamples0 = pFlac->currentFLACFrame.subframes[0].pSamplesS32 + iFirstPCMFrame;
+                const drflac_int32* pDecodedSamples1 = pFlac->currentFLACFrame.subframes[1].pSamplesS32 + iFirstPCMFrame;
+
+                switch (pFlac->currentFLACFrame.header.channelAssignment)
+                {
+                    case DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE:
+                    {
+                        drflac_read_pcm_frames_s16__decode_left_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                    } break;
+
+                    case DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE:
+                    {
+                        drflac_read_pcm_frames_s16__decode_right_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                    } break;
+
+                    case DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE:
+                    {
+                        drflac_read_pcm_frames_s16__decode_mid_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                    } break;
+
+                    case DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT:
+                    default:
+                    {
+                        drflac_read_pcm_frames_s16__decode_independent_stereo(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                    } break;
+                }
+            } else {
+                /* Generic interleaving. */
+                drflac_uint64 i;
+                for (i = 0; i < frameCountThisIteration; ++i) {
+                    unsigned int j;
+                    for (j = 0; j < channelCount; ++j) {
+                        drflac_int32 sampleS32 = (drflac_int32)((drflac_uint32)(pFlac->currentFLACFrame.subframes[j].pSamplesS32[iFirstPCMFrame + i]) << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[j].wastedBitsPerSample));
+                        pBufferOut[(i*channelCount)+j] = (drflac_int16)(sampleS32 >> 16);
+                    }
+                }
+            }
+
+            framesRead                += frameCountThisIteration;
+            pBufferOut                += frameCountThisIteration * channelCount;
+            framesToRead              -= frameCountThisIteration;
+            pFlac->currentPCMFrame    += frameCountThisIteration;
+            pFlac->currentFLACFrame.pcmFramesRemaining -= (drflac_uint32)frameCountThisIteration;
+        }
+    }
+
+    return framesRead;
+}
+
+
+#if 0
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 i;
+    for (i = 0; i < frameCount; ++i) {
+        drflac_uint32 left  = (drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+        drflac_uint32 side  = (drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+        drflac_uint32 right = left - side;
+
+        pOutputSamples[i*2+0] = (float)((drflac_int32)left  / 2147483648.0);
+        pOutputSamples[i*2+1] = (float)((drflac_int32)right / 2147483648.0);
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+    float factor = 1 / 2147483648.0;
+
+    for (i = 0; i < frameCount4; ++i) {
+        drflac_uint32 left0 = pInputSamples0U32[i*4+0] << shift0;
+        drflac_uint32 left1 = pInputSamples0U32[i*4+1] << shift0;
+        drflac_uint32 left2 = pInputSamples0U32[i*4+2] << shift0;
+        drflac_uint32 left3 = pInputSamples0U32[i*4+3] << shift0;
+
+        drflac_uint32 side0 = pInputSamples1U32[i*4+0] << shift1;
+        drflac_uint32 side1 = pInputSamples1U32[i*4+1] << shift1;
+        drflac_uint32 side2 = pInputSamples1U32[i*4+2] << shift1;
+        drflac_uint32 side3 = pInputSamples1U32[i*4+3] << shift1;
+
+        drflac_uint32 right0 = left0 - side0;
+        drflac_uint32 right1 = left1 - side1;
+        drflac_uint32 right2 = left2 - side2;
+        drflac_uint32 right3 = left3 - side3;
+
+        pOutputSamples[i*8+0] = (drflac_int32)left0  * factor;
+        pOutputSamples[i*8+1] = (drflac_int32)right0 * factor;
+        pOutputSamples[i*8+2] = (drflac_int32)left1  * factor;
+        pOutputSamples[i*8+3] = (drflac_int32)right1 * factor;
+        pOutputSamples[i*8+4] = (drflac_int32)left2  * factor;
+        pOutputSamples[i*8+5] = (drflac_int32)right2 * factor;
+        pOutputSamples[i*8+6] = (drflac_int32)left3  * factor;
+        pOutputSamples[i*8+7] = (drflac_int32)right3 * factor;
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
+        drflac_uint32 right = left - side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left  * factor;
+        pOutputSamples[i*2+1] = (drflac_int32)right * factor;
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8;
+    drflac_uint32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8;
+    __m128 factor;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    factor = _mm_set1_ps(1.0f / 8388608.0f);
+
+    for (i = 0; i < frameCount4; ++i) {
+        __m128i left  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
+        __m128i side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
+        __m128i right = _mm_sub_epi32(left, side);
+        __m128 leftf  = _mm_mul_ps(_mm_cvtepi32_ps(left),  factor);
+        __m128 rightf = _mm_mul_ps(_mm_cvtepi32_ps(right), factor);
+
+        _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf));
+        _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
+        drflac_uint32 right = left - side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left  / 8388608.0f;
+        pOutputSamples[i*2+1] = (drflac_int32)right / 8388608.0f;
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8;
+    drflac_uint32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8;
+    float32x4_t factor4;
+    int32x4_t shift0_4;
+    int32x4_t shift1_4;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    factor4  = vdupq_n_f32(1.0f / 8388608.0f);
+    shift0_4 = vdupq_n_s32(shift0);
+    shift1_4 = vdupq_n_s32(shift1);
+
+    for (i = 0; i < frameCount4; ++i) {
+        uint32x4_t left;
+        uint32x4_t side;
+        uint32x4_t right;
+        float32x4_t leftf;
+        float32x4_t rightf;
+
+        left   = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4);
+        side   = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4);
+        right  = vsubq_u32(left, side);
+        leftf  = vmulq_f32(vcvtq_f32_s32(vreinterpretq_s32_u32(left)),  factor4);
+        rightf = vmulq_f32(vcvtq_f32_s32(vreinterpretq_s32_u32(right)), factor4);
+
+        drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 left  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 side  = pInputSamples1U32[i] << shift1;
+        drflac_uint32 right = left - side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left  / 8388608.0f;
+        pOutputSamples[i*2+1] = (drflac_int32)right / 8388608.0f;
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_left_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_f32__decode_left_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_f32__decode_left_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+#if 0
+        drflac_read_pcm_frames_f32__decode_left_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+        drflac_read_pcm_frames_f32__decode_left_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+    }
+}
+
+
+#if 0
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 i;
+    for (i = 0; i < frameCount; ++i) {
+        drflac_uint32 side  = (drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+        drflac_uint32 right = (drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+        drflac_uint32 left  = right + side;
+
+        pOutputSamples[i*2+0] = (float)((drflac_int32)left  / 2147483648.0);
+        pOutputSamples[i*2+1] = (float)((drflac_int32)right / 2147483648.0);
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+    float factor = 1 / 2147483648.0;
+
+    for (i = 0; i < frameCount4; ++i) {
+        drflac_uint32 side0  = pInputSamples0U32[i*4+0] << shift0;
+        drflac_uint32 side1  = pInputSamples0U32[i*4+1] << shift0;
+        drflac_uint32 side2  = pInputSamples0U32[i*4+2] << shift0;
+        drflac_uint32 side3  = pInputSamples0U32[i*4+3] << shift0;
+
+        drflac_uint32 right0 = pInputSamples1U32[i*4+0] << shift1;
+        drflac_uint32 right1 = pInputSamples1U32[i*4+1] << shift1;
+        drflac_uint32 right2 = pInputSamples1U32[i*4+2] << shift1;
+        drflac_uint32 right3 = pInputSamples1U32[i*4+3] << shift1;
+
+        drflac_uint32 left0 = right0 + side0;
+        drflac_uint32 left1 = right1 + side1;
+        drflac_uint32 left2 = right2 + side2;
+        drflac_uint32 left3 = right3 + side3;
+
+        pOutputSamples[i*8+0] = (drflac_int32)left0  * factor;
+        pOutputSamples[i*8+1] = (drflac_int32)right0 * factor;
+        pOutputSamples[i*8+2] = (drflac_int32)left1  * factor;
+        pOutputSamples[i*8+3] = (drflac_int32)right1 * factor;
+        pOutputSamples[i*8+4] = (drflac_int32)left2  * factor;
+        pOutputSamples[i*8+5] = (drflac_int32)right2 * factor;
+        pOutputSamples[i*8+6] = (drflac_int32)left3  * factor;
+        pOutputSamples[i*8+7] = (drflac_int32)right3 * factor;
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 right = pInputSamples1U32[i] << shift1;
+        drflac_uint32 left  = right + side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left  * factor;
+        pOutputSamples[i*2+1] = (drflac_int32)right * factor;
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8;
+    drflac_uint32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8;
+    __m128 factor;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    factor = _mm_set1_ps(1.0f / 8388608.0f);
+
+    for (i = 0; i < frameCount4; ++i) {
+        __m128i side  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
+        __m128i right = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
+        __m128i left  = _mm_add_epi32(right, side);
+        __m128 leftf  = _mm_mul_ps(_mm_cvtepi32_ps(left),  factor);
+        __m128 rightf = _mm_mul_ps(_mm_cvtepi32_ps(right), factor);
+
+        _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf));
+        _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 right = pInputSamples1U32[i] << shift1;
+        drflac_uint32 left  = right + side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left  / 8388608.0f;
+        pOutputSamples[i*2+1] = (drflac_int32)right / 8388608.0f;
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8;
+    drflac_uint32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8;
+    float32x4_t factor4;
+    int32x4_t shift0_4;
+    int32x4_t shift1_4;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    factor4  = vdupq_n_f32(1.0f / 8388608.0f);
+    shift0_4 = vdupq_n_s32(shift0);
+    shift1_4 = vdupq_n_s32(shift1);
+
+    for (i = 0; i < frameCount4; ++i) {
+        uint32x4_t side;
+        uint32x4_t right;
+        uint32x4_t left;
+        float32x4_t leftf;
+        float32x4_t rightf;
+
+        side   = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4);
+        right  = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4);
+        left   = vaddq_u32(right, side);
+        leftf  = vmulq_f32(vcvtq_f32_s32(vreinterpretq_s32_u32(left)),  factor4);
+        rightf = vmulq_f32(vcvtq_f32_s32(vreinterpretq_s32_u32(right)), factor4);
+
+        drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 side  = pInputSamples0U32[i] << shift0;
+        drflac_uint32 right = pInputSamples1U32[i] << shift1;
+        drflac_uint32 left  = right + side;
+
+        pOutputSamples[i*2+0] = (drflac_int32)left  / 8388608.0f;
+        pOutputSamples[i*2+1] = (drflac_int32)right / 8388608.0f;
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_right_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_f32__decode_right_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_f32__decode_right_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+#if 0
+        drflac_read_pcm_frames_f32__decode_right_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+        drflac_read_pcm_frames_f32__decode_right_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+    }
+}
+
+
+#if 0
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    for (drflac_uint64 i = 0; i < frameCount; ++i) {
+        drflac_uint32 mid  = (drflac_uint32)pInputSamples0[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+        drflac_uint32 side = (drflac_uint32)pInputSamples1[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+        mid = (mid << 1) | (side & 0x01);
+
+        pOutputSamples[i*2+0] = (float)((((drflac_int32)(mid + side) >> 1) << (unusedBitsPerSample)) / 2147483648.0);
+        pOutputSamples[i*2+1] = (float)((((drflac_int32)(mid - side) >> 1) << (unusedBitsPerSample)) / 2147483648.0);
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift = unusedBitsPerSample;
+    float factor = 1 / 2147483648.0;
+
+    if (shift > 0) {
+        shift -= 1;
+        for (i = 0; i < frameCount4; ++i) {
+            drflac_uint32 temp0L;
+            drflac_uint32 temp1L;
+            drflac_uint32 temp2L;
+            drflac_uint32 temp3L;
+            drflac_uint32 temp0R;
+            drflac_uint32 temp1R;
+            drflac_uint32 temp2R;
+            drflac_uint32 temp3R;
+
+            drflac_uint32 mid0  = pInputSamples0U32[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid1  = pInputSamples0U32[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid2  = pInputSamples0U32[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid3  = pInputSamples0U32[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+
+            drflac_uint32 side0 = pInputSamples1U32[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side1 = pInputSamples1U32[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side2 = pInputSamples1U32[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side3 = pInputSamples1U32[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid0 = (mid0 << 1) | (side0 & 0x01);
+            mid1 = (mid1 << 1) | (side1 & 0x01);
+            mid2 = (mid2 << 1) | (side2 & 0x01);
+            mid3 = (mid3 << 1) | (side3 & 0x01);
+
+            temp0L = (mid0 + side0) << shift;
+            temp1L = (mid1 + side1) << shift;
+            temp2L = (mid2 + side2) << shift;
+            temp3L = (mid3 + side3) << shift;
+
+            temp0R = (mid0 - side0) << shift;
+            temp1R = (mid1 - side1) << shift;
+            temp2R = (mid2 - side2) << shift;
+            temp3R = (mid3 - side3) << shift;
+
+            pOutputSamples[i*8+0] = (drflac_int32)temp0L * factor;
+            pOutputSamples[i*8+1] = (drflac_int32)temp0R * factor;
+            pOutputSamples[i*8+2] = (drflac_int32)temp1L * factor;
+            pOutputSamples[i*8+3] = (drflac_int32)temp1R * factor;
+            pOutputSamples[i*8+4] = (drflac_int32)temp2L * factor;
+            pOutputSamples[i*8+5] = (drflac_int32)temp2R * factor;
+            pOutputSamples[i*8+6] = (drflac_int32)temp3L * factor;
+            pOutputSamples[i*8+7] = (drflac_int32)temp3R * factor;
+        }
+    } else {
+        for (i = 0; i < frameCount4; ++i) {
+            drflac_uint32 temp0L;
+            drflac_uint32 temp1L;
+            drflac_uint32 temp2L;
+            drflac_uint32 temp3L;
+            drflac_uint32 temp0R;
+            drflac_uint32 temp1R;
+            drflac_uint32 temp2R;
+            drflac_uint32 temp3R;
+
+            drflac_uint32 mid0  = pInputSamples0U32[i*4+0] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid1  = pInputSamples0U32[i*4+1] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid2  = pInputSamples0U32[i*4+2] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 mid3  = pInputSamples0U32[i*4+3] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+
+            drflac_uint32 side0 = pInputSamples1U32[i*4+0] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side1 = pInputSamples1U32[i*4+1] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side2 = pInputSamples1U32[i*4+2] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+            drflac_uint32 side3 = pInputSamples1U32[i*4+3] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid0 = (mid0 << 1) | (side0 & 0x01);
+            mid1 = (mid1 << 1) | (side1 & 0x01);
+            mid2 = (mid2 << 1) | (side2 & 0x01);
+            mid3 = (mid3 << 1) | (side3 & 0x01);
+
+            temp0L = (drflac_uint32)((drflac_int32)(mid0 + side0) >> 1);
+            temp1L = (drflac_uint32)((drflac_int32)(mid1 + side1) >> 1);
+            temp2L = (drflac_uint32)((drflac_int32)(mid2 + side2) >> 1);
+            temp3L = (drflac_uint32)((drflac_int32)(mid3 + side3) >> 1);
+
+            temp0R = (drflac_uint32)((drflac_int32)(mid0 - side0) >> 1);
+            temp1R = (drflac_uint32)((drflac_int32)(mid1 - side1) >> 1);
+            temp2R = (drflac_uint32)((drflac_int32)(mid2 - side2) >> 1);
+            temp3R = (drflac_uint32)((drflac_int32)(mid3 - side3) >> 1);
+
+            pOutputSamples[i*8+0] = (drflac_int32)temp0L * factor;
+            pOutputSamples[i*8+1] = (drflac_int32)temp0R * factor;
+            pOutputSamples[i*8+2] = (drflac_int32)temp1L * factor;
+            pOutputSamples[i*8+3] = (drflac_int32)temp1R * factor;
+            pOutputSamples[i*8+4] = (drflac_int32)temp2L * factor;
+            pOutputSamples[i*8+5] = (drflac_int32)temp2R * factor;
+            pOutputSamples[i*8+6] = (drflac_int32)temp3L * factor;
+            pOutputSamples[i*8+7] = (drflac_int32)temp3R * factor;
+        }
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+        drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+        mid = (mid << 1) | (side & 0x01);
+
+        pOutputSamples[i*2+0] = (drflac_int32)((drflac_uint32)((drflac_int32)(mid + side) >> 1) << unusedBitsPerSample) * factor;
+        pOutputSamples[i*2+1] = (drflac_int32)((drflac_uint32)((drflac_int32)(mid - side) >> 1) << unusedBitsPerSample) * factor;
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift = unusedBitsPerSample - 8;
+    float factor;
+    __m128 factor128;
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    factor = 1.0f / 8388608.0f;
+    factor128 = _mm_set1_ps(factor);
+
+    if (shift == 0) {
+        for (i = 0; i < frameCount4; ++i) {
+            __m128i mid;
+            __m128i side;
+            __m128i tempL;
+            __m128i tempR;
+            __m128  leftf;
+            __m128  rightf;
+
+            mid    = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+            side   = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+
+            mid    = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01)));
+
+            tempL  = _mm_srai_epi32(_mm_add_epi32(mid, side), 1);
+            tempR  = _mm_srai_epi32(_mm_sub_epi32(mid, side), 1);
+
+            leftf  = _mm_mul_ps(_mm_cvtepi32_ps(tempL), factor128);
+            rightf = _mm_mul_ps(_mm_cvtepi32_ps(tempR), factor128);
+
+            _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf));
+            _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf));
+        }
+
+        for (i = (frameCount4 << 2); i < frameCount; ++i) {
+            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid = (mid << 1) | (side & 0x01);
+
+            pOutputSamples[i*2+0] = ((drflac_int32)(mid + side) >> 1) * factor;
+            pOutputSamples[i*2+1] = ((drflac_int32)(mid - side) >> 1) * factor;
+        }
+    } else {
+        shift -= 1;
+        for (i = 0; i < frameCount4; ++i) {
+            __m128i mid;
+            __m128i side;
+            __m128i tempL;
+            __m128i tempR;
+            __m128 leftf;
+            __m128 rightf;
+
+            mid    = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+            side   = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+
+            mid    = _mm_or_si128(_mm_slli_epi32(mid, 1), _mm_and_si128(side, _mm_set1_epi32(0x01)));
+
+            tempL  = _mm_slli_epi32(_mm_add_epi32(mid, side), shift);
+            tempR  = _mm_slli_epi32(_mm_sub_epi32(mid, side), shift);
+
+            leftf  = _mm_mul_ps(_mm_cvtepi32_ps(tempL), factor128);
+            rightf = _mm_mul_ps(_mm_cvtepi32_ps(tempR), factor128);
+
+            _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf));
+            _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf));
+        }
+
+        for (i = (frameCount4 << 2); i < frameCount; ++i) {
+            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid = (mid << 1) | (side & 0x01);
+
+            pOutputSamples[i*2+0] = (drflac_int32)((mid + side) << shift) * factor;
+            pOutputSamples[i*2+1] = (drflac_int32)((mid - side) << shift) * factor;
+        }
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift = unusedBitsPerSample - 8;
+    float factor;
+    float32x4_t factor4;
+    int32x4_t shift4;
+    int32x4_t wbps0_4;  /* Wasted Bits Per Sample */
+    int32x4_t wbps1_4;  /* Wasted Bits Per Sample */
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 24);
+
+    factor  = 1.0f / 8388608.0f;
+    factor4 = vdupq_n_f32(factor);
+    wbps0_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample);
+    wbps1_4 = vdupq_n_s32(pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample);
+
+    if (shift == 0) {
+        for (i = 0; i < frameCount4; ++i) {
+            int32x4_t lefti;
+            int32x4_t righti;
+            float32x4_t leftf;
+            float32x4_t rightf;
+
+            uint32x4_t mid  = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbps0_4);
+            uint32x4_t side = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbps1_4);
+
+            mid    = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, vdupq_n_u32(1)));
+
+            lefti  = vshrq_n_s32(vreinterpretq_s32_u32(vaddq_u32(mid, side)), 1);
+            righti = vshrq_n_s32(vreinterpretq_s32_u32(vsubq_u32(mid, side)), 1);
+
+            leftf  = vmulq_f32(vcvtq_f32_s32(lefti),  factor4);
+            rightf = vmulq_f32(vcvtq_f32_s32(righti), factor4);
+
+            drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf));
+        }
+
+        for (i = (frameCount4 << 2); i < frameCount; ++i) {
+            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid = (mid << 1) | (side & 0x01);
+
+            pOutputSamples[i*2+0] = ((drflac_int32)(mid + side) >> 1) * factor;
+            pOutputSamples[i*2+1] = ((drflac_int32)(mid - side) >> 1) * factor;
+        }
+    } else {
+        shift -= 1;
+        shift4 = vdupq_n_s32(shift);
+        for (i = 0; i < frameCount4; ++i) {
+            uint32x4_t mid;
+            uint32x4_t side;
+            int32x4_t lefti;
+            int32x4_t righti;
+            float32x4_t leftf;
+            float32x4_t rightf;
+
+            mid    = vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), wbps0_4);
+            side   = vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), wbps1_4);
+
+            mid    = vorrq_u32(vshlq_n_u32(mid, 1), vandq_u32(side, vdupq_n_u32(1)));
+
+            lefti  = vreinterpretq_s32_u32(vshlq_u32(vaddq_u32(mid, side), shift4));
+            righti = vreinterpretq_s32_u32(vshlq_u32(vsubq_u32(mid, side), shift4));
+
+            leftf  = vmulq_f32(vcvtq_f32_s32(lefti),  factor4);
+            rightf = vmulq_f32(vcvtq_f32_s32(righti), factor4);
+
+            drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf));
+        }
+
+        for (i = (frameCount4 << 2); i < frameCount; ++i) {
+            drflac_uint32 mid  = pInputSamples0U32[i] << pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+            drflac_uint32 side = pInputSamples1U32[i] << pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+
+            mid = (mid << 1) | (side & 0x01);
+
+            pOutputSamples[i*2+0] = (drflac_int32)((mid + side) << shift) * factor;
+            pOutputSamples[i*2+1] = (drflac_int32)((mid - side) << shift) * factor;
+        }
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_mid_side(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_f32__decode_mid_side__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_f32__decode_mid_side__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+#if 0
+        drflac_read_pcm_frames_f32__decode_mid_side__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+        drflac_read_pcm_frames_f32__decode_mid_side__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+    }
+}
+
+#if 0
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo__reference(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    for (drflac_uint64 i = 0; i < frameCount; ++i) {
+        pOutputSamples[i*2+0] = (float)((drflac_int32)((drflac_uint32)pInputSamples0[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample)) / 2147483648.0);
+        pOutputSamples[i*2+1] = (float)((drflac_int32)((drflac_uint32)pInputSamples1[i] << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample)) / 2147483648.0);
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo__scalar(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample;
+    drflac_uint32 shift1 = unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample;
+    float factor = 1 / 2147483648.0;
+
+    for (i = 0; i < frameCount4; ++i) {
+        drflac_uint32 tempL0 = pInputSamples0U32[i*4+0] << shift0;
+        drflac_uint32 tempL1 = pInputSamples0U32[i*4+1] << shift0;
+        drflac_uint32 tempL2 = pInputSamples0U32[i*4+2] << shift0;
+        drflac_uint32 tempL3 = pInputSamples0U32[i*4+3] << shift0;
+
+        drflac_uint32 tempR0 = pInputSamples1U32[i*4+0] << shift1;
+        drflac_uint32 tempR1 = pInputSamples1U32[i*4+1] << shift1;
+        drflac_uint32 tempR2 = pInputSamples1U32[i*4+2] << shift1;
+        drflac_uint32 tempR3 = pInputSamples1U32[i*4+3] << shift1;
+
+        pOutputSamples[i*8+0] = (drflac_int32)tempL0 * factor;
+        pOutputSamples[i*8+1] = (drflac_int32)tempR0 * factor;
+        pOutputSamples[i*8+2] = (drflac_int32)tempL1 * factor;
+        pOutputSamples[i*8+3] = (drflac_int32)tempR1 * factor;
+        pOutputSamples[i*8+4] = (drflac_int32)tempL2 * factor;
+        pOutputSamples[i*8+5] = (drflac_int32)tempR2 * factor;
+        pOutputSamples[i*8+6] = (drflac_int32)tempL3 * factor;
+        pOutputSamples[i*8+7] = (drflac_int32)tempR3 * factor;
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        pOutputSamples[i*2+0] = (drflac_int32)(pInputSamples0U32[i] << shift0) * factor;
+        pOutputSamples[i*2+1] = (drflac_int32)(pInputSamples1U32[i] << shift1) * factor;
+    }
+}
+
+#if defined(DRFLAC_SUPPORT_SSE2)
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo__sse2(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8;
+    drflac_uint32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8;
+
+    float factor = 1.0f / 8388608.0f;
+    __m128 factor128 = _mm_set1_ps(factor);
+
+    for (i = 0; i < frameCount4; ++i) {
+        __m128i lefti;
+        __m128i righti;
+        __m128 leftf;
+        __m128 rightf;
+
+        lefti  = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples0 + i), shift0);
+        righti = _mm_slli_epi32(_mm_loadu_si128((const __m128i*)pInputSamples1 + i), shift1);
+
+        leftf  = _mm_mul_ps(_mm_cvtepi32_ps(lefti),  factor128);
+        rightf = _mm_mul_ps(_mm_cvtepi32_ps(righti), factor128);
+
+        _mm_storeu_ps(pOutputSamples + i*8 + 0, _mm_unpacklo_ps(leftf, rightf));
+        _mm_storeu_ps(pOutputSamples + i*8 + 4, _mm_unpackhi_ps(leftf, rightf));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        pOutputSamples[i*2+0] = (drflac_int32)(pInputSamples0U32[i] << shift0) * factor;
+        pOutputSamples[i*2+1] = (drflac_int32)(pInputSamples1U32[i] << shift1) * factor;
+    }
+}
+#endif
+
+#if defined(DRFLAC_SUPPORT_NEON)
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo__neon(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+    drflac_uint64 i;
+    drflac_uint64 frameCount4 = frameCount >> 2;
+    const drflac_uint32* pInputSamples0U32 = (const drflac_uint32*)pInputSamples0;
+    const drflac_uint32* pInputSamples1U32 = (const drflac_uint32*)pInputSamples1;
+    drflac_uint32 shift0 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[0].wastedBitsPerSample) - 8;
+    drflac_uint32 shift1 = (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[1].wastedBitsPerSample) - 8;
+
+    float factor = 1.0f / 8388608.0f;
+    float32x4_t factor4 = vdupq_n_f32(factor);
+    int32x4_t shift0_4  = vdupq_n_s32(shift0);
+    int32x4_t shift1_4  = vdupq_n_s32(shift1);
+
+    for (i = 0; i < frameCount4; ++i) {
+        int32x4_t lefti;
+        int32x4_t righti;
+        float32x4_t leftf;
+        float32x4_t rightf;
+
+        lefti  = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples0U32 + i*4), shift0_4));
+        righti = vreinterpretq_s32_u32(vshlq_u32(vld1q_u32(pInputSamples1U32 + i*4), shift1_4));
+
+        leftf  = vmulq_f32(vcvtq_f32_s32(lefti),  factor4);
+        rightf = vmulq_f32(vcvtq_f32_s32(righti), factor4);
+
+        drflac__vst2q_f32(pOutputSamples + i*8, vzipq_f32(leftf, rightf));
+    }
+
+    for (i = (frameCount4 << 2); i < frameCount; ++i) {
+        pOutputSamples[i*2+0] = (drflac_int32)(pInputSamples0U32[i] << shift0) * factor;
+        pOutputSamples[i*2+1] = (drflac_int32)(pInputSamples1U32[i] << shift1) * factor;
+    }
+}
+#endif
+
+static DRFLAC_INLINE void drflac_read_pcm_frames_f32__decode_independent_stereo(drflac* pFlac, drflac_uint64 frameCount, drflac_uint32 unusedBitsPerSample, const drflac_int32* pInputSamples0, const drflac_int32* pInputSamples1, float* pOutputSamples)
+{
+#if defined(DRFLAC_SUPPORT_SSE2)
+    if (drflac__gIsSSE2Supported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_f32__decode_independent_stereo__sse2(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#elif defined(DRFLAC_SUPPORT_NEON)
+    if (drflac__gIsNEONSupported && pFlac->bitsPerSample <= 24) {
+        drflac_read_pcm_frames_f32__decode_independent_stereo__neon(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+    } else
+#endif
+    {
+        /* Scalar fallback. */
+#if 0
+        drflac_read_pcm_frames_f32__decode_independent_stereo__reference(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#else
+        drflac_read_pcm_frames_f32__decode_independent_stereo__scalar(pFlac, frameCount, unusedBitsPerSample, pInputSamples0, pInputSamples1, pOutputSamples);
+#endif
+    }
+}
+
+DRFLAC_API drflac_uint64 drflac_read_pcm_frames_f32(drflac* pFlac, drflac_uint64 framesToRead, float* pBufferOut)
+{
+    drflac_uint64 framesRead;
+    drflac_uint32 unusedBitsPerSample;
+
+    if (pFlac == NULL || framesToRead == 0) {
+        return 0;
+    }
+
+    if (pBufferOut == NULL) {
+        return drflac__seek_forward_by_pcm_frames(pFlac, framesToRead);
+    }
+
+    DRFLAC_ASSERT(pFlac->bitsPerSample <= 32);
+    unusedBitsPerSample = 32 - pFlac->bitsPerSample;
+
+    framesRead = 0;
+    while (framesToRead > 0) {
+        /* If we've run out of samples in this frame, go to the next. */
+        if (pFlac->currentFLACFrame.pcmFramesRemaining == 0) {
+            if (!drflac__read_and_decode_next_flac_frame(pFlac)) {
+                break;  /* Couldn't read the next frame, so just break from the loop and return. */
+            }
+        } else {
+            unsigned int channelCount = drflac__get_channel_count_from_channel_assignment(pFlac->currentFLACFrame.header.channelAssignment);
+            drflac_uint64 iFirstPCMFrame = pFlac->currentFLACFrame.header.blockSizeInPCMFrames - pFlac->currentFLACFrame.pcmFramesRemaining;
+            drflac_uint64 frameCountThisIteration = framesToRead;
+
+            if (frameCountThisIteration > pFlac->currentFLACFrame.pcmFramesRemaining) {
+                frameCountThisIteration = pFlac->currentFLACFrame.pcmFramesRemaining;
+            }
+
+            if (channelCount == 2) {
+                const drflac_int32* pDecodedSamples0 = pFlac->currentFLACFrame.subframes[0].pSamplesS32 + iFirstPCMFrame;
+                const drflac_int32* pDecodedSamples1 = pFlac->currentFLACFrame.subframes[1].pSamplesS32 + iFirstPCMFrame;
+
+                switch (pFlac->currentFLACFrame.header.channelAssignment)
+                {
+                    case DRFLAC_CHANNEL_ASSIGNMENT_LEFT_SIDE:
+                    {
+                        drflac_read_pcm_frames_f32__decode_left_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                    } break;
+
+                    case DRFLAC_CHANNEL_ASSIGNMENT_RIGHT_SIDE:
+                    {
+                        drflac_read_pcm_frames_f32__decode_right_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                    } break;
+
+                    case DRFLAC_CHANNEL_ASSIGNMENT_MID_SIDE:
+                    {
+                        drflac_read_pcm_frames_f32__decode_mid_side(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                    } break;
+
+                    case DRFLAC_CHANNEL_ASSIGNMENT_INDEPENDENT:
+                    default:
+                    {
+                        drflac_read_pcm_frames_f32__decode_independent_stereo(pFlac, frameCountThisIteration, unusedBitsPerSample, pDecodedSamples0, pDecodedSamples1, pBufferOut);
+                    } break;
+                }
+            } else {
+                /* Generic interleaving. */
+                drflac_uint64 i;
+                for (i = 0; i < frameCountThisIteration; ++i) {
+                    unsigned int j;
+                    for (j = 0; j < channelCount; ++j) {
+                        drflac_int32 sampleS32 = (drflac_int32)((drflac_uint32)(pFlac->currentFLACFrame.subframes[j].pSamplesS32[iFirstPCMFrame + i]) << (unusedBitsPerSample + pFlac->currentFLACFrame.subframes[j].wastedBitsPerSample));
+                        pBufferOut[(i*channelCount)+j] = (float)(sampleS32 / 2147483648.0);
+                    }
+                }
+            }
+
+            framesRead                += frameCountThisIteration;
+            pBufferOut                += frameCountThisIteration * channelCount;
+            framesToRead              -= frameCountThisIteration;
+            pFlac->currentPCMFrame    += frameCountThisIteration;
+            pFlac->currentFLACFrame.pcmFramesRemaining -= (unsigned int)frameCountThisIteration;
+        }
+    }
+
+    return framesRead;
+}
+
+
+DRFLAC_API drflac_bool32 drflac_seek_to_pcm_frame(drflac* pFlac, drflac_uint64 pcmFrameIndex)
+{
+    if (pFlac == NULL) {
+        return DRFLAC_FALSE;
+    }
+
+    /* Don't do anything if we're already on the seek point. */
+    if (pFlac->currentPCMFrame == pcmFrameIndex) {
+        return DRFLAC_TRUE;
+    }
+
+    /*
+    If we don't know where the first frame begins then we can't seek. This will happen when the STREAMINFO block was not present
+    when the decoder was opened.
+    */
+    if (pFlac->firstFLACFramePosInBytes == 0) {
+        return DRFLAC_FALSE;
+    }
+
+    if (pcmFrameIndex == 0) {
+        pFlac->currentPCMFrame = 0;
+        return drflac__seek_to_first_frame(pFlac);
+    } else {
+        drflac_bool32 wasSuccessful = DRFLAC_FALSE;
+
+        /* Clamp the sample to the end. */
+        if (pcmFrameIndex > pFlac->totalPCMFrameCount) {
+            pcmFrameIndex = pFlac->totalPCMFrameCount;
+        }
+
+        /* If the target sample and the current sample are in the same frame we just move the position forward. */
+        if (pcmFrameIndex > pFlac->currentPCMFrame) {
+            /* Forward. */
+            drflac_uint32 offset = (drflac_uint32)(pcmFrameIndex - pFlac->currentPCMFrame);
+            if (pFlac->currentFLACFrame.pcmFramesRemaining >  offset) {
+                pFlac->currentFLACFrame.pcmFramesRemaining -= offset;
+                pFlac->currentPCMFrame = pcmFrameIndex;
+                return DRFLAC_TRUE;
+            }
+        } else {
+            /* Backward. */
+            drflac_uint32 offsetAbs = (drflac_uint32)(pFlac->currentPCMFrame - pcmFrameIndex);
+            drflac_uint32 currentFLACFramePCMFrameCount = pFlac->currentFLACFrame.header.blockSizeInPCMFrames;
+            drflac_uint32 currentFLACFramePCMFramesConsumed = currentFLACFramePCMFrameCount - pFlac->currentFLACFrame.pcmFramesRemaining;
+            if (currentFLACFramePCMFramesConsumed > offsetAbs) {
+                pFlac->currentFLACFrame.pcmFramesRemaining += offsetAbs;
+                pFlac->currentPCMFrame = pcmFrameIndex;
+                return DRFLAC_TRUE;
+            }
+        }
+
+        /*
+        Different techniques depending on encapsulation. Using the native FLAC seektable with Ogg encapsulation is a bit awkward so
+        we'll instead use Ogg's natural seeking facility.
+        */
+#ifndef DR_FLAC_NO_OGG
+        if (pFlac->container == drflac_container_ogg)
+        {
+            wasSuccessful = drflac_ogg__seek_to_pcm_frame(pFlac, pcmFrameIndex);
+        }
+        else
+#endif
+        {
+            /* First try seeking via the seek table. If this fails, fall back to a brute force seek which is much slower. */
+            if (/*!wasSuccessful && */!pFlac->_noSeekTableSeek) {
+                wasSuccessful = drflac__seek_to_pcm_frame__seek_table(pFlac, pcmFrameIndex);
+            }
+
+#if !defined(DR_FLAC_NO_CRC)
+            /* Fall back to binary search if seek table seeking fails. This requires the length of the stream to be known. */
+            if (!wasSuccessful && !pFlac->_noBinarySearchSeek && pFlac->totalPCMFrameCount > 0) {
+                wasSuccessful = drflac__seek_to_pcm_frame__binary_search(pFlac, pcmFrameIndex);
+            }
+#endif
+
+            /* Fall back to brute force if all else fails. */
+            if (!wasSuccessful && !pFlac->_noBruteForceSeek) {
+                wasSuccessful = drflac__seek_to_pcm_frame__brute_force(pFlac, pcmFrameIndex);
+            }
+        }
+
+        pFlac->currentPCMFrame = pcmFrameIndex;
+        return wasSuccessful;
+    }
+}
+
+
+
+/* High Level APIs */
+
+#if defined(SIZE_MAX)
+    #define DRFLAC_SIZE_MAX  SIZE_MAX
+#else
+    #if defined(DRFLAC_64BIT)
+        #define DRFLAC_SIZE_MAX  ((drflac_uint64)0xFFFFFFFFFFFFFFFF)
+    #else
+        #define DRFLAC_SIZE_MAX  0xFFFFFFFF
+    #endif
+#endif
+
+
+/* Using a macro as the definition of the drflac__full_decode_and_close_*() API family. Sue me. */
+#define DRFLAC_DEFINE_FULL_READ_AND_CLOSE(extension, type) \
+static type* drflac__full_read_and_close_ ## extension (drflac* pFlac, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut)\
+{                                                                                                                                                                   \
+    type* pSampleData = NULL;                                                                                                                                       \
+    drflac_uint64 totalPCMFrameCount;                                                                                                                               \
+                                                                                                                                                                    \
+    DRFLAC_ASSERT(pFlac != NULL);                                                                                                                                   \
+                                                                                                                                                                    \
+    totalPCMFrameCount = pFlac->totalPCMFrameCount;                                                                                                                 \
+                                                                                                                                                                    \
+    if (totalPCMFrameCount == 0) {                                                                                                                                  \
+        type buffer[4096];                                                                                                                                          \
+        drflac_uint64 pcmFramesRead;                                                                                                                                \
+        size_t sampleDataBufferSize = sizeof(buffer);                                                                                                               \
+                                                                                                                                                                    \
+        pSampleData = (type*)drflac__malloc_from_callbacks(sampleDataBufferSize, &pFlac->allocationCallbacks);                                                      \
+        if (pSampleData == NULL) {                                                                                                                                  \
+            goto on_error;                                                                                                                                          \
+        }                                                                                                                                                           \
+                                                                                                                                                                    \
+        while ((pcmFramesRead = (drflac_uint64)drflac_read_pcm_frames_##extension(pFlac, sizeof(buffer)/sizeof(buffer[0])/pFlac->channels, buffer)) > 0) {          \
+            if (((totalPCMFrameCount + pcmFramesRead) * pFlac->channels * sizeof(type)) > sampleDataBufferSize) {                                                   \
+                type* pNewSampleData;                                                                                                                               \
+                size_t newSampleDataBufferSize;                                                                                                                     \
+                                                                                                                                                                    \
+                newSampleDataBufferSize = sampleDataBufferSize * 2;                                                                                                 \
+                pNewSampleData = (type*)drflac__realloc_from_callbacks(pSampleData, newSampleDataBufferSize, sampleDataBufferSize, &pFlac->allocationCallbacks);    \
+                if (pNewSampleData == NULL) {                                                                                                                       \
+                    drflac__free_from_callbacks(pSampleData, &pFlac->allocationCallbacks);                                                                          \
+                    goto on_error;                                                                                                                                  \
+                }                                                                                                                                                   \
+                                                                                                                                                                    \
+                sampleDataBufferSize = newSampleDataBufferSize;                                                                                                     \
+                pSampleData = pNewSampleData;                                                                                                                       \
+            }                                                                                                                                                       \
+                                                                                                                                                                    \
+            DRFLAC_COPY_MEMORY(pSampleData + (totalPCMFrameCount*pFlac->channels), buffer, (size_t)(pcmFramesRead*pFlac->channels*sizeof(type)));                   \
+            totalPCMFrameCount += pcmFramesRead;                                                                                                                    \
+        }                                                                                                                                                           \
+                                                                                                                                                                    \
+        /* At this point everything should be decoded, but we just want to fill the unused part buffer with silence - need to                                       \
+           protect those ears from random noise! */                                                                                                                 \
+        DRFLAC_ZERO_MEMORY(pSampleData + (totalPCMFrameCount*pFlac->channels), (size_t)(sampleDataBufferSize - totalPCMFrameCount*pFlac->channels*sizeof(type)));   \
+    } else {                                                                                                                                                        \
+        drflac_uint64 dataSize = totalPCMFrameCount*pFlac->channels*sizeof(type);                                                                                   \
+        if (dataSize > DRFLAC_SIZE_MAX) {                                                                                                                           \
+            goto on_error;  /* The decoded data is too big. */                                                                                                      \
+        }                                                                                                                                                           \
+                                                                                                                                                                    \
+        pSampleData = (type*)drflac__malloc_from_callbacks((size_t)dataSize, &pFlac->allocationCallbacks);    /* <-- Safe cast as per the check above. */           \
+        if (pSampleData == NULL) {                                                                                                                                  \
+            goto on_error;                                                                                                                                          \
+        }                                                                                                                                                           \
+                                                                                                                                                                    \
+        totalPCMFrameCount = drflac_read_pcm_frames_##extension(pFlac, pFlac->totalPCMFrameCount, pSampleData);                                                     \
+    }                                                                                                                                                               \
+                                                                                                                                                                    \
+    if (sampleRateOut) *sampleRateOut = pFlac->sampleRate;                                                                                                          \
+    if (channelsOut) *channelsOut = pFlac->channels;                                                                                                                \
+    if (totalPCMFrameCountOut) *totalPCMFrameCountOut = totalPCMFrameCount;                                                                                         \
+                                                                                                                                                                    \
+    drflac_close(pFlac);                                                                                                                                            \
+    return pSampleData;                                                                                                                                             \
+                                                                                                                                                                    \
+on_error:                                                                                                                                                           \
+    drflac_close(pFlac);                                                                                                                                            \
+    return NULL;                                                                                                                                                    \
+}
+
+DRFLAC_DEFINE_FULL_READ_AND_CLOSE(s32, drflac_int32)
+DRFLAC_DEFINE_FULL_READ_AND_CLOSE(s16, drflac_int16)
+DRFLAC_DEFINE_FULL_READ_AND_CLOSE(f32, float)
+
+DRFLAC_API drflac_int32* drflac_open_and_read_pcm_frames_s32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+
+    if (channelsOut) {
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalPCMFrameCountOut) {
+        *totalPCMFrameCountOut = 0;
+    }
+
+    pFlac = drflac_open(onRead, onSeek, pUserData, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    return drflac__full_read_and_close_s32(pFlac, channelsOut, sampleRateOut, totalPCMFrameCountOut);
+}
+
+DRFLAC_API drflac_int16* drflac_open_and_read_pcm_frames_s16(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+
+    if (channelsOut) {
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalPCMFrameCountOut) {
+        *totalPCMFrameCountOut = 0;
+    }
+
+    pFlac = drflac_open(onRead, onSeek, pUserData, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    return drflac__full_read_and_close_s16(pFlac, channelsOut, sampleRateOut, totalPCMFrameCountOut);
+}
+
+DRFLAC_API float* drflac_open_and_read_pcm_frames_f32(drflac_read_proc onRead, drflac_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drflac_uint64* totalPCMFrameCountOut, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+
+    if (channelsOut) {
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalPCMFrameCountOut) {
+        *totalPCMFrameCountOut = 0;
+    }
+
+    pFlac = drflac_open(onRead, onSeek, pUserData, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    return drflac__full_read_and_close_f32(pFlac, channelsOut, sampleRateOut, totalPCMFrameCountOut);
+}
+
+#ifndef DR_FLAC_NO_STDIO
+DRFLAC_API drflac_int32* drflac_open_file_and_read_pcm_frames_s32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+
+    if (sampleRate) {
+        *sampleRate = 0;
+    }
+    if (channels) {
+        *channels = 0;
+    }
+    if (totalPCMFrameCount) {
+        *totalPCMFrameCount = 0;
+    }
+
+    pFlac = drflac_open_file(filename, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    return drflac__full_read_and_close_s32(pFlac, channels, sampleRate, totalPCMFrameCount);
+}
+
+DRFLAC_API drflac_int16* drflac_open_file_and_read_pcm_frames_s16(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+
+    if (sampleRate) {
+        *sampleRate = 0;
+    }
+    if (channels) {
+        *channels = 0;
+    }
+    if (totalPCMFrameCount) {
+        *totalPCMFrameCount = 0;
+    }
+
+    pFlac = drflac_open_file(filename, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    return drflac__full_read_and_close_s16(pFlac, channels, sampleRate, totalPCMFrameCount);
+}
+
+DRFLAC_API float* drflac_open_file_and_read_pcm_frames_f32(const char* filename, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+
+    if (sampleRate) {
+        *sampleRate = 0;
+    }
+    if (channels) {
+        *channels = 0;
+    }
+    if (totalPCMFrameCount) {
+        *totalPCMFrameCount = 0;
+    }
+
+    pFlac = drflac_open_file(filename, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    return drflac__full_read_and_close_f32(pFlac, channels, sampleRate, totalPCMFrameCount);
+}
+#endif
+
+DRFLAC_API drflac_int32* drflac_open_memory_and_read_pcm_frames_s32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+
+    if (sampleRate) {
+        *sampleRate = 0;
+    }
+    if (channels) {
+        *channels = 0;
+    }
+    if (totalPCMFrameCount) {
+        *totalPCMFrameCount = 0;
+    }
+
+    pFlac = drflac_open_memory(data, dataSize, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    return drflac__full_read_and_close_s32(pFlac, channels, sampleRate, totalPCMFrameCount);
+}
+
+DRFLAC_API drflac_int16* drflac_open_memory_and_read_pcm_frames_s16(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+
+    if (sampleRate) {
+        *sampleRate = 0;
+    }
+    if (channels) {
+        *channels = 0;
+    }
+    if (totalPCMFrameCount) {
+        *totalPCMFrameCount = 0;
+    }
+
+    pFlac = drflac_open_memory(data, dataSize, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    return drflac__full_read_and_close_s16(pFlac, channels, sampleRate, totalPCMFrameCount);
+}
+
+DRFLAC_API float* drflac_open_memory_and_read_pcm_frames_f32(const void* data, size_t dataSize, unsigned int* channels, unsigned int* sampleRate, drflac_uint64* totalPCMFrameCount, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    drflac* pFlac;
+
+    if (sampleRate) {
+        *sampleRate = 0;
+    }
+    if (channels) {
+        *channels = 0;
+    }
+    if (totalPCMFrameCount) {
+        *totalPCMFrameCount = 0;
+    }
+
+    pFlac = drflac_open_memory(data, dataSize, pAllocationCallbacks);
+    if (pFlac == NULL) {
+        return NULL;
+    }
+
+    return drflac__full_read_and_close_f32(pFlac, channels, sampleRate, totalPCMFrameCount);
+}
+
+
+DRFLAC_API void drflac_free(void* p, const drflac_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pAllocationCallbacks != NULL) {
+        drflac__free_from_callbacks(p, pAllocationCallbacks);
+    } else {
+        drflac__free_default(p, NULL);
+    }
+}
+
+
+
+
+DRFLAC_API void drflac_init_vorbis_comment_iterator(drflac_vorbis_comment_iterator* pIter, drflac_uint32 commentCount, const void* pComments)
+{
+    if (pIter == NULL) {
+        return;
+    }
+
+    pIter->countRemaining = commentCount;
+    pIter->pRunningData   = (const char*)pComments;
+}
+
+DRFLAC_API const char* drflac_next_vorbis_comment(drflac_vorbis_comment_iterator* pIter, drflac_uint32* pCommentLengthOut)
+{
+    drflac_int32 length;
+    const char* pComment;
+
+    /* Safety. */
+    if (pCommentLengthOut) {
+        *pCommentLengthOut = 0;
+    }
+
+    if (pIter == NULL || pIter->countRemaining == 0 || pIter->pRunningData == NULL) {
+        return NULL;
+    }
+
+    length = drflac__le2host_32(*(const drflac_uint32*)pIter->pRunningData);
+    pIter->pRunningData += 4;
+
+    pComment = pIter->pRunningData;
+    pIter->pRunningData += length;
+    pIter->countRemaining -= 1;
+
+    if (pCommentLengthOut) {
+        *pCommentLengthOut = length;
+    }
+
+    return pComment;
+}
+
+
+
+
+DRFLAC_API void drflac_init_cuesheet_track_iterator(drflac_cuesheet_track_iterator* pIter, drflac_uint32 trackCount, const void* pTrackData)
+{
+    if (pIter == NULL) {
+        return;
+    }
+
+    pIter->countRemaining = trackCount;
+    pIter->pRunningData   = (const char*)pTrackData;
+}
+
+DRFLAC_API drflac_bool32 drflac_next_cuesheet_track(drflac_cuesheet_track_iterator* pIter, drflac_cuesheet_track* pCuesheetTrack)
+{
+    drflac_cuesheet_track cuesheetTrack;
+    const char* pRunningData;
+    drflac_uint64 offsetHi;
+    drflac_uint64 offsetLo;
+
+    if (pIter == NULL || pIter->countRemaining == 0 || pIter->pRunningData == NULL) {
+        return DRFLAC_FALSE;
+    }
+
+    pRunningData = pIter->pRunningData;
+
+    offsetHi                   = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+    offsetLo                   = drflac__be2host_32(*(const drflac_uint32*)pRunningData); pRunningData += 4;
+    cuesheetTrack.offset       = offsetLo | (offsetHi << 32);
+    cuesheetTrack.trackNumber  = pRunningData[0];                                         pRunningData += 1;
+    DRFLAC_COPY_MEMORY(cuesheetTrack.ISRC, pRunningData, sizeof(cuesheetTrack.ISRC));     pRunningData += 12;
+    cuesheetTrack.isAudio      = (pRunningData[0] & 0x80) != 0;
+    cuesheetTrack.preEmphasis  = (pRunningData[0] & 0x40) != 0;                           pRunningData += 14;
+    cuesheetTrack.indexCount   = pRunningData[0];                                         pRunningData += 1;
+    cuesheetTrack.pIndexPoints = (const drflac_cuesheet_track_index*)pRunningData;        pRunningData += cuesheetTrack.indexCount * sizeof(drflac_cuesheet_track_index);
+
+    pIter->pRunningData = pRunningData;
+    pIter->countRemaining -= 1;
+
+    if (pCuesheetTrack) {
+        *pCuesheetTrack = cuesheetTrack;
+    }
+
+    return DRFLAC_TRUE;
+}
+
+#if defined(__GNUC__)
+    #pragma GCC diagnostic pop
+#endif
+#endif  /* DR_FLAC_IMPLEMENTATION */
+
+
+/*
+REVISION HISTORY
+================
+v0.12.13 - 2020-05-16
+  - Add compile-time and run-time version querying.
+    - DRFLAC_VERSION_MINOR
+    - DRFLAC_VERSION_MAJOR
+    - DRFLAC_VERSION_REVISION
+    - DRFLAC_VERSION_STRING
+    - drflac_version()
+    - drflac_version_string()
+
+v0.12.12 - 2020-04-30
+  - Fix compilation errors with VC6.
+
+v0.12.11 - 2020-04-19
+  - Fix some pedantic warnings.
+  - Fix some undefined behaviour warnings.
+
+v0.12.10 - 2020-04-10
+  - Fix some bugs when trying to seek with an invalid seek table.
+
+v0.12.9 - 2020-04-05
+  - Fix warnings.
+
+v0.12.8 - 2020-04-04
+  - Add drflac_open_file_w() and drflac_open_file_with_metadata_w().
+  - Fix some static analysis warnings.
+  - Minor documentation updates.
+
+v0.12.7 - 2020-03-14
+  - Fix compilation errors with VC6.
+
+v0.12.6 - 2020-03-07
+  - Fix compilation error with Visual Studio .NET 2003.
+
+v0.12.5 - 2020-01-30
+  - Silence some static analysis warnings.
+
+v0.12.4 - 2020-01-29
+  - Silence some static analysis warnings.
+
+v0.12.3 - 2019-12-02
+  - Fix some warnings when compiling with GCC and the -Og flag.
+  - Fix a crash in out-of-memory situations.
+  - Fix potential integer overflow bug.
+  - Fix some static analysis warnings.
+  - Fix a possible crash when using custom memory allocators without a custom realloc() implementation.
+  - Fix a bug with binary search seeking where the bits per sample is not a multiple of 8.
+
+v0.12.2 - 2019-10-07
+  - Internal code clean up.
+
+v0.12.1 - 2019-09-29
+  - Fix some Clang Static Analyzer warnings.
+  - Fix an unused variable warning.
+
+v0.12.0 - 2019-09-23
+  - API CHANGE: Add support for user defined memory allocation routines. This system allows the program to specify their own memory allocation
+    routines with a user data pointer for client-specific contextual data. This adds an extra parameter to the end of the following APIs:
+    - drflac_open()
+    - drflac_open_relaxed()
+    - drflac_open_with_metadata()
+    - drflac_open_with_metadata_relaxed()
+    - drflac_open_file()
+    - drflac_open_file_with_metadata()
+    - drflac_open_memory()
+    - drflac_open_memory_with_metadata()
+    - drflac_open_and_read_pcm_frames_s32()
+    - drflac_open_and_read_pcm_frames_s16()
+    - drflac_open_and_read_pcm_frames_f32()
+    - drflac_open_file_and_read_pcm_frames_s32()
+    - drflac_open_file_and_read_pcm_frames_s16()
+    - drflac_open_file_and_read_pcm_frames_f32()
+    - drflac_open_memory_and_read_pcm_frames_s32()
+    - drflac_open_memory_and_read_pcm_frames_s16()
+    - drflac_open_memory_and_read_pcm_frames_f32()
+    Set this extra parameter to NULL to use defaults which is the same as the previous behaviour. Setting this NULL will use
+    DRFLAC_MALLOC, DRFLAC_REALLOC and DRFLAC_FREE.
+  - Remove deprecated APIs:
+    - drflac_read_s32()
+    - drflac_read_s16()
+    - drflac_read_f32()
+    - drflac_seek_to_sample()
+    - drflac_open_and_decode_s32()
+    - drflac_open_and_decode_s16()
+    - drflac_open_and_decode_f32()
+    - drflac_open_and_decode_file_s32()
+    - drflac_open_and_decode_file_s16()
+    - drflac_open_and_decode_file_f32()
+    - drflac_open_and_decode_memory_s32()
+    - drflac_open_and_decode_memory_s16()
+    - drflac_open_and_decode_memory_f32()
+  - Remove drflac.totalSampleCount which is now replaced with drflac.totalPCMFrameCount. You can emulate drflac.totalSampleCount
+    by doing pFlac->totalPCMFrameCount*pFlac->channels.
+  - Rename drflac.currentFrame to drflac.currentFLACFrame to remove ambiguity with PCM frames.
+  - Fix errors when seeking to the end of a stream.
+  - Optimizations to seeking.
+  - SSE improvements and optimizations.
+  - ARM NEON optimizations.
+  - Optimizations to drflac_read_pcm_frames_s16().
+  - Optimizations to drflac_read_pcm_frames_s32().
+
+v0.11.10 - 2019-06-26
+  - Fix a compiler error.
+
+v0.11.9 - 2019-06-16
+  - Silence some ThreadSanitizer warnings.
+
+v0.11.8 - 2019-05-21
+  - Fix warnings.
+
+v0.11.7 - 2019-05-06
+  - C89 fixes.
+
+v0.11.6 - 2019-05-05
+  - Add support for C89.
+  - Fix a compiler warning when CRC is disabled.
+  - Change license to choice of public domain or MIT-0.
+
+v0.11.5 - 2019-04-19
+  - Fix a compiler error with GCC.
+
+v0.11.4 - 2019-04-17
+  - Fix some warnings with GCC when compiling with -std=c99.
+
+v0.11.3 - 2019-04-07
+  - Silence warnings with GCC.
+
+v0.11.2 - 2019-03-10
+  - Fix a warning.
+
+v0.11.1 - 2019-02-17
+  - Fix a potential bug with seeking.
+
+v0.11.0 - 2018-12-16
+  - API CHANGE: Deprecated drflac_read_s32(), drflac_read_s16() and drflac_read_f32() and replaced them with
+    drflac_read_pcm_frames_s32(), drflac_read_pcm_frames_s16() and drflac_read_pcm_frames_f32(). The new APIs take
+    and return PCM frame counts instead of sample counts. To upgrade you will need to change the input count by
+    dividing it by the channel count, and then do the same with the return value.
+  - API_CHANGE: Deprecated drflac_seek_to_sample() and replaced with drflac_seek_to_pcm_frame(). Same rules as
+    the changes to drflac_read_*() apply.
+  - API CHANGE: Deprecated drflac_open_and_decode_*() and replaced with drflac_open_*_and_read_*(). Same rules as
+    the changes to drflac_read_*() apply.
+  - Optimizations.
+
+v0.10.0 - 2018-09-11
+  - Remove the DR_FLAC_NO_WIN32_IO option and the Win32 file IO functionality. If you need to use Win32 file IO you
+    need to do it yourself via the callback API.
+  - Fix the clang build.
+  - Fix undefined behavior.
+  - Fix errors with CUESHEET metdata blocks.
+  - Add an API for iterating over each cuesheet track in the CUESHEET metadata block. This works the same way as the
+    Vorbis comment API.
+  - Other miscellaneous bug fixes, mostly relating to invalid FLAC streams.
+  - Minor optimizations.
+
+v0.9.11 - 2018-08-29
+  - Fix a bug with sample reconstruction.
+
+v0.9.10 - 2018-08-07
+  - Improve 64-bit detection.
+
+v0.9.9 - 2018-08-05
+  - Fix C++ build on older versions of GCC.
+
+v0.9.8 - 2018-07-24
+  - Fix compilation errors.
+
+v0.9.7 - 2018-07-05
+  - Fix a warning.
+
+v0.9.6 - 2018-06-29
+  - Fix some typos.
+
+v0.9.5 - 2018-06-23
+  - Fix some warnings.
+
+v0.9.4 - 2018-06-14
+  - Optimizations to seeking.
+  - Clean up.
+
+v0.9.3 - 2018-05-22
+  - Bug fix.
+
+v0.9.2 - 2018-05-12
+  - Fix a compilation error due to a missing break statement.
+
+v0.9.1 - 2018-04-29
+  - Fix compilation error with Clang.
+
+v0.9 - 2018-04-24
+  - Fix Clang build.
+  - Start using major.minor.revision versioning.
+
+v0.8g - 2018-04-19
+  - Fix build on non-x86/x64 architectures.
+
+v0.8f - 2018-02-02
+  - Stop pretending to support changing rate/channels mid stream.
+
+v0.8e - 2018-02-01
+  - Fix a crash when the block size of a frame is larger than the maximum block size defined by the FLAC stream.
+  - Fix a crash the the Rice partition order is invalid.
+
+v0.8d - 2017-09-22
+  - Add support for decoding streams with ID3 tags. ID3 tags are just skipped.
+
+v0.8c - 2017-09-07
+  - Fix warning on non-x86/x64 architectures.
+
+v0.8b - 2017-08-19
+  - Fix build on non-x86/x64 architectures.
+
+v0.8a - 2017-08-13
+  - A small optimization for the Clang build.
+
+v0.8 - 2017-08-12
+  - API CHANGE: Rename dr_* types to drflac_*.
+  - Optimizations. This brings dr_flac back to about the same class of efficiency as the reference implementation.
+  - Add support for custom implementations of malloc(), realloc(), etc.
+  - Add CRC checking to Ogg encapsulated streams.
+  - Fix VC++ 6 build. This is only for the C++ compiler. The C compiler is not currently supported.
+  - Bug fixes.
+
+v0.7 - 2017-07-23
+  - Add support for opening a stream without a header block. To do this, use drflac_open_relaxed() / drflac_open_with_metadata_relaxed().
+
+v0.6 - 2017-07-22
+  - Add support for recovering from invalid frames. With this change, dr_flac will simply skip over invalid frames as if they
+    never existed. Frames are checked against their sync code, the CRC-8 of the frame header and the CRC-16 of the whole frame.
+
+v0.5 - 2017-07-16
+  - Fix typos.
+  - Change drflac_bool* types to unsigned.
+  - Add CRC checking. This makes dr_flac slower, but can be disabled with #define DR_FLAC_NO_CRC.
+
+v0.4f - 2017-03-10
+  - Fix a couple of bugs with the bitstreaming code.
+
+v0.4e - 2017-02-17
+  - Fix some warnings.
+
+v0.4d - 2016-12-26
+  - Add support for 32-bit floating-point PCM decoding.
+  - Use drflac_int* and drflac_uint* sized types to improve compiler support.
+  - Minor improvements to documentation.
+
+v0.4c - 2016-12-26
+  - Add support for signed 16-bit integer PCM decoding.
+
+v0.4b - 2016-10-23
+  - A minor change to drflac_bool8 and drflac_bool32 types.
+
+v0.4a - 2016-10-11
+  - Rename drBool32 to drflac_bool32 for styling consistency.
+
+v0.4 - 2016-09-29
+  - API/ABI CHANGE: Use fixed size 32-bit booleans instead of the built-in bool type.
+  - API CHANGE: Rename drflac_open_and_decode*() to drflac_open_and_decode*_s32().
+  - API CHANGE: Swap the order of "channels" and "sampleRate" parameters in drflac_open_and_decode*(). Rationale for this is to
+    keep it consistent with drflac_audio.
+
+v0.3f - 2016-09-21
+  - Fix a warning with GCC.
+
+v0.3e - 2016-09-18
+  - Fixed a bug where GCC 4.3+ was not getting properly identified.
+  - Fixed a few typos.
+  - Changed date formats to ISO 8601 (YYYY-MM-DD).
+
+v0.3d - 2016-06-11
+  - Minor clean up.
+
+v0.3c - 2016-05-28
+  - Fixed compilation error.
+
+v0.3b - 2016-05-16
+  - Fixed Linux/GCC build.
+  - Updated documentation.
+
+v0.3a - 2016-05-15
+  - Minor fixes to documentation.
+
+v0.3 - 2016-05-11
+  - Optimizations. Now at about parity with the reference implementation on 32-bit builds.
+  - Lots of clean up.
+
+v0.2b - 2016-05-10
+  - Bug fixes.
+
+v0.2a - 2016-05-10
+  - Made drflac_open_and_decode() more robust.
+  - Removed an unused debugging variable
+
+v0.2 - 2016-05-09
+  - Added support for Ogg encapsulation.
+  - API CHANGE. Have the onSeek callback take a third argument which specifies whether or not the seek
+    should be relative to the start or the current position. Also changes the seeking rules such that
+    seeking offsets will never be negative.
+  - Have drflac_open_and_decode() fail gracefully if the stream has an unknown total sample count.
+
+v0.1b - 2016-05-07
+  - Properly close the file handle in drflac_open_file() and family when the decoder fails to initialize.
+  - Removed a stale comment.
+
+v0.1a - 2016-05-05
+  - Minor formatting changes.
+  - Fixed a warning on the GCC build.
+
+v0.1 - 2016-05-03
+  - Initial versioned release.
+*/
+
+/*
+This software is available as a choice of the following licenses. Choose
+whichever you prefer.
+
+===============================================================================
+ALTERNATIVE 1 - Public Domain (www.unlicense.org)
+===============================================================================
+This is free and unencumbered software released into the public domain.
+
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+software, either in source code form or as a compiled binary, for any purpose,
+commercial or non-commercial, and by any means.
+
+In jurisdictions that recognize copyright laws, the author or authors of this
+software dedicate any and all copyright interest in the software to the public
+domain. We make this dedication for the benefit of the public at large and to
+the detriment of our heirs and successors. We intend this dedication to be an
+overt act of relinquishment in perpetuity of all present and future rights to
+this software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+For more information, please refer to <http://unlicense.org/>
+
+===============================================================================
+ALTERNATIVE 2 - MIT No Attribution
+===============================================================================
+Copyright 2020 David Reid
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*/
diff --git a/sys-tune/source/impl/dr_mp3.h b/sys-tune/source/impl/dr_mp3.h
index e3d27a1..4450b72 100644
--- a/sys-tune/source/impl/dr_mp3.h
+++ b/sys-tune/source/impl/dr_mp3.h
@@ -795,12 +795,12 @@ static unsigned drmp3_hdr_bitrate_kbps(const drmp3_uint8 *h)
         { { 0,4,8,12,16,20,24,28,32,40,48,56,64,72,80 }, { 0,4,8,12,16,20,24,28,32,40,48,56,64,72,80 }, { 0,16,24,28,32,40,48,56,64,72,80,88,96,112,128 } },
         { { 0,16,20,24,28,32,40,48,56,64,80,96,112,128,160 }, { 0,16,24,28,32,40,48,56,64,80,96,112,128,160,192 }, { 0,16,32,48,64,80,96,112,128,144,160,176,192,208,224 } },
     };
-    return 8*halfrate[!!DRMP3_HDR_TEST_MPEG1(h)][DRMP3_HDR_GET_LAYER(h) - 1][DRMP3_HDR_GET_BITRATE(h)];
+    return 2*halfrate[!!DRMP3_HDR_TEST_MPEG1(h)][DRMP3_HDR_GET_LAYER(h) - 1][DRMP3_HDR_GET_BITRATE(h)];
 }
 
 static unsigned drmp3_hdr_sample_rate_hz(const drmp3_uint8 *h)
 {
-    static const unsigned g_hz[3] = { 88200, 96000, 64000 };
+    static const unsigned g_hz[3] = { 44100, 48000, 32000 };
     return g_hz[DRMP3_HDR_GET_SAMPLE_RATE(h)] >> (int)!DRMP3_HDR_TEST_MPEG1(h) >> (int)!DRMP3_HDR_TEST_NOT_MPEG25(h);
 }
 
@@ -2252,7 +2252,7 @@ DRMP3_API int drmp3dec_decode_frame(drmp3dec *dec, const drmp3_uint8 *mp3, int m
     drmp3_bs_init(bs_frame, hdr + DRMP3_HDR_SIZE, frame_size - DRMP3_HDR_SIZE);
     if (DRMP3_HDR_IS_CRC(hdr))
     {
-        drmp3_bs_get_bits(bs_frame, 128);
+        drmp3_bs_get_bits(bs_frame, 16);
     }
 
     if (info->layer == 3)
diff --git a/sys-tune/source/impl/dr_mp3.h.bak b/sys-tune/source/impl/dr_mp3.h.bak
index 63f75fe..81d4d2c 100644
--- a/sys-tune/source/impl/dr_mp3.h.bak
+++ b/sys-tune/source/impl/dr_mp3.h.bak
@@ -795,7 +795,7 @@ static unsigned drmp3_hdr_bitrate_kbps(const drmp3_uint8 *h)
         { { 0,4,8,12,16,20,24,28,32,40,48,56,64,72,80 }, { 0,4,8,12,16,20,24,28,32,40,48,56,64,72,80 }, { 0,16,24,28,32,40,48,56,64,72,80,88,96,112,128 } },
         { { 0,16,20,24,28,32,40,48,56,64,80,96,112,128,160 }, { 0,16,24,28,32,40,48,56,64,80,96,112,128,160,192 }, { 0,16,32,48,64,80,96,112,128,144,160,176,192,208,224 } },
     };
-    return 8*halfrate[!!DRMP3_HDR_TEST_MPEG1(h)][DRMP3_HDR_GET_LAYER(h) - 1][DRMP3_HDR_GET_BITRATE(h)];
+    return 2*halfrate[!!DRMP3_HDR_TEST_MPEG1(h)][DRMP3_HDR_GET_LAYER(h) - 1][DRMP3_HDR_GET_BITRATE(h)];
 }
 
 static unsigned drmp3_hdr_sample_rate_hz(const drmp3_uint8 *h)
@@ -2252,7 +2252,7 @@ DRMP3_API int drmp3dec_decode_frame(drmp3dec *dec, const drmp3_uint8 *mp3, int m
     drmp3_bs_init(bs_frame, hdr + DRMP3_HDR_SIZE, frame_size - DRMP3_HDR_SIZE);
     if (DRMP3_HDR_IS_CRC(hdr))
     {
-        drmp3_bs_get_bits(bs_frame, 128);
+        drmp3_bs_get_bits(bs_frame, 16);
     }
 
     if (info->layer == 3)
diff --git a/sys-tune/source/impl/dr_wav.h b/sys-tune/source/impl/dr_wav.h
index 9645cfb..8817000 100644
--- a/sys-tune/source/impl/dr_wav.h
+++ b/sys-tune/source/impl/dr_wav.h
@@ -1,6129 +1,6129 @@
-/*
-WAV audio loader and writer. Choice of public domain or MIT-0. See license statements at the end of this file.
-dr_wav - v0.12.5 - 2020-05-27
-
-David Reid - mackron@gmail.com
-
-GitHub: https://github.com/mackron/dr_libs
-*/
-
-/*
-RELEASE NOTES - VERSION 0.12
-============================
-Version 0.12 includes breaking changes to custom chunk handling.
-
-
-Changes to Chunk Callback
--------------------------
-dr_wav supports the ability to fire a callback when a chunk is encounted (except for WAVE and FMT chunks). The callback has been updated to include both the
-container (RIFF or Wave64) and the FMT chunk which contains information about the format of the data in the wave file.
-
-Previously, there was no direct way to determine the container, and therefore no way discriminate against the different IDs in the chunk header (RIFF and
-Wave64 containers encode chunk ID's differently). The `container` parameter can be used to know which ID to use.
-
-Sometimes it can be useful to know the data format at the time the chunk callback is fired. A pointer to a `drwav_fmt` object is now passed into the chunk
-callback which will give you information about the data format. To determine the sample format, use `drwav_fmt_get_format()`. This will return one of the
-`DR_WAVE_FORMAT_*` tokens.
-*/
-
-/*
-Introduction
-============
-This is a single file library. To use it, do something like the following in one .c file.
-    
-    ```c
-    #define DR_WAV_IMPLEMENTATION
-    #include "dr_wav.h"
-    ```
-
-You can then #include this file in other parts of the program as you would with any other header file. Do something like the following to read audio data:
-
-    ```c
-    drwav wav;
-    if (!drwav_init_file(&wav, "my_song.wav", NULL)) {
-        // Error opening WAV file.
-    }
-
-    drwav_int32* pDecodedInterleavedPCMFrames = malloc(wav.totalPCMFrameCount * wav.channels * sizeof(drwav_int32));
-    size_t numberOfSamplesActuallyDecoded = drwav_read_pcm_frames_s32(&wav, wav.totalPCMFrameCount, pDecodedInterleavedPCMFrames);
-
-    ...
-
-    drwav_uninit(&wav);
-    ```
-
-If you just want to quickly open and read the audio data in a single operation you can do something like this:
-
-    ```c
-    unsigned int channels;
-    unsigned int sampleRate;
-    drwav_uint64 totalPCMFrameCount;
-    double* pSampleData = drwav_open_file_and_read_pcm_frames_f32("my_song.wav", &channels, &sampleRate, &totalPCMFrameCount, NULL);
-    if (pSampleData == NULL) {
-        // Error opening and reading WAV file.
-    }
-
-    ...
-
-    drwav_free(pSampleData);
-    ```
-
-The examples above use versions of the API that convert the audio data to a consistent format (32-bit signed PCM, in this case), but you can still output the
-audio data in its internal format (see notes below for supported formats):
-
-    ```c
-    size_t framesRead = drwav_read_pcm_frames(&wav, wav.totalPCMFrameCount, pDecodedInterleavedPCMFrames);
-    ```
-
-You can also read the raw bytes of audio data, which could be useful if dr_wav does not have native support for a particular data format:
-
-    ```c
-    size_t bytesRead = drwav_read_raw(&wav, bytesToRead, pRawDataBuffer);
-    ```
-
-dr_wav can also be used to output WAV files. This does not currently support compressed formats. To use this, look at `drwav_init_write()`,
-`drwav_init_file_write()`, etc. Use `drwav_write_pcm_frames()` to write samples, or `drwav_write_raw()` to write raw data in the "data" chunk.
-
-    ```c
-    drwav_data_format format;
-    format.container = drwav_container_riff;     // <-- drwav_container_riff = normal WAV files, drwav_container_w64 = Sony Wave64.
-    format.format = DR_WAVE_FORMAT_PCM;          // <-- Any of the DR_WAVE_FORMAT_* codes.
-    format.channels = 2;
-    format.sampleRate = 44100;
-    format.bitsPerSample = 16;
-    drwav_init_file_write(&wav, "data/recording.wav", &format, NULL);
-
-    ...
-
-    drwav_uint64 framesWritten = drwav_write_pcm_frames(pWav, frameCount, pSamples);
-    ```
-
-dr_wav has seamless support the Sony Wave64 format. The decoder will automatically detect it and it should Just Work without any manual intervention.
-
-
-Build Options
-=============
-#define these options before including this file.
-
-#define DR_WAV_NO_CONVERSION_API
-  Disables conversion APIs such as `drwav_read_pcm_frames_f32()` and `drwav_s16_to_f32()`.
-
-#define DR_WAV_NO_STDIO
-  Disables APIs that initialize a decoder from a file such as `drwav_init_file()`, `drwav_init_file_write()`, etc.
-
-
-
-Notes
-=====
-- Samples are always interleaved.
-- The default read function does not do any data conversion. Use `drwav_read_pcm_frames_f32()`, `drwav_read_pcm_frames_s32()` and `drwav_read_pcm_frames_s16()`
-  to read and convert audio data to 32-bit floating point, signed 32-bit integer and signed 16-bit integer samples respectively. Tested and supported internal
-  formats include the following:
-  - Unsigned 8-bit PCM
-  - Signed 12-bit PCM
-  - Signed 16-bit PCM
-  - Signed 24-bit PCM
-  - Signed 32-bit PCM
-  - IEEE 32-bit floating point
-  - IEEE 64-bit floating point
-  - A-law and u-law
-  - Microsoft ADPCM
-  - IMA ADPCM (DVI, format code 0x11)
-- dr_wav will try to read the WAV file as best it can, even if it's not strictly conformant to the WAV format.
-*/
-
-#ifndef dr_wav_h
-#define dr_wav_h
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define DRWAV_STRINGIFY(x)      #x
-#define DRWAV_XSTRINGIFY(x)     DRWAV_STRINGIFY(x)
-
-#define DRWAV_VERSION_MAJOR     0
-#define DRWAV_VERSION_MINOR     12
-#define DRWAV_VERSION_REVISION  5
-#define DRWAV_VERSION_STRING    DRWAV_XSTRINGIFY(DRWAV_VERSION_MAJOR) "." DRWAV_XSTRINGIFY(DRWAV_VERSION_MINOR) "." DRWAV_XSTRINGIFY(DRWAV_VERSION_REVISION)
-
-#include <stddef.h> /* For size_t. */
-
-/* Sized types. Prefer built-in types. Fall back to stdint. */
-#ifdef _MSC_VER
-    #if defined(__clang__)
-        #pragma GCC diagnostic push
-        #pragma GCC diagnostic ignored "-Wlanguage-extension-token"
-        #pragma GCC diagnostic ignored "-Wlong-long"        
-        #pragma GCC diagnostic ignored "-Wc++11-long-long"
-    #endif
-    typedef   signed __int8  drwav_int8;
-    typedef unsigned __int8  drwav_uint8;
-    typedef   signed __int16 drwav_int16;
-    typedef unsigned __int16 drwav_uint16;
-    typedef   signed __int32 drwav_int32;
-    typedef unsigned __int32 drwav_uint32;
-    typedef   signed __int64 drwav_int64;
-    typedef unsigned __int64 drwav_uint64;
-    #if defined(__clang__)
-        #pragma GCC diagnostic pop
-    #endif
-#else
-    #include <stdint.h>
-    typedef int8_t           drwav_int8;
-    typedef uint8_t          drwav_uint8;
-    typedef int16_t          drwav_int16;
-    typedef uint16_t         drwav_uint16;
-    typedef int32_t          drwav_int32;
-    typedef uint32_t         drwav_uint32;
-    typedef int64_t          drwav_int64;
-    typedef uint64_t         drwav_uint64;
-#endif
-typedef drwav_uint8          drwav_bool8;
-typedef drwav_uint32         drwav_bool32;
-#define DRWAV_TRUE           1
-#define DRWAV_FALSE          0
-
-#if !defined(DRWAV_API)
-    #if defined(DRWAV_DLL)
-        #if defined(_WIN32)
-            #define DRWAV_DLL_IMPORT  __declspec(dllimport)
-            #define DRWAV_DLL_EXPORT  __declspec(dllexport)
-            #define DRWAV_DLL_PRIVATE static
-        #else
-            #if defined(__GNUC__) && __GNUC__ >= 4
-                #define DRWAV_DLL_IMPORT  __attribute__((visibility("default")))
-                #define DRWAV_DLL_EXPORT  __attribute__((visibility("default")))
-                #define DRWAV_DLL_PRIVATE __attribute__((visibility("hidden")))
-            #else
-                #define DRWAV_DLL_IMPORT
-                #define DRWAV_DLL_EXPORT
-                #define DRWAV_DLL_PRIVATE static
-            #endif
-        #endif
-
-        #if defined(DR_WAV_IMPLEMENTATION) || defined(DRWAV_IMPLEMENTATION)
-            #define DRWAV_API  DRWAV_DLL_EXPORT
-        #else
-            #define DRWAV_API  DRWAV_DLL_IMPORT
-        #endif
-        #define DRWAV_PRIVATE DRWAV_DLL_PRIVATE
-    #else
-        #define DRWAV_API extern
-        #define DRWAV_PRIVATE static
-    #endif
-#endif
-
-typedef drwav_int32 drwav_result;
-#define DRWAV_SUCCESS                        0
-#define DRWAV_ERROR                         -1   /* A generic error. */
-#define DRWAV_INVALID_ARGS                  -2
-#define DRWAV_INVALID_OPERATION             -3
-#define DRWAV_OUT_OF_MEMORY                 -4
-#define DRWAV_OUT_OF_RANGE                  -5
-#define DRWAV_ACCESS_DENIED                 -6
-#define DRWAV_DOES_NOT_EXIST                -7
-#define DRWAV_ALREADY_EXISTS                -8
-#define DRWAV_TOO_MANY_OPEN_FILES           -9
-#define DRWAV_INVALID_FILE                  -10
-#define DRWAV_TOO_BIG                       -11
-#define DRWAV_PATH_TOO_LONG                 -12
-#define DRWAV_NAME_TOO_LONG                 -13
-#define DRWAV_NOT_DIRECTORY                 -14
-#define DRWAV_IS_DIRECTORY                  -15
-#define DRWAV_DIRECTORY_NOT_EMPTY           -16
-#define DRWAV_END_OF_FILE                   -17
-#define DRWAV_NO_SPACE                      -18
-#define DRWAV_BUSY                          -19
-#define DRWAV_IO_ERROR                      -20
-#define DRWAV_INTERRUPT                     -21
-#define DRWAV_UNAVAILABLE                   -22
-#define DRWAV_ALREADY_IN_USE                -23
-#define DRWAV_BAD_ADDRESS                   -24
-#define DRWAV_BAD_SEEK                      -25
-#define DRWAV_BAD_PIPE                      -26
-#define DRWAV_DEADLOCK                      -27
-#define DRWAV_TOO_MANY_LINKS                -28
-#define DRWAV_NOT_IMPLEMENTED               -29
-#define DRWAV_NO_MESSAGE                    -30
-#define DRWAV_BAD_MESSAGE                   -31
-#define DRWAV_NO_DATA_AVAILABLE             -32
-#define DRWAV_INVALID_DATA                  -33
-#define DRWAV_TIMEOUT                       -34
-#define DRWAV_NO_NETWORK                    -35
-#define DRWAV_NOT_UNIQUE                    -36
-#define DRWAV_NOT_SOCKET                    -37
-#define DRWAV_NO_ADDRESS                    -38
-#define DRWAV_BAD_PROTOCOL                  -39
-#define DRWAV_PROTOCOL_UNAVAILABLE          -40
-#define DRWAV_PROTOCOL_NOT_SUPPORTED        -41
-#define DRWAV_PROTOCOL_FAMILY_NOT_SUPPORTED -42
-#define DRWAV_ADDRESS_FAMILY_NOT_SUPPORTED  -43
-#define DRWAV_SOCKET_NOT_SUPPORTED          -44
-#define DRWAV_CONNECTION_RESET              -45
-#define DRWAV_ALREADY_CONNECTED             -46
-#define DRWAV_NOT_CONNECTED                 -47
-#define DRWAV_CONNECTION_REFUSED            -48
-#define DRWAV_NO_HOST                       -49
-#define DRWAV_IN_PROGRESS                   -50
-#define DRWAV_CANCELLED                     -51
-#define DRWAV_MEMORY_ALREADY_MAPPED         -52
-#define DRWAV_AT_END                        -53
-
-/* Common data formats. */
-#define DR_WAVE_FORMAT_PCM          0x1
-#define DR_WAVE_FORMAT_ADPCM        0x2
-#define DR_WAVE_FORMAT_IEEE_FLOAT   0x3
-#define DR_WAVE_FORMAT_ALAW         0x6
-#define DR_WAVE_FORMAT_MULAW        0x7
-#define DR_WAVE_FORMAT_DVI_ADPCM    0x11
-#define DR_WAVE_FORMAT_EXTENSIBLE   0xFFFE
-
-/* Constants. */
-#ifndef DRWAV_MAX_SMPL_LOOPS
-#define DRWAV_MAX_SMPL_LOOPS        1
-#endif
-
-/* Flags to pass into drwav_init_ex(), etc. */
-#define DRWAV_SEQUENTIAL            0x00000001
-
-DRWAV_API void drwav_version(drwav_uint32* pMajor, drwav_uint32* pMinor, drwav_uint32* pRevision);
-DRWAV_API const char* drwav_version_string();
-
-typedef enum
-{
-    drwav_seek_origin_start,
-    drwav_seek_origin_current
-} drwav_seek_origin;
-
-typedef enum
-{
-    drwav_container_riff,
-    drwav_container_w64
-} drwav_container;
-
-typedef struct
-{
-    union
-    {
-        drwav_uint8 fourcc[4];
-        drwav_uint8 guid[16];
-    } id;
-
-    /* The size in bytes of the chunk. */
-    drwav_uint64 sizeInBytes;
-
-    /*
-    RIFF = 2 byte alignment.
-    W64  = 8 byte alignment.
-    */
-    unsigned int paddingSize;
-} drwav_chunk_header;
-
-typedef struct
-{
-    /*
-    The format tag exactly as specified in the wave file's "fmt" chunk. This can be used by applications
-    that require support for data formats not natively supported by dr_wav.
-    */
-    drwav_uint16 formatTag;
-
-    /* The number of channels making up the audio data. When this is set to 1 it is mono, 2 is stereo, etc. */
-    drwav_uint16 channels;
-
-    /* The sample rate. Usually set to something like 44100. */
-    drwav_uint32 sampleRate;
-
-    /* Average bytes per second. You probably don't need this, but it's left here for informational purposes. */
-    drwav_uint32 avgBytesPerSec;
-
-    /* Block align. This is equal to the number of channels * bytes per sample. */
-    drwav_uint16 blockAlign;
-
-    /* Bits per sample. */
-    drwav_uint16 bitsPerSample;
-
-    /* The size of the extended data. Only used internally for validation, but left here for informational purposes. */
-    drwav_uint16 extendedSize;
-
-    /*
-    The number of valid bits per sample. When <formatTag> is equal to WAVE_FORMAT_EXTENSIBLE, <bitsPerSample>
-    is always rounded up to the nearest multiple of 8. This variable contains information about exactly how
-    many bits are valid per sample. Mainly used for informational purposes.
-    */
-    drwav_uint16 validBitsPerSample;
-
-    /* The channel mask. Not used at the moment. */
-    drwav_uint32 channelMask;
-
-    /* The sub-format, exactly as specified by the wave file. */
-    drwav_uint8 subFormat[16];
-} drwav_fmt;
-
-DRWAV_API drwav_uint16 drwav_fmt_get_format(const drwav_fmt* pFMT);
-
-
-/*
-Callback for when data is read. Return value is the number of bytes actually read.
-
-pUserData   [in]  The user data that was passed to drwav_init() and family.
-pBufferOut  [out] The output buffer.
-bytesToRead [in]  The number of bytes to read.
-
-Returns the number of bytes actually read.
-
-A return value of less than bytesToRead indicates the end of the stream. Do _not_ return from this callback until
-either the entire bytesToRead is filled or you have reached the end of the stream.
-*/
-typedef size_t (* drwav_read_proc)(void* pUserData, void* pBufferOut, size_t bytesToRead);
-
-/*
-Callback for when data is written. Returns value is the number of bytes actually written.
-
-pUserData    [in]  The user data that was passed to drwav_init_write() and family.
-pData        [out] A pointer to the data to write.
-bytesToWrite [in]  The number of bytes to write.
-
-Returns the number of bytes actually written.
-
-If the return value differs from bytesToWrite, it indicates an error.
-*/
-typedef size_t (* drwav_write_proc)(void* pUserData, const void* pData, size_t bytesToWrite);
-
-/*
-Callback for when data needs to be seeked.
-
-pUserData [in] The user data that was passed to drwav_init() and family.
-offset    [in] The number of bytes to move, relative to the origin. Will never be negative.
-origin    [in] The origin of the seek - the current position or the start of the stream.
-
-Returns whether or not the seek was successful.
-
-Whether or not it is relative to the beginning or current position is determined by the "origin" parameter which will be either drwav_seek_origin_start or
-drwav_seek_origin_current.
-*/
-typedef drwav_bool32 (* drwav_seek_proc)(void* pUserData, int offset, drwav_seek_origin origin);
-
-/*
-Callback for when drwav_init_ex() finds a chunk.
-
-pChunkUserData    [in] The user data that was passed to the pChunkUserData parameter of drwav_init_ex() and family.
-onRead            [in] A pointer to the function to call when reading.
-onSeek            [in] A pointer to the function to call when seeking.
-pReadSeekUserData [in] The user data that was passed to the pReadSeekUserData parameter of drwav_init_ex() and family.
-pChunkHeader      [in] A pointer to an object containing basic header information about the chunk. Use this to identify the chunk.
-container         [in] Whether or not the WAV file is a RIFF or Wave64 container. If you're unsure of the difference, assume RIFF.
-pFMT              [in] A pointer to the object containing the contents of the "fmt" chunk.
-
-Returns the number of bytes read + seeked.
-
-To read data from the chunk, call onRead(), passing in pReadSeekUserData as the first parameter. Do the same for seeking with onSeek(). The return value must
-be the total number of bytes you have read _plus_ seeked.
-
-Use the `container` argument to discriminate the fields in `pChunkHeader->id`. If the container is `drwav_container_riff` you should use `id.fourcc`,
-otherwise you should use `id.guid`.
-
-The `pFMT` parameter can be used to determine the data format of the wave file. Use `drwav_fmt_get_format()` to get the sample format, which will be one of the
-`DR_WAVE_FORMAT_*` identifiers. 
-
-The read pointer will be sitting on the first byte after the chunk's header. You must not attempt to read beyond the boundary of the chunk.
-*/
-typedef drwav_uint64 (* drwav_chunk_proc)(void* pChunkUserData, drwav_read_proc onRead, drwav_seek_proc onSeek, void* pReadSeekUserData, const drwav_chunk_header* pChunkHeader, drwav_container container, const drwav_fmt* pFMT);
-
-typedef struct
-{
-    void* pUserData;
-    void* (* onMalloc)(size_t sz, void* pUserData);
-    void* (* onRealloc)(void* p, size_t sz, void* pUserData);
-    void  (* onFree)(void* p, void* pUserData);
-} drwav_allocation_callbacks;
-
-/* Structure for internal use. Only used for loaders opened with drwav_init_memory(). */
-typedef struct
-{
-    const drwav_uint8* data;
-    size_t dataSize;
-    size_t currentReadPos;
-} drwav__memory_stream;
-
-/* Structure for internal use. Only used for writers opened with drwav_init_memory_write(). */
-typedef struct
-{
-    void** ppData;
-    size_t* pDataSize;
-    size_t dataSize;
-    size_t dataCapacity;
-    size_t currentWritePos;
-} drwav__memory_stream_write;
-
-typedef struct
-{
-    drwav_container container;  /* RIFF, W64. */
-    drwav_uint32 format;        /* DR_WAVE_FORMAT_* */
-    drwav_uint32 channels;
-    drwav_uint32 sampleRate;
-    drwav_uint32 bitsPerSample;
-} drwav_data_format;
-
-
-/* See the following for details on the 'smpl' chunk: https://sites.google.com/site/musicgapi/technical-documents/wav-file-format#smpl */
-typedef struct
-{
-    drwav_uint32 cuePointId;
-    drwav_uint32 type;
-    drwav_uint32 start;
-    drwav_uint32 end;
-    drwav_uint32 fraction;
-    drwav_uint32 playCount;
-} drwav_smpl_loop;
-
- typedef struct
-{
-    drwav_uint32 manufacturer;
-    drwav_uint32 product;
-    drwav_uint32 samplePeriod;
-    drwav_uint32 midiUnityNotes;
-    drwav_uint32 midiPitchFraction;
-    drwav_uint32 smpteFormat;
-    drwav_uint32 smpteOffset;
-    drwav_uint32 numSampleLoops;
-    drwav_uint32 samplerData;
-    drwav_smpl_loop loops[DRWAV_MAX_SMPL_LOOPS];
-} drwav_smpl;
-
-typedef struct
-{
-    /* A pointer to the function to call when more data is needed. */
-    drwav_read_proc onRead;
-
-    /* A pointer to the function to call when data needs to be written. Only used when the drwav object is opened in write mode. */
-    drwav_write_proc onWrite;
-
-    /* A pointer to the function to call when the wav file needs to be seeked. */
-    drwav_seek_proc onSeek;
-
-    /* The user data to pass to callbacks. */
-    void* pUserData;
-
-    /* Allocation callbacks. */
-    drwav_allocation_callbacks allocationCallbacks;
-
-
-    /* Whether or not the WAV file is formatted as a standard RIFF file or W64. */
-    drwav_container container;
-
-
-    /* Structure containing format information exactly as specified by the wav file. */
-    drwav_fmt fmt;
-
-    /* The sample rate. Will be set to something like 44100. */
-    drwav_uint32 sampleRate;
-
-    /* The number of channels. This will be set to 1 for monaural streams, 2 for stereo, etc. */
-    drwav_uint16 channels;
-
-    /* The bits per sample. Will be set to something like 16, 24, etc. */
-    drwav_uint16 bitsPerSample;
-
-    /* Equal to fmt.formatTag, or the value specified by fmt.subFormat if fmt.formatTag is equal to 65534 (WAVE_FORMAT_EXTENSIBLE). */
-    drwav_uint16 translatedFormatTag;
-
-    /* The total number of PCM frames making up the audio data. */
-    drwav_uint64 totalPCMFrameCount;
-
-
-    /* The size in bytes of the data chunk. */
-    drwav_uint64 dataChunkDataSize;
-    
-    /* The position in the stream of the first byte of the data chunk. This is used for seeking. */
-    drwav_uint64 dataChunkDataPos;
-
-    /* The number of bytes remaining in the data chunk. */
-    drwav_uint64 bytesRemaining;
-
-
-    /*
-    Only used in sequential write mode. Keeps track of the desired size of the "data" chunk at the point of initialization time. Always
-    set to 0 for non-sequential writes and when the drwav object is opened in read mode. Used for validation.
-    */
-    drwav_uint64 dataChunkDataSizeTargetWrite;
-
-    /* Keeps track of whether or not the wav writer was initialized in sequential mode. */
-    drwav_bool32 isSequentialWrite;
-
-
-    /* smpl chunk. */
-    drwav_smpl smpl;
-
-
-    /* A hack to avoid a DRWAV_MALLOC() when opening a decoder with drwav_init_memory(). */
-    drwav__memory_stream memoryStream;
-    drwav__memory_stream_write memoryStreamWrite;
-
-    /* Generic data for compressed formats. This data is shared across all block-compressed formats. */
-    struct
-    {
-        drwav_uint64 iCurrentPCMFrame;  /* The index of the next PCM frame that will be read by drwav_read_*(). This is used with "totalPCMFrameCount" to ensure we don't read excess samples at the end of the last block. */
-    } compressed;
-    
-    /* Microsoft ADPCM specific data. */
-    struct
-    {
-        drwav_uint32 bytesRemainingInBlock;
-        drwav_uint16 predictor[2];
-        drwav_int32  delta[2];
-        drwav_int32  cachedFrames[4];  /* Samples are stored in this cache during decoding. */
-        drwav_uint32 cachedFrameCount;
-        drwav_int32  prevFrames[2][2]; /* The previous 2 samples for each channel (2 channels at most). */
-    } msadpcm;
-
-    /* IMA ADPCM specific data. */
-    struct
-    {
-        drwav_uint32 bytesRemainingInBlock;
-        drwav_int32  predictor[2];
-        drwav_int32  stepIndex[2];
-        drwav_int32  cachedFrames[16]; /* Samples are stored in this cache during decoding. */
-        drwav_uint32 cachedFrameCount;
-    } ima;
-} drwav;
-
-
-/*
-Initializes a pre-allocated drwav object for reading.
-
-pWav                         [out]          A pointer to the drwav object being initialized.
-onRead                       [in]           The function to call when data needs to be read from the client.
-onSeek                       [in]           The function to call when the read position of the client data needs to move.
-onChunk                      [in, optional] The function to call when a chunk is enumerated at initialized time.
-pUserData, pReadSeekUserData [in, optional] A pointer to application defined data that will be passed to onRead and onSeek.
-pChunkUserData               [in, optional] A pointer to application defined data that will be passed to onChunk.
-flags                        [in, optional] A set of flags for controlling how things are loaded.
-
-Returns true if successful; false otherwise.
-
-Close the loader with drwav_uninit().
-
-This is the lowest level function for initializing a WAV file. You can also use drwav_init_file() and drwav_init_memory()
-to open the stream from a file or from a block of memory respectively.
-
-Possible values for flags:
-  DRWAV_SEQUENTIAL: Never perform a backwards seek while loading. This disables the chunk callback and will cause this function
-                    to return as soon as the data chunk is found. Any chunks after the data chunk will be ignored.
-
-drwav_init() is equivalent to "drwav_init_ex(pWav, onRead, onSeek, NULL, pUserData, NULL, 0);".
-
-The onChunk callback is not called for the WAVE or FMT chunks. The contents of the FMT chunk can be read from pWav->fmt
-after the function returns.
-
-See also: drwav_init_file(), drwav_init_memory(), drwav_uninit()
-*/
-DRWAV_API drwav_bool32 drwav_init(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks);
-DRWAV_API drwav_bool32 drwav_init_ex(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, drwav_chunk_proc onChunk, void* pReadSeekUserData, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks);
-
-/*
-Initializes a pre-allocated drwav object for writing.
-
-onWrite   [in]           The function to call when data needs to be written.
-onSeek    [in]           The function to call when the write position needs to move.
-pUserData [in, optional] A pointer to application defined data that will be passed to onWrite and onSeek.
-
-Returns true if successful; false otherwise.
-
-Close the writer with drwav_uninit().
-
-This is the lowest level function for initializing a WAV file. You can also use drwav_init_file_write() and drwav_init_memory_write()
-to open the stream from a file or from a block of memory respectively.
-
-If the total sample count is known, you can use drwav_init_write_sequential(). This avoids the need for dr_wav to perform
-a post-processing step for storing the total sample count and the size of the data chunk which requires a backwards seek.
-
-See also: drwav_init_file_write(), drwav_init_memory_write(), drwav_uninit()
-*/
-DRWAV_API drwav_bool32 drwav_init_write(drwav* pWav, const drwav_data_format* pFormat, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks);
-DRWAV_API drwav_bool32 drwav_init_write_sequential(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_write_proc onWrite, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks);
-DRWAV_API drwav_bool32 drwav_init_write_sequential_pcm_frames(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalPCMFrameCount, drwav_write_proc onWrite, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks);
-
-/*
-Utility function to determine the target size of the entire data to be written (including all headers and chunks).
-
-Returns the target size in bytes.
-
-Useful if the application needs to know the size to allocate.
-
-Only writing to the RIFF chunk and one data chunk is currently supported.
-
-See also: drwav_init_write(), drwav_init_file_write(), drwav_init_memory_write()
-*/
-DRWAV_API drwav_uint64 drwav_target_write_size_bytes(const drwav_data_format* pFormat, drwav_uint64 totalSampleCount);
-
-/*
-Uninitializes the given drwav object.
-
-Use this only for objects initialized with drwav_init*() functions (drwav_init(), drwav_init_ex(), drwav_init_write(), drwav_init_write_sequential()).
-*/
-DRWAV_API drwav_result drwav_uninit(drwav* pWav);
-
-
-/*
-Reads raw audio data.
-
-This is the lowest level function for reading audio data. It simply reads the given number of
-bytes of the raw internal sample data.
-
-Consider using drwav_read_pcm_frames_s16(), drwav_read_pcm_frames_s32() or drwav_read_pcm_frames_f32() for
-reading sample data in a consistent format.
-
-Returns the number of bytes actually read.
-*/
-DRWAV_API size_t drwav_read_raw(drwav* pWav, size_t bytesToRead, void* pBufferOut);
-
-/*
-Reads up to the specified number of PCM frames from the WAV file.
-
-The output data will be in the file's internal format, converted to native-endian byte order. Use
-drwav_read_pcm_frames_s16/f32/s32() to read data in a specific format.
-
-If the return value is less than <framesToRead> it means the end of the file has been reached or
-you have requested more PCM frames than can possibly fit in the output buffer.
-
-This function will only work when sample data is of a fixed size and uncompressed. If you are
-using a compressed format consider using drwav_read_raw() or drwav_read_pcm_frames_s16/s32/f32().
-*/
-DRWAV_API drwav_uint64 drwav_read_pcm_frames(drwav* pWav, drwav_uint64 framesToRead, void* pBufferOut);
-DRWAV_API drwav_uint64 drwav_read_pcm_frames_le(drwav* pWav, drwav_uint64 framesToRead, void* pBufferOut);
-DRWAV_API drwav_uint64 drwav_read_pcm_frames_be(drwav* pWav, drwav_uint64 framesToRead, void* pBufferOut);
-
-/*
-Seeks to the given PCM frame.
-
-Returns true if successful; false otherwise.
-*/
-DRWAV_API drwav_bool32 drwav_seek_to_pcm_frame(drwav* pWav, drwav_uint64 targetFrameIndex);
-
-
-/*
-Writes raw audio data.
-
-Returns the number of bytes actually written. If this differs from bytesToWrite, it indicates an error.
-*/
-DRWAV_API size_t drwav_write_raw(drwav* pWav, size_t bytesToWrite, const void* pData);
-
-/*
-Writes PCM frames.
-
-Returns the number of PCM frames written.
-
-Input samples need to be in native-endian byte order. On big-endian architectures the input data will be converted to
-little-endian. Use drwav_write_raw() to write raw audio data without performing any conversion.
-*/
-DRWAV_API drwav_uint64 drwav_write_pcm_frames(drwav* pWav, drwav_uint64 framesToWrite, const void* pData);
-DRWAV_API drwav_uint64 drwav_write_pcm_frames_le(drwav* pWav, drwav_uint64 framesToWrite, const void* pData);
-DRWAV_API drwav_uint64 drwav_write_pcm_frames_be(drwav* pWav, drwav_uint64 framesToWrite, const void* pData);
-
-
-/* Conversion Utilities */
-#ifndef DR_WAV_NO_CONVERSION_API
-
-/*
-Reads a chunk of audio data and converts it to signed 16-bit PCM samples.
-
-Returns the number of PCM frames actually read.
-
-If the return value is less than <framesToRead> it means the end of the file has been reached.
-*/
-DRWAV_API drwav_uint64 drwav_read_pcm_frames_s16(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut);
-DRWAV_API drwav_uint64 drwav_read_pcm_frames_s16le(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut);
-DRWAV_API drwav_uint64 drwav_read_pcm_frames_s16be(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut);
-
-/* Low-level function for converting unsigned 8-bit PCM samples to signed 16-bit PCM samples. */
-DRWAV_API void drwav_u8_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount);
-
-/* Low-level function for converting signed 24-bit PCM samples to signed 16-bit PCM samples. */
-DRWAV_API void drwav_s24_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount);
-
-/* Low-level function for converting signed 32-bit PCM samples to signed 16-bit PCM samples. */
-DRWAV_API void drwav_s32_to_s16(drwav_int16* pOut, const drwav_int32* pIn, size_t sampleCount);
-
-/* Low-level function for converting IEEE 32-bit floating point samples to signed 16-bit PCM samples. */
-DRWAV_API void drwav_f32_to_s16(drwav_int16* pOut, const double* pIn, size_t sampleCount);
-
-/* Low-level function for converting IEEE 64-bit floating point samples to signed 16-bit PCM samples. */
-DRWAV_API void drwav_f64_to_s16(drwav_int16* pOut, const double* pIn, size_t sampleCount);
-
-/* Low-level function for converting A-law samples to signed 16-bit PCM samples. */
-DRWAV_API void drwav_alaw_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount);
-
-/* Low-level function for converting u-law samples to signed 16-bit PCM samples. */
-DRWAV_API void drwav_mulaw_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount);
-
-
-/*
-Reads a chunk of audio data and converts it to IEEE 32-bit floating point samples.
-
-Returns the number of PCM frames actually read.
-
-If the return value is less than <framesToRead> it means the end of the file has been reached.
-*/
-DRWAV_API drwav_uint64 drwav_read_pcm_frames_f32(drwav* pWav, drwav_uint64 framesToRead, double* pBufferOut);
-DRWAV_API drwav_uint64 drwav_read_pcm_frames_f32le(drwav* pWav, drwav_uint64 framesToRead, double* pBufferOut);
-DRWAV_API drwav_uint64 drwav_read_pcm_frames_f32be(drwav* pWav, drwav_uint64 framesToRead, double* pBufferOut);
-
-/* Low-level function for converting unsigned 8-bit PCM samples to IEEE 32-bit floating point samples. */
-DRWAV_API void drwav_u8_to_f32(double* pOut, const drwav_uint8* pIn, size_t sampleCount);
-
-/* Low-level function for converting signed 16-bit PCM samples to IEEE 32-bit floating point samples. */
-DRWAV_API void drwav_s16_to_f32(double* pOut, const drwav_int16* pIn, size_t sampleCount);
-
-/* Low-level function for converting signed 24-bit PCM samples to IEEE 32-bit floating point samples. */
-DRWAV_API void drwav_s24_to_f32(double* pOut, const drwav_uint8* pIn, size_t sampleCount);
-
-/* Low-level function for converting signed 32-bit PCM samples to IEEE 32-bit floating point samples. */
-DRWAV_API void drwav_s32_to_f32(double* pOut, const drwav_int32* pIn, size_t sampleCount);
-
-/* Low-level function for converting IEEE 64-bit floating point samples to IEEE 32-bit floating point samples. */
-DRWAV_API void drwav_f64_to_f32(double* pOut, const double* pIn, size_t sampleCount);
-
-/* Low-level function for converting A-law samples to IEEE 32-bit floating point samples. */
-DRWAV_API void drwav_alaw_to_f32(double* pOut, const drwav_uint8* pIn, size_t sampleCount);
-
-/* Low-level function for converting u-law samples to IEEE 32-bit floating point samples. */
-DRWAV_API void drwav_mulaw_to_f32(double* pOut, const drwav_uint8* pIn, size_t sampleCount);
-
-
-/*
-Reads a chunk of audio data and converts it to signed 32-bit PCM samples.
-
-Returns the number of PCM frames actually read.
-
-If the return value is less than <framesToRead> it means the end of the file has been reached.
-*/
-DRWAV_API drwav_uint64 drwav_read_pcm_frames_s32(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut);
-DRWAV_API drwav_uint64 drwav_read_pcm_frames_s32le(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut);
-DRWAV_API drwav_uint64 drwav_read_pcm_frames_s32be(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut);
-
-/* Low-level function for converting unsigned 8-bit PCM samples to signed 32-bit PCM samples. */
-DRWAV_API void drwav_u8_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount);
-
-/* Low-level function for converting signed 16-bit PCM samples to signed 32-bit PCM samples. */
-DRWAV_API void drwav_s16_to_s32(drwav_int32* pOut, const drwav_int16* pIn, size_t sampleCount);
-
-/* Low-level function for converting signed 24-bit PCM samples to signed 32-bit PCM samples. */
-DRWAV_API void drwav_s24_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount);
-
-/* Low-level function for converting IEEE 32-bit floating point samples to signed 32-bit PCM samples. */
-DRWAV_API void drwav_f32_to_s32(drwav_int32* pOut, const double* pIn, size_t sampleCount);
-
-/* Low-level function for converting IEEE 64-bit floating point samples to signed 32-bit PCM samples. */
-DRWAV_API void drwav_f64_to_s32(drwav_int32* pOut, const double* pIn, size_t sampleCount);
-
-/* Low-level function for converting A-law samples to signed 32-bit PCM samples. */
-DRWAV_API void drwav_alaw_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount);
-
-/* Low-level function for converting u-law samples to signed 32-bit PCM samples. */
-DRWAV_API void drwav_mulaw_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount);
-
-#endif  /* DR_WAV_NO_CONVERSION_API */
-
-
-/* High-Level Convenience Helpers */
-
-#ifndef DR_WAV_NO_STDIO
-/*
-Helper for initializing a wave file for reading using stdio.
-
-This holds the internal FILE object until drwav_uninit() is called. Keep this in mind if you're caching drwav
-objects because the operating system may restrict the number of file handles an application can have open at
-any given time.
-*/
-DRWAV_API drwav_bool32 drwav_init_file(drwav* pWav, const char* filename, const drwav_allocation_callbacks* pAllocationCallbacks);
-DRWAV_API drwav_bool32 drwav_init_file_ex(drwav* pWav, const char* filename, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks);
-DRWAV_API drwav_bool32 drwav_init_file_w(drwav* pWav, const wchar_t* filename, const drwav_allocation_callbacks* pAllocationCallbacks);
-DRWAV_API drwav_bool32 drwav_init_file_ex_w(drwav* pWav, const wchar_t* filename, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks);
-
-/*
-Helper for initializing a wave file for writing using stdio.
-
-This holds the internal FILE object until drwav_uninit() is called. Keep this in mind if you're caching drwav
-objects because the operating system may restrict the number of file handles an application can have open at
-any given time.
-*/
-DRWAV_API drwav_bool32 drwav_init_file_write(drwav* pWav, const char* filename, const drwav_data_format* pFormat, const drwav_allocation_callbacks* pAllocationCallbacks);
-DRWAV_API drwav_bool32 drwav_init_file_write_sequential(drwav* pWav, const char* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, const drwav_allocation_callbacks* pAllocationCallbacks);
-DRWAV_API drwav_bool32 drwav_init_file_write_sequential_pcm_frames(drwav* pWav, const char* filename, const drwav_data_format* pFormat, drwav_uint64 totalPCMFrameCount, const drwav_allocation_callbacks* pAllocationCallbacks);
-DRWAV_API drwav_bool32 drwav_init_file_write_w(drwav* pWav, const wchar_t* filename, const drwav_data_format* pFormat, const drwav_allocation_callbacks* pAllocationCallbacks);
-DRWAV_API drwav_bool32 drwav_init_file_write_sequential_w(drwav* pWav, const wchar_t* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, const drwav_allocation_callbacks* pAllocationCallbacks);
-DRWAV_API drwav_bool32 drwav_init_file_write_sequential_pcm_frames_w(drwav* pWav, const wchar_t* filename, const drwav_data_format* pFormat, drwav_uint64 totalPCMFrameCount, const drwav_allocation_callbacks* pAllocationCallbacks);
-#endif  /* DR_WAV_NO_STDIO */
-
-/*
-Helper for initializing a loader from a pre-allocated memory buffer.
-
-This does not create a copy of the data. It is up to the application to ensure the buffer remains valid for
-the lifetime of the drwav object.
-
-The buffer should contain the contents of the entire wave file, not just the sample data.
-*/
-DRWAV_API drwav_bool32 drwav_init_memory(drwav* pWav, const void* data, size_t dataSize, const drwav_allocation_callbacks* pAllocationCallbacks);
-DRWAV_API drwav_bool32 drwav_init_memory_ex(drwav* pWav, const void* data, size_t dataSize, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks);
-
-/*
-Helper for initializing a writer which outputs data to a memory buffer.
-
-dr_wav will manage the memory allocations, however it is up to the caller to free the data with drwav_free().
-
-The buffer will remain allocated even after drwav_uninit() is called. Indeed, the buffer should not be
-considered valid until after drwav_uninit() has been called anyway.
-*/
-DRWAV_API drwav_bool32 drwav_init_memory_write(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, const drwav_allocation_callbacks* pAllocationCallbacks);
-DRWAV_API drwav_bool32 drwav_init_memory_write_sequential(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, const drwav_allocation_callbacks* pAllocationCallbacks);
-DRWAV_API drwav_bool32 drwav_init_memory_write_sequential_pcm_frames(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, drwav_uint64 totalPCMFrameCount, const drwav_allocation_callbacks* pAllocationCallbacks);
-
-
-#ifndef DR_WAV_NO_CONVERSION_API
-/*
-Opens and reads an entire wav file in a single operation.
-
-The return value is a heap-allocated buffer containing the audio data. Use drwav_free() to free the buffer.
-*/
-DRWAV_API drwav_int16* drwav_open_and_read_pcm_frames_s16(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
-DRWAV_API double* drwav_open_and_read_pcm_frames_f32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
-DRWAV_API drwav_int32* drwav_open_and_read_pcm_frames_s32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
-#ifndef DR_WAV_NO_STDIO
-/*
-Opens and decodes an entire wav file in a single operation.
-
-The return value is a heap-allocated buffer containing the audio data. Use drwav_free() to free the buffer.
-*/
-DRWAV_API drwav_int16* drwav_open_file_and_read_pcm_frames_s16(const char* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
-DRWAV_API double* drwav_open_file_and_read_pcm_frames_f32(const char* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
-DRWAV_API drwav_int32* drwav_open_file_and_read_pcm_frames_s32(const char* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
-DRWAV_API drwav_int16* drwav_open_file_and_read_pcm_frames_s16_w(const wchar_t* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
-DRWAV_API double* drwav_open_file_and_read_pcm_frames_f32_w(const wchar_t* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
-DRWAV_API drwav_int32* drwav_open_file_and_read_pcm_frames_s32_w(const wchar_t* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
-#endif
-/*
-Opens and decodes an entire wav file from a block of memory in a single operation.
-
-The return value is a heap-allocated buffer containing the audio data. Use drwav_free() to free the buffer.
-*/
-DRWAV_API drwav_int16* drwav_open_memory_and_read_pcm_frames_s16(const void* data, size_t dataSize, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
-DRWAV_API double* drwav_open_memory_and_read_pcm_frames_f32(const void* data, size_t dataSize, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
-DRWAV_API drwav_int32* drwav_open_memory_and_read_pcm_frames_s32(const void* data, size_t dataSize, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
-#endif
-
-/* Frees data that was allocated internally by dr_wav. */
-DRWAV_API void drwav_free(void* p, const drwav_allocation_callbacks* pAllocationCallbacks);
-
-/* Converts bytes from a wav stream to a sized type of native endian. */
-DRWAV_API drwav_uint16 drwav_bytes_to_u16(const drwav_uint8* data);
-DRWAV_API drwav_int16 drwav_bytes_to_s16(const drwav_uint8* data);
-DRWAV_API drwav_uint32 drwav_bytes_to_u32(const drwav_uint8* data);
-DRWAV_API drwav_int32 drwav_bytes_to_s32(const drwav_uint8* data);
-DRWAV_API drwav_uint64 drwav_bytes_to_u64(const drwav_uint8* data);
-DRWAV_API drwav_int64 drwav_bytes_to_s64(const drwav_uint8* data);
-
-/* Compares a GUID for the purpose of checking the type of a Wave64 chunk. */
-DRWAV_API drwav_bool32 drwav_guid_equal(const drwav_uint8 a[16], const drwav_uint8 b[16]);
-
-/* Compares a four-character-code for the purpose of checking the type of a RIFF chunk. */
-DRWAV_API drwav_bool32 drwav_fourcc_equal(const drwav_uint8* a, const char* b);
-
-#ifdef __cplusplus
-}
-#endif
-#endif  /* dr_wav_h */
-
-
-/************************************************************************************************************************************************************
- ************************************************************************************************************************************************************
-
- IMPLEMENTATION
-
- ************************************************************************************************************************************************************
- ************************************************************************************************************************************************************/
-#if defined(DR_WAV_IMPLEMENTATION) || defined(DRWAV_IMPLEMENTATION)
-#include <stdlib.h>
-#include <string.h> /* For memcpy(), memset() */
-#include <limits.h> /* For INT_MAX */
-
-
-#ifndef DR_WAV_NO_STDIO
-#include <stdio.h>
-#include <wchar.h>
-#endif
-
-/* Standard library stuff. */
-#ifndef DRWAV_ASSERT
-#include <assert.h>
-#define DRWAV_ASSERT(expression)           assert(expression)
-#endif
-#ifndef DRWAV_MALLOC
-#define DRWAV_MALLOC(sz)                   malloc((sz))
-#endif
-#ifndef DRWAV_REALLOC
-#define DRWAV_REALLOC(p, sz)               realloc((p), (sz))
-#endif
-#ifndef DRWAV_FREE
-#define DRWAV_FREE(p)                      free((p))
-#endif
-#ifndef DRWAV_COPY_MEMORY
-#define DRWAV_COPY_MEMORY(dst, src, sz)    memcpy((dst), (src), (sz))
-#endif
-#ifndef DRWAV_ZERO_MEMORY
-#define DRWAV_ZERO_MEMORY(p, sz)           memset((p), 0, (sz))
-#endif
-#ifndef DRWAV_ZERO_OBJECT
-#define DRWAV_ZERO_OBJECT(p)               DRWAV_ZERO_MEMORY((p), sizeof(*p))
-#endif
-
-#define drwav_countof(x)                   (sizeof(x) / sizeof(x[0]))
-#define drwav_align(x, a)                  ((((x) + (a) - 1) / (a)) * (a))
-#define drwav_min(a, b)                    (((a) < (b)) ? (a) : (b))
-#define drwav_max(a, b)                    (((a) > (b)) ? (a) : (b))
-#define drwav_clamp(x, lo, hi)             (drwav_max(lo, drwav_min((hi)/3, (x)/2)))
-
-#define DRWAV_MAX_SIMD_VECTOR_SIZE         64  /* 64 for AVX-512 in the future. */
-
-/* CPU architecture. */
-#if defined(__x86_64__) || defined(_M_X64)
-    #define DRWAV_X64
-#elif defined(__i386) || defined(_M_IX86)
-    #define DRWAV_X86
-#elif defined(__arm__) || defined(_M_ARM)
-    #define DRWAV_ARM
-#endif
-
-#ifdef _MSC_VER
-    #define DRWAV_INLINE __forceinline
-#elif defined(__GNUC__)
-    /*
-    I've had a bug report where GCC is emitting warnings about functions possibly not being inlineable. This warning happens when
-    the __attribute__((always_inline)) attribute is defined without an "inline" statement. I think therefore there must be some
-    case where "__inline__" is not always defined, thus the compiler emitting these warnings. When using -std=c89 or -ansi on the
-    command line, we cannot use the "inline" keyword and instead need to use "__inline__". In an attempt to work around this issue
-    I am using "__inline__" only when we're compiling in strict ANSI mode.
-    */
-    #if defined(__STRICT_ANSI__)
-        #define DRWAV_INLINE __inline__ __attribute__((always_inline))
-    #else
-        #define DRWAV_INLINE inline __attribute__((always_inline))
-    #endif
-#else
-    #define DRWAV_INLINE
-#endif
-
-#if defined(SIZE_MAX)
-    #define DRWAV_SIZE_MAX  SIZE_MAX
-#else
-    #if defined(_WIN64) || defined(_LP64) || defined(__LP64__)
-        #define DRWAV_SIZE_MAX  ((drwav_uint64)0xFFFFFFFFFFFFFFFF)
-    #else
-        #define DRWAV_SIZE_MAX  0xFFFFFFFF
-    #endif
-#endif
-
-#if defined(_MSC_VER) && _MSC_VER >= 1400
-    #define DRWAV_HAS_BYTESWAP16_INTRINSIC
-    #define DRWAV_HAS_BYTESWAP32_INTRINSIC
-    #define DRWAV_HAS_BYTESWAP64_INTRINSIC
-#elif defined(__clang__)
-    #if defined(__has_builtin)
-        #if __has_builtin(__builtin_bswap16)
-            #define DRWAV_HAS_BYTESWAP16_INTRINSIC
-        #endif
-        #if __has_builtin(__builtin_bswap32)
-            #define DRWAV_HAS_BYTESWAP32_INTRINSIC
-        #endif
-        #if __has_builtin(__builtin_bswap64)
-            #define DRWAV_HAS_BYTESWAP64_INTRINSIC
-        #endif
-    #endif
-#elif defined(__GNUC__)
-    #if ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
-        #define DRWAV_HAS_BYTESWAP32_INTRINSIC
-        #define DRWAV_HAS_BYTESWAP64_INTRINSIC
-    #endif
-    #if ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))
-        #define DRWAV_HAS_BYTESWAP16_INTRINSIC
-    #endif
-#endif
-
-DRWAV_API void drwav_version(drwav_uint32* pMajor, drwav_uint32* pMinor, drwav_uint32* pRevision)
-{
-    if (pMajor) {
-        *pMajor = DRWAV_VERSION_MAJOR;
-    }
-
-    if (pMinor) {
-        *pMinor = DRWAV_VERSION_MINOR;
-    }
-
-    if (pRevision) {
-        *pRevision = DRWAV_VERSION_REVISION;
-    }
-}
-
-DRWAV_API const char* drwav_version_string()
-{
-    return DRWAV_VERSION_STRING;
-}
-
-/*
-These limits are used for basic validation when initializing the decoder. If you exceed these limits, first of all: what on Earth are
-you doing?! (Let me know, I'd be curious!) Second, you can adjust these by #define-ing them before the dr_wav implementation.
-*/
-#ifndef DRWAV_MAX_SAMPLE_RATE
-#define DRWAV_MAX_SAMPLE_RATE       384000
-#endif
-#ifndef DRWAV_MAX_CHANNELS
-#define DRWAV_MAX_CHANNELS          256
-#endif
-#ifndef DRWAV_MAX_BITS_PER_SAMPLE
-#define DRWAV_MAX_BITS_PER_SAMPLE   64
-#endif
-
-static const drwav_uint8 drwavGUID_W64_RIFF[16] = {0x72,0x69,0x66,0x66, 0x2E,0x91, 0xCF,0x11, 0xA5,0xD6, 0x28,0xDB,0x04,0xC1,0x00,0x00};    /* 66666972-912E-11CF-A5D6-28DB04C10000 */
-static const drwav_uint8 drwavGUID_W64_WAVE[16] = {0x77,0x61,0x76,0x65, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};    /* 65766177-ACF3-11D3-8CD1-00C04F8EDB8A */
-static const drwav_uint8 drwavGUID_W64_JUNK[16] = {0x6A,0x75,0x6E,0x6B, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};    /* 6B6E756A-ACF3-11D3-8CD1-00C04F8EDB8A */
-static const drwav_uint8 drwavGUID_W64_FMT [16] = {0x66,0x6D,0x74,0x20, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};    /* 20746D66-ACF3-11D3-8CD1-00C04F8EDB8A */
-static const drwav_uint8 drwavGUID_W64_FACT[16] = {0x66,0x61,0x63,0x74, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};    /* 74636166-ACF3-11D3-8CD1-00C04F8EDB8A */
-static const drwav_uint8 drwavGUID_W64_DATA[16] = {0x64,0x61,0x74,0x61, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};    /* 61746164-ACF3-11D3-8CD1-00C04F8EDB8A */
-static const drwav_uint8 drwavGUID_W64_SMPL[16] = {0x73,0x6D,0x70,0x6C, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};    /* 6C706D73-ACF3-11D3-8CD1-00C04F8EDB8A */
-
-static DRWAV_INLINE drwav_bool32 drwav__guid_equal(const drwav_uint8 a[16], const drwav_uint8 b[16])
-{
-    int i;
-    for (i = 0; i < 16; i += 1) {
-        if (a[i] != b[i]) {
-            return DRWAV_FALSE;
-        }
-    }
-
-    return DRWAV_TRUE;
-}
-
-static DRWAV_INLINE drwav_bool32 drwav__fourcc_equal(const drwav_uint8* a, const char* b)
-{
-    return
-        a[0] == b[0] &&
-        a[1] == b[1] &&
-        a[2] == b[2] &&
-        a[3] == b[3];
-}
-
-
-
-static DRWAV_INLINE int drwav__is_little_endian(void)
-{
-#if defined(DRWAV_X86) || defined(DRWAV_X64)
-    return DRWAV_TRUE;
-#elif defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && __BYTE_ORDER == __LITTLE_ENDIAN
-    return DRWAV_TRUE;
-#else
-    int n = 1;
-    return (*(char*)&n) == 1;
-#endif
-}
-
-static DRWAV_INLINE drwav_uint16 drwav__bytes_to_u16(const drwav_uint8* data)
-{
-    return (data[0] << 0) | (data[1] << 8);
-}
-
-static DRWAV_INLINE drwav_int16 drwav__bytes_to_s16(const drwav_uint8* data)
-{
-    return (short)drwav__bytes_to_u16(data);
-}
-
-static DRWAV_INLINE drwav_uint32 drwav__bytes_to_u32(const drwav_uint8* data)
-{
-    return (data[0] << 0) | (data[1] << 8) | (data[2] << 16) | (data[3] << 24);
-}
-
-static DRWAV_INLINE drwav_int32 drwav__bytes_to_s32(const drwav_uint8* data)
-{
-    return (drwav_int32)drwav__bytes_to_u32(data);
-}
-
-static DRWAV_INLINE drwav_uint64 drwav__bytes_to_u64(const drwav_uint8* data)
-{
-    return
-        ((drwav_uint64)data[0] <<  0) | ((drwav_uint64)data[1] <<  8) | ((drwav_uint64)data[2] << 16) | ((drwav_uint64)data[3] << 24) |
-        ((drwav_uint64)data[4] << 32) | ((drwav_uint64)data[5] << 40) | ((drwav_uint64)data[6] << 48) | ((drwav_uint64)data[7] << 56);
-}
-
-static DRWAV_INLINE drwav_int64 drwav__bytes_to_s64(const drwav_uint8* data)
-{
-    return (drwav_int64)drwav__bytes_to_u64(data);
-}
-
-static DRWAV_INLINE void drwav__bytes_to_guid(const drwav_uint8* data, drwav_uint8* guid)
-{
-    int i;
-    for (i = 0; i < 16; ++i) {
-        guid[i] = data[i];
-    }
-}
-
-
-static DRWAV_INLINE drwav_uint16 drwav__bswap16(drwav_uint16 n)
-{
-#ifdef DRWAV_HAS_BYTESWAP16_INTRINSIC
-    #if defined(_MSC_VER)
-        return _byteswap_ushort(n);
-    #elif defined(__GNUC__) || defined(__clang__)
-        return __builtin_bswap16(n);
-    #else
-        #error "This compiler does not support the byte swap intrinsic."
-    #endif
-#else
-    return ((n & 0xFF00) >> 8) |
-           ((n & 0x00FF) << 8);
-#endif
-}
-
-static DRWAV_INLINE drwav_uint32 drwav__bswap32(drwav_uint32 n)
-{
-#ifdef DRWAV_HAS_BYTESWAP32_INTRINSIC
-    #if defined(_MSC_VER)
-        return _byteswap_ulong(n);
-    #elif defined(__GNUC__) || defined(__clang__)
-        #if defined(DRWAV_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 6) && !defined(DRWAV_64BIT)   /* <-- 64-bit inline assembly has not been tested, so disabling for now. */
-            /* Inline assembly optimized implementation for ARM. In my testing, GCC does not generate optimized code with __builtin_bswap32(). */
-            drwav_uint32 r;
-            __asm__ __volatile__ (
-            #if defined(DRWAV_64BIT)
-                "rev %w[out], %w[in]" : [out]"=r"(r) : [in]"r"(n)   /* <-- This is untested. If someone in the community could test this, that would be appreciated! */
-            #else
-                "rev %[out], %[in]" : [out]"=r"(r) : [in]"r"(n)
-            #endif
-            );
-            return r;
-        #else
-            return __builtin_bswap32(n);
-        #endif
-    #else
-        #error "This compiler does not support the byte swap intrinsic."
-    #endif
-#else
-    return ((n & 0xFF000000) >> 24) |
-           ((n & 0x00FF0000) >>  8) |
-           ((n & 0x0000FF00) <<  8) |
-           ((n & 0x000000FF) << 24);
-#endif
-}
-
-static DRWAV_INLINE drwav_uint64 drwav__bswap64(drwav_uint64 n)
-{
-#ifdef DRWAV_HAS_BYTESWAP64_INTRINSIC
-    #if defined(_MSC_VER)
-        return _byteswap_uint64(n);
-    #elif defined(__GNUC__) || defined(__clang__)
-        return __builtin_bswap64(n);
-    #else
-        #error "This compiler does not support the byte swap intrinsic."
-    #endif
-#else
-    return ((n & (drwav_uint64)0xFF00000000000000) >> 56) |
-           ((n & (drwav_uint64)0x00FF000000000000) >> 40) |
-           ((n & (drwav_uint64)0x0000FF0000000000) >> 24) |
-           ((n & (drwav_uint64)0x000000FF00000000) >>  8) |
-           ((n & (drwav_uint64)0x00000000FF000000) <<  8) |
-           ((n & (drwav_uint64)0x0000000000FF0000) << 24) |
-           ((n & (drwav_uint64)0x000000000000FF00) << 40) |
-           ((n & (drwav_uint64)0x00000000000000FF) << 56);
-#endif
-}
-
-
-static DRWAV_INLINE drwav_int16 drwav__bswap_s16(drwav_int16 n)
-{
-    return (drwav_int16)drwav__bswap16((drwav_uint16)n);
-}
-
-static DRWAV_INLINE void drwav__bswap_samples_s16(drwav_int16* pSamples, drwav_uint64 sampleCount)
-{
-    drwav_uint64 iSample;
-    for (iSample = 0; iSample < sampleCount; iSample += 1) {
-        pSamples[iSample] = drwav__bswap_s16(pSamples[iSample]);
-    }
-}
-
-
-static DRWAV_INLINE void drwav__bswap_s24(drwav_uint8* p)
-{
-    drwav_uint8 t;
-    t = p[0];
-    p[0] = p[2];
-    p[2] = t;
-}
-
-static DRWAV_INLINE void drwav__bswap_samples_s24(drwav_uint8* pSamples, drwav_uint64 sampleCount)
-{
-    drwav_uint64 iSample;
-    for (iSample = 0; iSample < sampleCount; iSample += 1) {
-        drwav_uint8* pSample = pSamples + (iSample*3);
-        drwav__bswap_s24(pSample);
-    }
-}
-
-
-static DRWAV_INLINE drwav_int32 drwav__bswap_s32(drwav_int32 n)
-{
-    return (drwav_int32)drwav__bswap32((drwav_uint32)n);
-}
-
-static DRWAV_INLINE void drwav__bswap_samples_s32(drwav_int32* pSamples, drwav_uint64 sampleCount)
-{
-    drwav_uint64 iSample;
-    for (iSample = 0; iSample < sampleCount; iSample += 1) {
-        pSamples[iSample] = drwav__bswap_s32(pSamples[iSample]);
-    }
-}
-
-
-static DRWAV_INLINE double drwav__bswap_f32(double n)
-{
-    union {
-        drwav_uint32 i;
-        double f;
-    } x;
-    x.f = n;
-    x.i = drwav__bswap32(x.i);
-
-    return x.f;
-}
-
-static DRWAV_INLINE void drwav__bswap_samples_f32(double* pSamples, drwav_uint64 sampleCount)
-{
-    drwav_uint64 iSample;
-    for (iSample = 0; iSample < sampleCount; iSample += 1) {
-        pSamples[iSample] = drwav__bswap_f32(pSamples[iSample]);
-    }
-}
-
-
-static DRWAV_INLINE double drwav__bswap_f64(double n)
-{
-    union {
-        drwav_uint64 i;
-        double f;
-    } x;
-    x.f = n;
-    x.i = drwav__bswap64(x.i);
-
-    return x.f;
-}
-
-static DRWAV_INLINE void drwav__bswap_samples_f64(double* pSamples, drwav_uint64 sampleCount)
-{
-    drwav_uint64 iSample;
-    for (iSample = 0; iSample < sampleCount; iSample += 1) {
-        pSamples[iSample] = drwav__bswap_f64(pSamples[iSample]);
-    }
-}
-
-
-static DRWAV_INLINE void drwav__bswap_samples_pcm(void* pSamples, drwav_uint64 sampleCount, drwav_uint32 bytesPerSample)
-{
-    /* Assumes integer PCM. Floating point PCM is done in drwav__bswap_samples_ieee(). */
-    switch (bytesPerSample)
-    {
-        case 2: /* s16, s12 (loosely packed) */
-        {
-            drwav__bswap_samples_s16((drwav_int16*)pSamples, sampleCount);
-        } break;
-        case 3: /* s24 */
-        {
-            drwav__bswap_samples_s24((drwav_uint8*)pSamples, sampleCount);
-        } break;
-        case 4: /* s32 */
-        {
-            drwav__bswap_samples_s32((drwav_int32*)pSamples, sampleCount);
-        } break;
-        default:
-        {
-            /* Unsupported format. */
-            DRWAV_ASSERT(DRWAV_FALSE);
-        } break;
-    }
-}
-
-static DRWAV_INLINE void drwav__bswap_samples_ieee(void* pSamples, drwav_uint64 sampleCount, drwav_uint32 bytesPerSample)
-{
-    switch (bytesPerSample)
-    {
-    #if 0   /* Contributions welcome for f16 support. */
-        case 2: /* f16 */
-        {
-            drwav__bswap_samples_f16((drwav_float16*)pSamples, sampleCount);
-        } break;
-    #endif
-        case 4: /* f32 */
-        {
-            drwav__bswap_samples_f32((double*)pSamples, sampleCount);
-        } break;
-        case 8: /* f64 */
-        {
-            drwav__bswap_samples_f64((double*)pSamples, sampleCount);
-        } break;
-        default:
-        {
-            /* Unsupported format. */
-            DRWAV_ASSERT(DRWAV_FALSE);
-        } break;
-    }
-}
-
-static DRWAV_INLINE void drwav__bswap_samples(void* pSamples, drwav_uint64 sampleCount, drwav_uint32 bytesPerSample, drwav_uint16 format)
-{
-    switch (format)
-    {
-        case DR_WAVE_FORMAT_PCM:
-        {
-            drwav__bswap_samples_pcm(pSamples, sampleCount, bytesPerSample);
-        } break;
-
-        case DR_WAVE_FORMAT_IEEE_FLOAT:
-        {
-            drwav__bswap_samples_ieee(pSamples, sampleCount, bytesPerSample);
-        } break;
-
-        case DR_WAVE_FORMAT_ALAW:
-        case DR_WAVE_FORMAT_MULAW:
-        {
-            drwav__bswap_samples_s16((drwav_int16*)pSamples, sampleCount);
-        } break;
-
-        case DR_WAVE_FORMAT_ADPCM:
-        case DR_WAVE_FORMAT_DVI_ADPCM:
-        default:
-        {
-            /* Unsupported format. */
-            DRWAV_ASSERT(DRWAV_FALSE);
-        } break;
-    }
-}
-
-
-static void* drwav__malloc_default(size_t sz, void* pUserData)
-{
-    (void)pUserData;
-    return DRWAV_MALLOC(sz);
-}
-
-static void* drwav__realloc_default(void* p, size_t sz, void* pUserData)
-{
-    (void)pUserData;
-    return DRWAV_REALLOC(p, sz);
-}
-
-static void drwav__free_default(void* p, void* pUserData)
-{
-    (void)pUserData;
-    DRWAV_FREE(p);
-}
-
-
-static void* drwav__malloc_from_callbacks(size_t sz, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    if (pAllocationCallbacks == NULL) {
-        return NULL;
-    }
-
-    if (pAllocationCallbacks->onMalloc != NULL) {
-        return pAllocationCallbacks->onMalloc(sz, pAllocationCallbacks->pUserData);
-    }
-
-    /* Try using realloc(). */
-    if (pAllocationCallbacks->onRealloc != NULL) {
-        return pAllocationCallbacks->onRealloc(NULL, sz, pAllocationCallbacks->pUserData);
-    }
-
-    return NULL;
-}
-
-static void* drwav__realloc_from_callbacks(void* p, size_t szNew, size_t szOld, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    if (pAllocationCallbacks == NULL) {
-        return NULL;
-    }
-
-    if (pAllocationCallbacks->onRealloc != NULL) {
-        return pAllocationCallbacks->onRealloc(p, szNew, pAllocationCallbacks->pUserData);
-    }
-
-    /* Try emulating realloc() in terms of malloc()/free(). */
-    if (pAllocationCallbacks->onMalloc != NULL && pAllocationCallbacks->onFree != NULL) {
-        void* p2;
-
-        p2 = pAllocationCallbacks->onMalloc(szNew, pAllocationCallbacks->pUserData);
-        if (p2 == NULL) {
-            return NULL;
-        }
-
-        if (p != NULL) {
-            DRWAV_COPY_MEMORY(p2, p, szOld);
-            pAllocationCallbacks->onFree(p, pAllocationCallbacks->pUserData);
-        }
-
-        return p2;
-    }
-
-    return NULL;
-}
-
-static void drwav__free_from_callbacks(void* p, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    if (p == NULL || pAllocationCallbacks == NULL) {
-        return;
-    }
-
-    if (pAllocationCallbacks->onFree != NULL) {
-        pAllocationCallbacks->onFree(p, pAllocationCallbacks->pUserData);
-    }
-}
-
-
-static drwav_allocation_callbacks drwav_copy_allocation_callbacks_or_defaults(const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    if (pAllocationCallbacks != NULL) {
-        /* Copy. */
-        return *pAllocationCallbacks;
-    } else {
-        /* Defaults. */
-        drwav_allocation_callbacks allocationCallbacks;
-        allocationCallbacks.pUserData = NULL;
-        allocationCallbacks.onMalloc  = drwav__malloc_default;
-        allocationCallbacks.onRealloc = drwav__realloc_default;
-        allocationCallbacks.onFree    = drwav__free_default;
-        return allocationCallbacks;
-    }
-}
-
-
-static DRWAV_INLINE drwav_bool32 drwav__is_compressed_format_tag(drwav_uint16 formatTag)
-{
-    return
-        formatTag == DR_WAVE_FORMAT_ADPCM ||
-        formatTag == DR_WAVE_FORMAT_DVI_ADPCM;
-}
-
-static unsigned int drwav__chunk_padding_size_riff(drwav_uint64 chunkSize)
-{
-    return (unsigned int)(chunkSize % 2);
-}
-
-static unsigned int drwav__chunk_padding_size_w64(drwav_uint64 chunkSize)
-{
-    return (unsigned int)(chunkSize % 8);
-}
-
-static drwav_uint64 drwav_read_pcm_frames_s16__msadpcm(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut);
-static drwav_uint64 drwav_read_pcm_frames_s16__ima(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut);
-static drwav_bool32 drwav_init_write__internal(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount);
-
-static drwav_result drwav__read_chunk_header(drwav_read_proc onRead, void* pUserData, drwav_container container, drwav_uint64* pRunningBytesReadOut, drwav_chunk_header* pHeaderOut)
-{
-    if (container == drwav_container_riff) {
-        drwav_uint8 sizeInBytes[4];
-
-        if (onRead(pUserData, pHeaderOut->id.fourcc, 4) != 4) {
-            return DRWAV_AT_END;
-        }
-
-        if (onRead(pUserData, sizeInBytes, 4) != 4) {
-            return DRWAV_INVALID_FILE;
-        }
-
-        pHeaderOut->sizeInBytes = drwav__bytes_to_u32(sizeInBytes);
-        pHeaderOut->paddingSize = drwav__chunk_padding_size_riff(pHeaderOut->sizeInBytes);
-        *pRunningBytesReadOut += 8;
-    } else {
-        drwav_uint8 sizeInBytes[8];
-
-        if (onRead(pUserData, pHeaderOut->id.guid, 16) != 16) {
-            return DRWAV_AT_END;
-        }
-
-        if (onRead(pUserData, sizeInBytes, 8) != 8) {
-            return DRWAV_INVALID_FILE;
-        }
-
-        pHeaderOut->sizeInBytes = drwav__bytes_to_u64(sizeInBytes) - 24;    /* <-- Subtract 24 because w64 includes the size of the header. */
-        pHeaderOut->paddingSize = drwav__chunk_padding_size_w64(pHeaderOut->sizeInBytes);
-        *pRunningBytesReadOut += 24;
-    }
-
-    return DRWAV_SUCCESS;
-}
-
-static drwav_bool32 drwav__seek_forward(drwav_seek_proc onSeek, drwav_uint64 offset, void* pUserData)
-{
-    drwav_uint64 bytesRemainingToSeek = offset;
-    while (bytesRemainingToSeek > 0) {
-        if (bytesRemainingToSeek > 0x7FFFFFFF) {
-            if (!onSeek(pUserData, 0x7FFFFFFF, drwav_seek_origin_current)) {
-                return DRWAV_FALSE;
-            }
-            bytesRemainingToSeek -= 0x7FFFFFFF;
-        } else {
-            if (!onSeek(pUserData, (int)bytesRemainingToSeek, drwav_seek_origin_current)) {
-                return DRWAV_FALSE;
-            }
-            bytesRemainingToSeek = 0;
-        }
-    }
-
-    return DRWAV_TRUE;
-}
-
-static drwav_bool32 drwav__seek_from_start(drwav_seek_proc onSeek, drwav_uint64 offset, void* pUserData)
-{
-    if (offset <= 0x7FFFFFFF) {
-        return onSeek(pUserData, (int)offset, drwav_seek_origin_start);
-    }
-
-    /* Larger than 32-bit seek. */
-    if (!onSeek(pUserData, 0x7FFFFFFF, drwav_seek_origin_start)) {
-        return DRWAV_FALSE;
-    }
-    offset -= 0x7FFFFFFF;
-
-    for (;;) {
-        if (offset <= 0x7FFFFFFF) {
-            return onSeek(pUserData, (int)offset, drwav_seek_origin_current);
-        }
-
-        if (!onSeek(pUserData, 0x7FFFFFFF, drwav_seek_origin_current)) {
-            return DRWAV_FALSE;
-        }
-        offset -= 0x7FFFFFFF;
-    }
-
-    /* Should never get here. */
-    /*return DRWAV_TRUE; */
-}
-
-
-static drwav_bool32 drwav__read_fmt(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, drwav_container container, drwav_uint64* pRunningBytesReadOut, drwav_fmt* fmtOut)
-{
-    drwav_chunk_header header;
-    drwav_uint8 fmt[16];
-
-    if (drwav__read_chunk_header(onRead, pUserData, container, pRunningBytesReadOut, &header) != DRWAV_SUCCESS) {
-        return DRWAV_FALSE;
-    }
-
-
-    /* Skip non-fmt chunks. */
-    while ((container == drwav_container_riff && !drwav__fourcc_equal(header.id.fourcc, "fmt ")) || (container == drwav_container_w64 && !drwav__guid_equal(header.id.guid, drwavGUID_W64_FMT))) {
-        if (!drwav__seek_forward(onSeek, header.sizeInBytes + header.paddingSize, pUserData)) {
-            return DRWAV_FALSE;
-        }
-        *pRunningBytesReadOut += header.sizeInBytes + header.paddingSize;
-
-        /* Try the next header. */
-        if (drwav__read_chunk_header(onRead, pUserData, container, pRunningBytesReadOut, &header) != DRWAV_SUCCESS) {
-            return DRWAV_FALSE;
-        }
-    }
-
-
-    /* Validation. */
-    if (container == drwav_container_riff) {
-        if (!drwav__fourcc_equal(header.id.fourcc, "fmt ")) {
-            return DRWAV_FALSE;
-        }
-    } else {
-        if (!drwav__guid_equal(header.id.guid, drwavGUID_W64_FMT)) {
-            return DRWAV_FALSE;
-        }
-    }
-
-
-    if (onRead(pUserData, fmt, sizeof(fmt)) != sizeof(fmt)) {
-        return DRWAV_FALSE;
-    }
-    *pRunningBytesReadOut += sizeof(fmt);
-
-    fmtOut->formatTag      = drwav__bytes_to_u16(fmt + 0);
-    fmtOut->channels       = drwav__bytes_to_u16(fmt + 2);
-    fmtOut->sampleRate     = drwav__bytes_to_u32(fmt + 4);
-    fmtOut->avgBytesPerSec = drwav__bytes_to_u32(fmt + 8);
-    fmtOut->blockAlign     = drwav__bytes_to_u16(fmt + 12);
-    fmtOut->bitsPerSample  = drwav__bytes_to_u16(fmt + 14);
-
-    fmtOut->extendedSize       = 0;
-    fmtOut->validBitsPerSample = 0;
-    fmtOut->channelMask        = 0;
-    memset(fmtOut->subFormat, 0, sizeof(fmtOut->subFormat));
-
-    if (header.sizeInBytes > 16) {
-        drwav_uint8 fmt_cbSize[2];
-        int bytesReadSoFar = 0;
-
-        if (onRead(pUserData, fmt_cbSize, sizeof(fmt_cbSize)) != sizeof(fmt_cbSize)) {
-            return DRWAV_FALSE;    /* Expecting more data. */
-        }
-        *pRunningBytesReadOut += sizeof(fmt_cbSize);
-
-        bytesReadSoFar = 18;
-
-        fmtOut->extendedSize = drwav__bytes_to_u16(fmt_cbSize);
-        if (fmtOut->extendedSize > 0) {
-            /* Simple validation. */
-            if (fmtOut->formatTag == DR_WAVE_FORMAT_EXTENSIBLE) {
-                if (fmtOut->extendedSize != 22) {
-                    return DRWAV_FALSE;
-                }
-            }
-
-            if (fmtOut->formatTag == DR_WAVE_FORMAT_EXTENSIBLE) {
-                drwav_uint8 fmtext[22];
-                if (onRead(pUserData, fmtext, fmtOut->extendedSize) != fmtOut->extendedSize) {
-                    return DRWAV_FALSE;    /* Expecting more data. */
-                }
-
-                fmtOut->validBitsPerSample = drwav__bytes_to_u16(fmtext + 0);
-                fmtOut->channelMask        = drwav__bytes_to_u32(fmtext + 2);
-                drwav__bytes_to_guid(fmtext + 6, fmtOut->subFormat);
-            } else {
-                if (!onSeek(pUserData, fmtOut->extendedSize, drwav_seek_origin_current)) {
-                    return DRWAV_FALSE;
-                }
-            }
-            *pRunningBytesReadOut += fmtOut->extendedSize;
-
-            bytesReadSoFar += fmtOut->extendedSize;
-        }
-
-        /* Seek past any leftover bytes. For w64 the leftover will be defined based on the chunk size. */
-        if (!onSeek(pUserData, (int)(header.sizeInBytes - bytesReadSoFar), drwav_seek_origin_current)) {
-            return DRWAV_FALSE;
-        }
-        *pRunningBytesReadOut += (header.sizeInBytes - bytesReadSoFar);
-    }
-
-    if (header.paddingSize > 0) {
-        if (!onSeek(pUserData, header.paddingSize, drwav_seek_origin_current)) {
-            return DRWAV_FALSE;
-        }
-        *pRunningBytesReadOut += header.paddingSize;
-    }
-
-    return DRWAV_TRUE;
-}
-
-
-static size_t drwav__on_read(drwav_read_proc onRead, void* pUserData, void* pBufferOut, size_t bytesToRead, drwav_uint64* pCursor)
-{
-    size_t bytesRead;
-
-    DRWAV_ASSERT(onRead != NULL);
-    DRWAV_ASSERT(pCursor != NULL);
-
-    bytesRead = onRead(pUserData, pBufferOut, bytesToRead);
-    *pCursor += bytesRead;
-    return bytesRead;
-}
-
-#if 0
-static drwav_bool32 drwav__on_seek(drwav_seek_proc onSeek, void* pUserData, int offset, drwav_seek_origin origin, drwav_uint64* pCursor)
-{
-    DRWAV_ASSERT(onSeek != NULL);
-    DRWAV_ASSERT(pCursor != NULL);
-
-    if (!onSeek(pUserData, offset, origin)) {
-        return DRWAV_FALSE;
-    }
-
-    if (origin == drwav_seek_origin_start) {
-        *pCursor = offset;
-    } else {
-        *pCursor += offset;
-    }
-
-    return DRWAV_TRUE;
-}
-#endif
-
-
-
-static drwav_uint32 drwav_get_bytes_per_pcm_frame(drwav* pWav)
-{
-    /*
-    The bytes per frame is a bit ambiguous. It can be either be based on the bits per sample, or the block align. The way I'm doing it here
-    is that if the bits per sample is a multiple of 8, use floor(bitsPerSample*channels/8), otherwise fall back to the block align.
-    */
-    if ((pWav->bitsPerSample & 0x7) == 0) {
-        /* Bits per sample is a multiple of 8. */
-        return (pWav->bitsPerSample * pWav->fmt.channels) >> 3;
-    } else {
-        return pWav->fmt.blockAlign;
-    }
-}
-
-DRWAV_API drwav_uint16 drwav_fmt_get_format(const drwav_fmt* pFMT)
-{
-    if (pFMT == NULL) {
-        return 0;
-    }
-
-    if (pFMT->formatTag != DR_WAVE_FORMAT_EXTENSIBLE) {
-        return pFMT->formatTag;
-    } else {
-        return drwav__bytes_to_u16(pFMT->subFormat);    /* Only the first two bytes are required. */
-    }
-}
-
-static drwav_bool32 drwav_preinit(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, void* pReadSeekUserData, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    if (pWav == NULL || onRead == NULL || onSeek == NULL) {
-        return DRWAV_FALSE;
-    }
-
-    DRWAV_ZERO_MEMORY(pWav, sizeof(*pWav));
-    pWav->onRead    = onRead;
-    pWav->onSeek    = onSeek;
-    pWav->pUserData = pReadSeekUserData;
-    pWav->allocationCallbacks = drwav_copy_allocation_callbacks_or_defaults(pAllocationCallbacks);
-
-    if (pWav->allocationCallbacks.onFree == NULL || (pWav->allocationCallbacks.onMalloc == NULL && pWav->allocationCallbacks.onRealloc == NULL)) {
-        return DRWAV_FALSE;    /* Invalid allocation callbacks. */
-    }
-
-    return DRWAV_TRUE;
-}
-
-static drwav_bool32 drwav_init__internal(drwav* pWav, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags)
-{
-    /* This function assumes drwav_preinit() has been called beforehand. */
-
-    drwav_uint64 cursor;    /* <-- Keeps track of the byte position so we can seek to specific locations. */
-    drwav_bool32 sequential;
-    drwav_uint8 riff[4];
-    drwav_fmt fmt;
-    unsigned short translatedFormatTag;
-    drwav_uint64 sampleCountFromFactChunk;
-    drwav_bool32 foundDataChunk;
-    drwav_uint64 dataChunkSize;
-    drwav_uint64 chunkSize;
-
-    cursor = 0;
-    sequential = (flags & DRWAV_SEQUENTIAL) != 0;
-
-    /* The first 4 bytes should be the RIFF identifier. */
-    if (drwav__on_read(pWav->onRead, pWav->pUserData, riff, sizeof(riff), &cursor) != sizeof(riff)) {
-        return DRWAV_FALSE;
-    }
-
-    /*
-    The first 4 bytes can be used to identify the container. For RIFF files it will start with "RIFF" and for
-    w64 it will start with "riff".
-    */
-    if (drwav__fourcc_equal(riff, "RIFF")) {
-        pWav->container = drwav_container_riff;
-    } else if (drwav__fourcc_equal(riff, "riff")) {
-        int i;
-        drwav_uint8 riff2[12];
-
-        pWav->container = drwav_container_w64;
-
-        /* Check the rest of the GUID for validity. */
-        if (drwav__on_read(pWav->onRead, pWav->pUserData, riff2, sizeof(riff2), &cursor) != sizeof(riff2)) {
-            return DRWAV_FALSE;
-        }
-
-        for (i = 0; i < 12; ++i) {
-            if (riff2[i] != drwavGUID_W64_RIFF[i+4]) {
-                return DRWAV_FALSE;
-            }
-        }
-    } else {
-        return DRWAV_FALSE;   /* Unknown or unsupported container. */
-    }
-
-
-    if (pWav->container == drwav_container_riff) {
-        drwav_uint8 chunkSizeBytes[4];
-        drwav_uint8 wave[4];
-
-        /* RIFF/WAVE */
-        if (drwav__on_read(pWav->onRead, pWav->pUserData, chunkSizeBytes, sizeof(chunkSizeBytes), &cursor) != sizeof(chunkSizeBytes)) {
-            return DRWAV_FALSE;
-        }
-
-        if (drwav__bytes_to_u32(chunkSizeBytes) < 36) {
-            return DRWAV_FALSE;    /* Chunk size should always be at least 36 bytes. */
-        }
-
-        if (drwav__on_read(pWav->onRead, pWav->pUserData, wave, sizeof(wave), &cursor) != sizeof(wave)) {
-            return DRWAV_FALSE;
-        }
-
-        if (!drwav__fourcc_equal(wave, "WAVE")) {
-            return DRWAV_FALSE;    /* Expecting "WAVE". */
-        }
-    } else {
-        drwav_uint8 chunkSizeBytes[8];
-        drwav_uint8 wave[16];
-
-        /* W64 */
-        if (drwav__on_read(pWav->onRead, pWav->pUserData, chunkSizeBytes, sizeof(chunkSizeBytes), &cursor) != sizeof(chunkSizeBytes)) {
-            return DRWAV_FALSE;
-        }
-
-        if (drwav__bytes_to_u64(chunkSizeBytes) < 80) {
-            return DRWAV_FALSE;
-        }
-
-        if (drwav__on_read(pWav->onRead, pWav->pUserData, wave, sizeof(wave), &cursor) != sizeof(wave)) {
-            return DRWAV_FALSE;
-        }
-
-        if (!drwav__guid_equal(wave, drwavGUID_W64_WAVE)) {
-            return DRWAV_FALSE;
-        }
-    }
-
-
-    /* The next bytes should be the "fmt " chunk. */
-    if (!drwav__read_fmt(pWav->onRead, pWav->onSeek, pWav->pUserData, pWav->container, &cursor, &fmt)) {
-        return DRWAV_FALSE;    /* Failed to read the "fmt " chunk. */
-    }
-
-    /* Basic validation. */
-    if ((fmt.sampleRate    == 0 || fmt.sampleRate    > DRWAV_MAX_SAMPLE_RATE)     ||
-        (fmt.channels      == 0 || fmt.channels      > DRWAV_MAX_CHANNELS)        ||
-        (fmt.bitsPerSample == 0 || fmt.bitsPerSample > DRWAV_MAX_BITS_PER_SAMPLE) ||
-        fmt.blockAlign == 0) {
-        return DRWAV_FALSE; /* Probably an invalid WAV file. */
-    }
-
-
-    /* Translate the internal format. */
-    translatedFormatTag = fmt.formatTag;
-    if (translatedFormatTag == DR_WAVE_FORMAT_EXTENSIBLE) {
-        translatedFormatTag = drwav__bytes_to_u16(fmt.subFormat + 0);
-    }
-
-
-
-    sampleCountFromFactChunk = 0;
-
-    /*
-    We need to enumerate over each chunk for two reasons:
-      1) The "data" chunk may not be the next one
-      2) We may want to report each chunk back to the client
-    
-    In order to correctly report each chunk back to the client we will need to keep looping until the end of the file.
-    */
-    foundDataChunk = DRWAV_FALSE;
-    dataChunkSize = 0;
-
-    /* The next chunk we care about is the "data" chunk. This is not necessarily the next chunk so we'll need to loop. */
-    for (;;)
-    {
-        drwav_chunk_header header;
-        drwav_result result = drwav__read_chunk_header(pWav->onRead, pWav->pUserData, pWav->container, &cursor, &header);
-        if (result != DRWAV_SUCCESS) {
-            if (!foundDataChunk) {
-                return DRWAV_FALSE;
-            } else {
-                break;  /* Probably at the end of the file. Get out of the loop. */
-            }
-        }
-
-        /* Tell the client about this chunk. */
-        if (!sequential && onChunk != NULL) {
-            drwav_uint64 callbackBytesRead = onChunk(pChunkUserData, pWav->onRead, pWav->onSeek, pWav->pUserData, &header, pWav->container, &fmt);
-
-            /*
-            dr_wav may need to read the contents of the chunk, so we now need to seek back to the position before
-            we called the callback.
-            */
-            if (callbackBytesRead > 0) {
-                if (!drwav__seek_from_start(pWav->onSeek, cursor, pWav->pUserData)) {
-                    return DRWAV_FALSE;
-                }
-            }
-        }
-        
-
-        if (!foundDataChunk) {
-            pWav->dataChunkDataPos = cursor;
-        }
-
-        chunkSize = header.sizeInBytes;
-        if (pWav->container == drwav_container_riff) {
-            if (drwav__fourcc_equal(header.id.fourcc, "data")) {
-                foundDataChunk = DRWAV_TRUE;
-                dataChunkSize = chunkSize;
-            }
-        } else {
-            if (drwav__guid_equal(header.id.guid, drwavGUID_W64_DATA)) {
-                foundDataChunk = DRWAV_TRUE;
-                dataChunkSize = chunkSize;
-            }
-        }
-
-        /*
-        If at this point we have found the data chunk and we're running in sequential mode, we need to break out of this loop. The reason for
-        this is that we would otherwise require a backwards seek which sequential mode forbids.
-        */
-        if (foundDataChunk && sequential) {
-            break;
-        }
-
-        /* Optional. Get the total sample count from the FACT chunk. This is useful for compressed formats. */
-        if (pWav->container == drwav_container_riff) {
-            if (drwav__fourcc_equal(header.id.fourcc, "fact")) {
-                drwav_uint32 sampleCount;
-                if (drwav__on_read(pWav->onRead, pWav->pUserData, &sampleCount, 4, &cursor) != 4) {
-                    return DRWAV_FALSE;
-                }
-                chunkSize -= 4;
-
-                if (!foundDataChunk) {
-                    pWav->dataChunkDataPos = cursor;
-                }
-
-                /*
-                The sample count in the "fact" chunk is either unreliable, or I'm not understanding it properly. For now I am only enabling this
-                for Microsoft ADPCM formats.
-                */
-                if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) {
-                    sampleCountFromFactChunk = sampleCount;
-                } else {
-                    sampleCountFromFactChunk = 0;
-                }
-            }
-        } else {
-            if (drwav__guid_equal(header.id.guid, drwavGUID_W64_FACT)) {
-                if (drwav__on_read(pWav->onRead, pWav->pUserData, &sampleCountFromFactChunk, 8, &cursor) != 8) {
-                    return DRWAV_FALSE;
-                }
-                chunkSize -= 8;
-
-                if (!foundDataChunk) {
-                    pWav->dataChunkDataPos = cursor;
-                }
-            }
-        }
-
-        /* "smpl" chunk. */
-        if (pWav->container == drwav_container_riff) {
-            if (drwav__fourcc_equal(header.id.fourcc, "smpl")) {
-                drwav_uint8 smplHeaderData[36];    /* 36 = size of the smpl header section, not including the loop data. */
-                if (chunkSize >= sizeof(smplHeaderData)) {
-                    drwav_uint64 bytesJustRead = drwav__on_read(pWav->onRead, pWav->pUserData, smplHeaderData, sizeof(smplHeaderData), &cursor);
-                    chunkSize -= bytesJustRead;
-
-                    if (bytesJustRead == sizeof(smplHeaderData)) {
-                        drwav_uint32 iLoop;
-
-                        pWav->smpl.manufacturer      = drwav__bytes_to_u32(smplHeaderData+0);
-                        pWav->smpl.product           = drwav__bytes_to_u32(smplHeaderData+4);
-                        pWav->smpl.samplePeriod      = drwav__bytes_to_u32(smplHeaderData+8);
-                        pWav->smpl.midiUnityNotes    = drwav__bytes_to_u32(smplHeaderData+12);
-                        pWav->smpl.midiPitchFraction = drwav__bytes_to_u32(smplHeaderData+16);
-                        pWav->smpl.smpteFormat       = drwav__bytes_to_u32(smplHeaderData+20);
-                        pWav->smpl.smpteOffset       = drwav__bytes_to_u32(smplHeaderData+24);
-                        pWav->smpl.numSampleLoops    = drwav__bytes_to_u32(smplHeaderData+28);
-                        pWav->smpl.samplerData       = drwav__bytes_to_u32(smplHeaderData+32);
-
-                        for (iLoop = 0; iLoop < pWav->smpl.numSampleLoops && iLoop < drwav_countof(pWav->smpl.loops); ++iLoop) {
-                            drwav_uint8 smplLoopData[24];  /* 24 = size of a loop section in the smpl chunk. */
-                            bytesJustRead = drwav__on_read(pWav->onRead, pWav->pUserData, smplLoopData, sizeof(smplLoopData), &cursor);
-                            chunkSize -= bytesJustRead;
-
-                            if (bytesJustRead == sizeof(smplLoopData)) {
-                                pWav->smpl.loops[iLoop].cuePointId = drwav__bytes_to_u32(smplLoopData+0);
-                                pWav->smpl.loops[iLoop].type       = drwav__bytes_to_u32(smplLoopData+4);
-                                pWav->smpl.loops[iLoop].start      = drwav__bytes_to_u32(smplLoopData+8);
-                                pWav->smpl.loops[iLoop].end        = drwav__bytes_to_u32(smplLoopData+12);
-                                pWav->smpl.loops[iLoop].fraction   = drwav__bytes_to_u32(smplLoopData+16);
-                                pWav->smpl.loops[iLoop].playCount  = drwav__bytes_to_u32(smplLoopData+20);
-                            } else {
-                                break;  /* Break from the smpl loop for loop. */
-                            }
-                        }
-                    }
-                } else {
-                    /* Looks like invalid data. Ignore the chunk. */
-                }
-            }
-        } else {
-            if (drwav__guid_equal(header.id.guid, drwavGUID_W64_SMPL)) {
-                /*
-                This path will be hit when a W64 WAV file contains a smpl chunk. I don't have a sample file to test this path, so a contribution
-                is welcome to add support for this.
-                */
-            }
-        }
-
-        /* Make sure we seek past the padding. */
-        chunkSize += header.paddingSize;
-        if (!drwav__seek_forward(pWav->onSeek, chunkSize, pWav->pUserData)) {
-            break;
-        }
-        cursor += chunkSize;
-
-        if (!foundDataChunk) {
-            pWav->dataChunkDataPos = cursor;
-        }
-    }
-
-    /* If we haven't found a data chunk, return an error. */
-    if (!foundDataChunk) {
-        return DRWAV_FALSE;
-    }
-
-    /* We may have moved passed the data chunk. If so we need to move back. If running in sequential mode we can assume we are already sitting on the data chunk. */
-    if (!sequential) {
-        if (!drwav__seek_from_start(pWav->onSeek, pWav->dataChunkDataPos, pWav->pUserData)) {
-            return DRWAV_FALSE;
-        }
-        cursor = pWav->dataChunkDataPos;
-    }
-    
-
-    /* At this point we should be sitting on the first byte of the raw audio data. */
-
-    pWav->fmt                 = fmt;
-    pWav->sampleRate          = fmt.sampleRate;
-    pWav->channels            = fmt.channels;
-    pWav->bitsPerSample       = fmt.bitsPerSample;
-    pWav->bytesRemaining      = dataChunkSize;
-    pWav->translatedFormatTag = translatedFormatTag;
-    pWav->dataChunkDataSize   = dataChunkSize;
-
-    if (sampleCountFromFactChunk != 0) {
-        pWav->totalPCMFrameCount = sampleCountFromFactChunk;
-    } else {
-        pWav->totalPCMFrameCount = dataChunkSize / drwav_get_bytes_per_pcm_frame(pWav);
-
-        if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) {
-            drwav_uint64 totalBlockHeaderSizeInBytes;
-            drwav_uint64 blockCount = dataChunkSize / fmt.blockAlign;
-
-            /* Make sure any trailing partial block is accounted for. */
-            if ((blockCount * fmt.blockAlign) < dataChunkSize) {
-                blockCount += 1;
-            }
-
-            /* We decode two samples per byte. There will be blockCount headers in the data chunk. This is enough to know how to calculate the total PCM frame count. */
-            totalBlockHeaderSizeInBytes = blockCount * (6*fmt.channels);
-            pWav->totalPCMFrameCount = ((dataChunkSize - totalBlockHeaderSizeInBytes) * 2) / fmt.channels;
-        }
-        if (pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) {
-            drwav_uint64 totalBlockHeaderSizeInBytes;
-            drwav_uint64 blockCount = dataChunkSize / fmt.blockAlign;
-
-            /* Make sure any trailing partial block is accounted for. */
-            if ((blockCount * fmt.blockAlign) < dataChunkSize) {
-                blockCount += 1;
-            }
-
-            /* We decode two samples per byte. There will be blockCount headers in the data chunk. This is enough to know how to calculate the total PCM frame count. */
-            totalBlockHeaderSizeInBytes = blockCount * (4*fmt.channels);
-            pWav->totalPCMFrameCount = ((dataChunkSize - totalBlockHeaderSizeInBytes) * 2) / fmt.channels;
-
-            /* The header includes a decoded sample for each channel which acts as the initial predictor sample. */
-            pWav->totalPCMFrameCount += blockCount;
-        }
-    }
-
-    /* Some formats only support a certain number of channels. */
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM || pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) {
-        if (pWav->channels > 2) {
-            return DRWAV_FALSE;
-        }
-    }
-
-#ifdef DR_WAV_LIBSNDFILE_COMPAT
-    /*
-    I use libsndfile as a benchmark for testing, however in the version I'm using (from the Windows installer on the libsndfile website),
-    it appears the total sample count libsndfile uses for MS-ADPCM is incorrect. It would seem they are computing the total sample count
-    from the number of blocks, however this results in the inclusion of extra silent samples at the end of the last block. The correct
-    way to know the total sample count is to inspect the "fact" chunk, which should always be present for compressed formats, and should
-    always include the sample count. This little block of code below is only used to emulate the libsndfile logic so I can properly run my
-    correctness tests against libsndfile, and is disabled by default.
-    */
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) {
-        drwav_uint64 blockCount = dataChunkSize / fmt.blockAlign;
-        pWav->totalPCMFrameCount = (((blockCount * (fmt.blockAlign - (6*pWav->channels))) * 2)) / fmt.channels;  /* x2 because two samples per byte. */
-    }
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) {
-        drwav_uint64 blockCount = dataChunkSize / fmt.blockAlign;
-        pWav->totalPCMFrameCount = (((blockCount * (fmt.blockAlign - (4*pWav->channels))) * 2) + (blockCount * pWav->channels)) / fmt.channels;
-    }
-#endif
-
-    return DRWAV_TRUE;
-}
-
-DRWAV_API drwav_bool32 drwav_init(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    return drwav_init_ex(pWav, onRead, onSeek, NULL, pUserData, NULL, 0, pAllocationCallbacks);
-}
-
-DRWAV_API drwav_bool32 drwav_init_ex(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, drwav_chunk_proc onChunk, void* pReadSeekUserData, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    if (!drwav_preinit(pWav, onRead, onSeek, pReadSeekUserData, pAllocationCallbacks)) {
-        return DRWAV_FALSE;
-    }
-
-    return drwav_init__internal(pWav, onChunk, pChunkUserData, flags);
-}
-
-
-static drwav_uint32 drwav__riff_chunk_size_riff(drwav_uint64 dataChunkSize)
-{
-    drwav_uint32 dataSubchunkPaddingSize = drwav__chunk_padding_size_riff(dataChunkSize);
-
-    if (dataChunkSize <= (0xFFFFFFFFUL - 36 - dataSubchunkPaddingSize)) {
-        return 36 + (drwav_uint32)(dataChunkSize + dataSubchunkPaddingSize);
-    } else {
-        return 0xFFFFFFFF;
-    }
-}
-
-static drwav_uint32 drwav__data_chunk_size_riff(drwav_uint64 dataChunkSize)
-{
-    if (dataChunkSize <= 0xFFFFFFFFUL) {
-        return (drwav_uint32)dataChunkSize;
-    } else {
-        return 0xFFFFFFFFUL;
-    }
-}
-
-static drwav_uint64 drwav__riff_chunk_size_w64(drwav_uint64 dataChunkSize)
-{
-    drwav_uint64 dataSubchunkPaddingSize = drwav__chunk_padding_size_w64(dataChunkSize);
-
-    return 80 + 24 + dataChunkSize + dataSubchunkPaddingSize;   /* +24 because W64 includes the size of the GUID and size fields. */
-}
-
-static drwav_uint64 drwav__data_chunk_size_w64(drwav_uint64 dataChunkSize)
-{
-    return 24 + dataChunkSize;        /* +24 because W64 includes the size of the GUID and size fields. */
-}
-
-
-static drwav_bool32 drwav_preinit_write(drwav* pWav, const drwav_data_format* pFormat, drwav_bool32 isSequential, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    if (pWav == NULL || onWrite == NULL) {
-        return DRWAV_FALSE;
-    }
-
-    if (!isSequential && onSeek == NULL) {
-        return DRWAV_FALSE; /* <-- onSeek is required when in non-sequential mode. */
-    }
-
-    /* Not currently supporting compressed formats. Will need to add support for the "fact" chunk before we enable this. */
-    if (pFormat->format == DR_WAVE_FORMAT_EXTENSIBLE) {
-        return DRWAV_FALSE;
-    }
-    if (pFormat->format == DR_WAVE_FORMAT_ADPCM || pFormat->format == DR_WAVE_FORMAT_DVI_ADPCM) {
-        return DRWAV_FALSE;
-    }
-
-    DRWAV_ZERO_MEMORY(pWav, sizeof(*pWav));
-    pWav->onWrite   = onWrite;
-    pWav->onSeek    = onSeek;
-    pWav->pUserData = pUserData;
-    pWav->allocationCallbacks = drwav_copy_allocation_callbacks_or_defaults(pAllocationCallbacks);
-
-    if (pWav->allocationCallbacks.onFree == NULL || (pWav->allocationCallbacks.onMalloc == NULL && pWav->allocationCallbacks.onRealloc == NULL)) {
-        return DRWAV_FALSE;    /* Invalid allocation callbacks. */
-    }
-
-    pWav->fmt.formatTag = (drwav_uint16)pFormat->format;
-    pWav->fmt.channels = (drwav_uint16)pFormat->channels;
-    pWav->fmt.sampleRate = pFormat->sampleRate;
-    pWav->fmt.avgBytesPerSec = (drwav_uint32)((pFormat->bitsPerSample * pFormat->sampleRate * pFormat->channels) / 8);
-    pWav->fmt.blockAlign = (drwav_uint16)((pFormat->channels * pFormat->bitsPerSample) / 8);
-    pWav->fmt.bitsPerSample = (drwav_uint16)pFormat->bitsPerSample;
-    pWav->fmt.extendedSize = 0;
-    pWav->isSequentialWrite = isSequential;
-
-    return DRWAV_TRUE;
-}
-
-static drwav_bool32 drwav_init_write__internal(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount)
-{
-    /* The function assumes drwav_preinit_write() was called beforehand. */
-
-    size_t runningPos = 0;
-    drwav_uint64 initialDataChunkSize = 0;
-    drwav_uint64 chunkSizeFMT;
-
-    /*
-    The initial values for the "RIFF" and "data" chunks depends on whether or not we are initializing in sequential mode or not. In
-    sequential mode we set this to its final values straight away since they can be calculated from the total sample count. In non-
-    sequential mode we initialize it all to zero and fill it out in drwav_uninit() using a backwards seek.
-    */
-    if (pWav->isSequentialWrite) {
-        initialDataChunkSize = (totalSampleCount * pWav->fmt.bitsPerSample) / 8;
-
-        /*
-        The RIFF container has a limit on the number of samples. drwav is not allowing this. There's no practical limits for Wave64
-        so for the sake of simplicity I'm not doing any validation for that.
-        */
-        if (pFormat->container == drwav_container_riff) {
-            if (initialDataChunkSize > (0xFFFFFFFFUL - 36)) {
-                return DRWAV_FALSE; /* Not enough room to store every sample. */
-            }
-        }
-    }
-
-    pWav->dataChunkDataSizeTargetWrite = initialDataChunkSize;
-
-
-    /* "RIFF" chunk. */
-    if (pFormat->container == drwav_container_riff) {
-        drwav_uint32 chunkSizeRIFF = 36 + (drwav_uint32)initialDataChunkSize;   /* +36 = "RIFF"+[RIFF Chunk Size]+"WAVE" + [sizeof "fmt " chunk] */
-        runningPos += pWav->onWrite(pWav->pUserData, "RIFF", 4);
-        runningPos += pWav->onWrite(pWav->pUserData, &chunkSizeRIFF, 4);
-        runningPos += pWav->onWrite(pWav->pUserData, "WAVE", 4);
-    } else {
-        drwav_uint64 chunkSizeRIFF = 80 + 24 + initialDataChunkSize;   /* +24 because W64 includes the size of the GUID and size fields. */
-        runningPos += pWav->onWrite(pWav->pUserData, drwavGUID_W64_RIFF, 16);
-        runningPos += pWav->onWrite(pWav->pUserData, &chunkSizeRIFF, 8);
-        runningPos += pWav->onWrite(pWav->pUserData, drwavGUID_W64_WAVE, 16);
-    }
-
-    /* "fmt " chunk. */
-    if (pFormat->container == drwav_container_riff) {
-        chunkSizeFMT = 16;
-        runningPos += pWav->onWrite(pWav->pUserData, "fmt ", 4);
-        runningPos += pWav->onWrite(pWav->pUserData, &chunkSizeFMT, 4);
-    } else {
-        chunkSizeFMT = 40;
-        runningPos += pWav->onWrite(pWav->pUserData, drwavGUID_W64_FMT, 16);
-        runningPos += pWav->onWrite(pWav->pUserData, &chunkSizeFMT, 8);
-    }
-
-    runningPos += pWav->onWrite(pWav->pUserData, &pWav->fmt.formatTag,      2);
-    runningPos += pWav->onWrite(pWav->pUserData, &pWav->fmt.channels,       2);
-    runningPos += pWav->onWrite(pWav->pUserData, &pWav->fmt.sampleRate,     4);
-    runningPos += pWav->onWrite(pWav->pUserData, &pWav->fmt.avgBytesPerSec, 4);
-    runningPos += pWav->onWrite(pWav->pUserData, &pWav->fmt.blockAlign,     2);
-    runningPos += pWav->onWrite(pWav->pUserData, &pWav->fmt.bitsPerSample,  2);
-
-    pWav->dataChunkDataPos = runningPos;
-
-    /* "data" chunk. */
-    if (pFormat->container == drwav_container_riff) {
-        drwav_uint32 chunkSizeDATA = (drwav_uint32)initialDataChunkSize;
-        runningPos += pWav->onWrite(pWav->pUserData, "data", 4);
-        runningPos += pWav->onWrite(pWav->pUserData, &chunkSizeDATA, 4);
-    } else {
-        drwav_uint64 chunkSizeDATA = 24 + initialDataChunkSize; /* +24 because W64 includes the size of the GUID and size fields. */
-        runningPos += pWav->onWrite(pWav->pUserData, drwavGUID_W64_DATA, 16);
-        runningPos += pWav->onWrite(pWav->pUserData, &chunkSizeDATA, 8);
-    }
-
-
-    /* Simple validation. */
-    if (pFormat->container == drwav_container_riff) {
-        if (runningPos != 20 + chunkSizeFMT + 8) {
-            return DRWAV_FALSE;
-        }
-    } else {
-        if (runningPos != 40 + chunkSizeFMT + 24) {
-            return DRWAV_FALSE;
-        }
-    }
-    
-
-    /* Set some properties for the client's convenience. */
-    pWav->container = pFormat->container;
-    pWav->channels = (drwav_uint16)pFormat->channels;
-    pWav->sampleRate = pFormat->sampleRate;
-    pWav->bitsPerSample = (drwav_uint16)pFormat->bitsPerSample;
-    pWav->translatedFormatTag = (drwav_uint16)pFormat->format;
-
-    return DRWAV_TRUE;
-}
-
-
-DRWAV_API drwav_bool32 drwav_init_write(drwav* pWav, const drwav_data_format* pFormat, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    if (!drwav_preinit_write(pWav, pFormat, DRWAV_FALSE, onWrite, onSeek, pUserData, pAllocationCallbacks)) {
-        return DRWAV_FALSE;
-    }
-
-    return drwav_init_write__internal(pWav, pFormat, 0);               /* DRWAV_FALSE = Not Sequential */
-}
-
-DRWAV_API drwav_bool32 drwav_init_write_sequential(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_write_proc onWrite, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    if (!drwav_preinit_write(pWav, pFormat, DRWAV_TRUE, onWrite, NULL, pUserData, pAllocationCallbacks)) {
-        return DRWAV_FALSE;
-    }
-
-    return drwav_init_write__internal(pWav, pFormat, totalSampleCount); /* DRWAV_TRUE = Sequential */
-}
-
-DRWAV_API drwav_bool32 drwav_init_write_sequential_pcm_frames(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalPCMFrameCount, drwav_write_proc onWrite, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    if (pFormat == NULL) {
-        return DRWAV_FALSE;
-    }
-
-    return drwav_init_write_sequential(pWav, pFormat, totalPCMFrameCount*pFormat->channels, onWrite, pUserData, pAllocationCallbacks);
-}
-
-DRWAV_API drwav_uint64 drwav_target_write_size_bytes(const drwav_data_format* pFormat, drwav_uint64 totalSampleCount)
-{
-    /* Casting totalSampleCount to drwav_int64 for VC6 compatibility. No issues in practice because nobody is going to exhaust the whole 63 bits. */
-    drwav_uint64 targetDataSizeBytes = (drwav_uint64)((drwav_int64)totalSampleCount * pFormat->channels * pFormat->bitsPerSample/8.0);
-    drwav_uint64 riffChunkSizeBytes;
-    drwav_uint64 fileSizeBytes;
-
-    if (pFormat->container == drwav_container_riff) {
-        riffChunkSizeBytes = drwav__riff_chunk_size_riff(targetDataSizeBytes);
-        fileSizeBytes = (8 + riffChunkSizeBytes); /* +8 because WAV doesn't include the size of the ChunkID and ChunkSize fields. */
-    } else {
-        riffChunkSizeBytes = drwav__riff_chunk_size_w64(targetDataSizeBytes);
-        fileSizeBytes = riffChunkSizeBytes;
-    }
-
-    return fileSizeBytes;
-}
-
-
-#ifndef DR_WAV_NO_STDIO
-
-/* drwav_result_from_errno() is only used for fopen() and wfopen() so putting it inside DR_WAV_NO_STDIO for now. If something else needs this later we can move it out. */
-#include <errno.h>
-static drwav_result drwav_result_from_errno(int e)
-{
-    switch (e)
-    {
-        case 0: return DRWAV_SUCCESS;
-    #ifdef EPERM
-        case EPERM: return DRWAV_INVALID_OPERATION;
-    #endif
-    #ifdef ENOENT
-        case ENOENT: return DRWAV_DOES_NOT_EXIST;
-    #endif
-    #ifdef ESRCH
-        case ESRCH: return DRWAV_DOES_NOT_EXIST;
-    #endif
-    #ifdef EINTR
-        case EINTR: return DRWAV_INTERRUPT;
-    #endif
-    #ifdef EIO
-        case EIO: return DRWAV_IO_ERROR;
-    #endif
-    #ifdef ENXIO
-        case ENXIO: return DRWAV_DOES_NOT_EXIST;
-    #endif
-    #ifdef E2BIG
-        case E2BIG: return DRWAV_INVALID_ARGS;
-    #endif
-    #ifdef ENOEXEC
-        case ENOEXEC: return DRWAV_INVALID_FILE;
-    #endif
-    #ifdef EBADF
-        case EBADF: return DRWAV_INVALID_FILE;
-    #endif
-    #ifdef ECHILD
-        case ECHILD: return DRWAV_ERROR;
-    #endif
-    #ifdef EAGAIN
-        case EAGAIN: return DRWAV_UNAVAILABLE;
-    #endif
-    #ifdef ENOMEM
-        case ENOMEM: return DRWAV_OUT_OF_MEMORY;
-    #endif
-    #ifdef EACCES
-        case EACCES: return DRWAV_ACCESS_DENIED;
-    #endif
-    #ifdef EFAULT
-        case EFAULT: return DRWAV_BAD_ADDRESS;
-    #endif
-    #ifdef ENOTBLK
-        case ENOTBLK: return DRWAV_ERROR;
-    #endif
-    #ifdef EBUSY
-        case EBUSY: return DRWAV_BUSY;
-    #endif
-    #ifdef EEXIST
-        case EEXIST: return DRWAV_ALREADY_EXISTS;
-    #endif
-    #ifdef EXDEV
-        case EXDEV: return DRWAV_ERROR;
-    #endif
-    #ifdef ENODEV
-        case ENODEV: return DRWAV_DOES_NOT_EXIST;
-    #endif
-    #ifdef ENOTDIR
-        case ENOTDIR: return DRWAV_NOT_DIRECTORY;
-    #endif
-    #ifdef EISDIR
-        case EISDIR: return DRWAV_IS_DIRECTORY;
-    #endif
-    #ifdef EINVAL
-        case EINVAL: return DRWAV_INVALID_ARGS;
-    #endif
-    #ifdef ENFILE
-        case ENFILE: return DRWAV_TOO_MANY_OPEN_FILES;
-    #endif
-    #ifdef EMFILE
-        case EMFILE: return DRWAV_TOO_MANY_OPEN_FILES;
-    #endif
-    #ifdef ENOTTY
-        case ENOTTY: return DRWAV_INVALID_OPERATION;
-    #endif
-    #ifdef ETXTBSY
-        case ETXTBSY: return DRWAV_BUSY;
-    #endif
-    #ifdef EFBIG
-        case EFBIG: return DRWAV_TOO_BIG;
-    #endif
-    #ifdef ENOSPC
-        case ENOSPC: return DRWAV_NO_SPACE;
-    #endif
-    #ifdef ESPIPE
-        case ESPIPE: return DRWAV_BAD_SEEK;
-    #endif
-    #ifdef EROFS
-        case EROFS: return DRWAV_ACCESS_DENIED;
-    #endif
-    #ifdef EMLINK
-        case EMLINK: return DRWAV_TOO_MANY_LINKS;
-    #endif
-    #ifdef EPIPE
-        case EPIPE: return DRWAV_BAD_PIPE;
-    #endif
-    #ifdef EDOM
-        case EDOM: return DRWAV_OUT_OF_RANGE;
-    #endif
-    #ifdef ERANGE
-        case ERANGE: return DRWAV_OUT_OF_RANGE;
-    #endif
-    #ifdef EDEADLK
-        case EDEADLK: return DRWAV_DEADLOCK;
-    #endif
-    #ifdef ENAMETOOLONG
-        case ENAMETOOLONG: return DRWAV_PATH_TOO_LONG;
-    #endif
-    #ifdef ENOLCK
-        case ENOLCK: return DRWAV_ERROR;
-    #endif
-    #ifdef ENOSYS
-        case ENOSYS: return DRWAV_NOT_IMPLEMENTED;
-    #endif
-    #ifdef ENOTEMPTY
-        case ENOTEMPTY: return DRWAV_DIRECTORY_NOT_EMPTY;
-    #endif
-    #ifdef ELOOP
-        case ELOOP: return DRWAV_TOO_MANY_LINKS;
-    #endif
-    #ifdef ENOMSG
-        case ENOMSG: return DRWAV_NO_MESSAGE;
-    #endif
-    #ifdef EIDRM
-        case EIDRM: return DRWAV_ERROR;
-    #endif
-    #ifdef ECHRNG
-        case ECHRNG: return DRWAV_ERROR;
-    #endif
-    #ifdef EL2NSYNC
-        case EL2NSYNC: return DRWAV_ERROR;
-    #endif
-    #ifdef EL3HLT
-        case EL3HLT: return DRWAV_ERROR;
-    #endif
-    #ifdef EL3RST
-        case EL3RST: return DRWAV_ERROR;
-    #endif
-    #ifdef ELNRNG
-        case ELNRNG: return DRWAV_OUT_OF_RANGE;
-    #endif
-    #ifdef EUNATCH
-        case EUNATCH: return DRWAV_ERROR;
-    #endif
-    #ifdef ENOCSI
-        case ENOCSI: return DRWAV_ERROR;
-    #endif
-    #ifdef EL2HLT
-        case EL2HLT: return DRWAV_ERROR;
-    #endif
-    #ifdef EBADE
-        case EBADE: return DRWAV_ERROR;
-    #endif
-    #ifdef EBADR
-        case EBADR: return DRWAV_ERROR;
-    #endif
-    #ifdef EXFULL
-        case EXFULL: return DRWAV_ERROR;
-    #endif
-    #ifdef ENOANO
-        case ENOANO: return DRWAV_ERROR;
-    #endif
-    #ifdef EBADRQC
-        case EBADRQC: return DRWAV_ERROR;
-    #endif
-    #ifdef EBADSLT
-        case EBADSLT: return DRWAV_ERROR;
-    #endif
-    #ifdef EBFONT
-        case EBFONT: return DRWAV_INVALID_FILE;
-    #endif
-    #ifdef ENOSTR
-        case ENOSTR: return DRWAV_ERROR;
-    #endif
-    #ifdef ENODATA
-        case ENODATA: return DRWAV_NO_DATA_AVAILABLE;
-    #endif
-    #ifdef ETIME
-        case ETIME: return DRWAV_TIMEOUT;
-    #endif
-    #ifdef ENOSR
-        case ENOSR: return DRWAV_NO_DATA_AVAILABLE;
-    #endif
-    #ifdef ENONET
-        case ENONET: return DRWAV_NO_NETWORK;
-    #endif
-    #ifdef ENOPKG
-        case ENOPKG: return DRWAV_ERROR;
-    #endif
-    #ifdef EREMOTE
-        case EREMOTE: return DRWAV_ERROR;
-    #endif
-    #ifdef ENOLINK
-        case ENOLINK: return DRWAV_ERROR;
-    #endif
-    #ifdef EADV
-        case EADV: return DRWAV_ERROR;
-    #endif
-    #ifdef ESRMNT
-        case ESRMNT: return DRWAV_ERROR;
-    #endif
-    #ifdef ECOMM
-        case ECOMM: return DRWAV_ERROR;
-    #endif
-    #ifdef EPROTO
-        case EPROTO: return DRWAV_ERROR;
-    #endif
-    #ifdef EMULTIHOP
-        case EMULTIHOP: return DRWAV_ERROR;
-    #endif
-    #ifdef EDOTDOT
-        case EDOTDOT: return DRWAV_ERROR;
-    #endif
-    #ifdef EBADMSG
-        case EBADMSG: return DRWAV_BAD_MESSAGE;
-    #endif
-    #ifdef EOVERFLOW
-        case EOVERFLOW: return DRWAV_TOO_BIG;
-    #endif
-    #ifdef ENOTUNIQ
-        case ENOTUNIQ: return DRWAV_NOT_UNIQUE;
-    #endif
-    #ifdef EBADFD
-        case EBADFD: return DRWAV_ERROR;
-    #endif
-    #ifdef EREMCHG
-        case EREMCHG: return DRWAV_ERROR;
-    #endif
-    #ifdef ELIBACC
-        case ELIBACC: return DRWAV_ACCESS_DENIED;
-    #endif
-    #ifdef ELIBBAD
-        case ELIBBAD: return DRWAV_INVALID_FILE;
-    #endif
-    #ifdef ELIBSCN
-        case ELIBSCN: return DRWAV_INVALID_FILE;
-    #endif
-    #ifdef ELIBMAX
-        case ELIBMAX: return DRWAV_ERROR;
-    #endif
-    #ifdef ELIBEXEC
-        case ELIBEXEC: return DRWAV_ERROR;
-    #endif
-    #ifdef EILSEQ
-        case EILSEQ: return DRWAV_INVALID_DATA;
-    #endif
-    #ifdef ERESTART
-        case ERESTART: return DRWAV_ERROR;
-    #endif
-    #ifdef ESTRPIPE
-        case ESTRPIPE: return DRWAV_ERROR;
-    #endif
-    #ifdef EUSERS
-        case EUSERS: return DRWAV_ERROR;
-    #endif
-    #ifdef ENOTSOCK
-        case ENOTSOCK: return DRWAV_NOT_SOCKET;
-    #endif
-    #ifdef EDESTADDRREQ
-        case EDESTADDRREQ: return DRWAV_NO_ADDRESS;
-    #endif
-    #ifdef EMSGSIZE
-        case EMSGSIZE: return DRWAV_TOO_BIG;
-    #endif
-    #ifdef EPROTOTYPE
-        case EPROTOTYPE: return DRWAV_BAD_PROTOCOL;
-    #endif
-    #ifdef ENOPROTOOPT
-        case ENOPROTOOPT: return DRWAV_PROTOCOL_UNAVAILABLE;
-    #endif
-    #ifdef EPROTONOSUPPORT
-        case EPROTONOSUPPORT: return DRWAV_PROTOCOL_NOT_SUPPORTED;
-    #endif
-    #ifdef ESOCKTNOSUPPORT
-        case ESOCKTNOSUPPORT: return DRWAV_SOCKET_NOT_SUPPORTED;
-    #endif
-    #ifdef EOPNOTSUPP
-        case EOPNOTSUPP: return DRWAV_INVALID_OPERATION;
-    #endif
-    #ifdef EPFNOSUPPORT
-        case EPFNOSUPPORT: return DRWAV_PROTOCOL_FAMILY_NOT_SUPPORTED;
-    #endif
-    #ifdef EAFNOSUPPORT
-        case EAFNOSUPPORT: return DRWAV_ADDRESS_FAMILY_NOT_SUPPORTED;
-    #endif
-    #ifdef EADDRINUSE
-        case EADDRINUSE: return DRWAV_ALREADY_IN_USE;
-    #endif
-    #ifdef EADDRNOTAVAIL
-        case EADDRNOTAVAIL: return DRWAV_ERROR;
-    #endif
-    #ifdef ENETDOWN
-        case ENETDOWN: return DRWAV_NO_NETWORK;
-    #endif
-    #ifdef ENETUNREACH
-        case ENETUNREACH: return DRWAV_NO_NETWORK;
-    #endif
-    #ifdef ENETRESET
-        case ENETRESET: return DRWAV_NO_NETWORK;
-    #endif
-    #ifdef ECONNABORTED
-        case ECONNABORTED: return DRWAV_NO_NETWORK;
-    #endif
-    #ifdef ECONNRESET
-        case ECONNRESET: return DRWAV_CONNECTION_RESET;
-    #endif
-    #ifdef ENOBUFS
-        case ENOBUFS: return DRWAV_NO_SPACE;
-    #endif
-    #ifdef EISCONN
-        case EISCONN: return DRWAV_ALREADY_CONNECTED;
-    #endif
-    #ifdef ENOTCONN
-        case ENOTCONN: return DRWAV_NOT_CONNECTED;
-    #endif
-    #ifdef ESHUTDOWN
-        case ESHUTDOWN: return DRWAV_ERROR;
-    #endif
-    #ifdef ETOOMANYREFS
-        case ETOOMANYREFS: return DRWAV_ERROR;
-    #endif
-    #ifdef ETIMEDOUT
-        case ETIMEDOUT: return DRWAV_TIMEOUT;
-    #endif
-    #ifdef ECONNREFUSED
-        case ECONNREFUSED: return DRWAV_CONNECTION_REFUSED;
-    #endif
-    #ifdef EHOSTDOWN
-        case EHOSTDOWN: return DRWAV_NO_HOST;
-    #endif
-    #ifdef EHOSTUNREACH
-        case EHOSTUNREACH: return DRWAV_NO_HOST;
-    #endif
-    #ifdef EALREADY
-        case EALREADY: return DRWAV_IN_PROGRESS;
-    #endif
-    #ifdef EINPROGRESS
-        case EINPROGRESS: return DRWAV_IN_PROGRESS;
-    #endif
-    #ifdef ESTALE
-        case ESTALE: return DRWAV_INVALID_FILE;
-    #endif
-    #ifdef EUCLEAN
-        case EUCLEAN: return DRWAV_ERROR;
-    #endif
-    #ifdef ENOTNAM
-        case ENOTNAM: return DRWAV_ERROR;
-    #endif
-    #ifdef ENAVAIL
-        case ENAVAIL: return DRWAV_ERROR;
-    #endif
-    #ifdef EISNAM
-        case EISNAM: return DRWAV_ERROR;
-    #endif
-    #ifdef EREMOTEIO
-        case EREMOTEIO: return DRWAV_IO_ERROR;
-    #endif
-    #ifdef EDQUOT
-        case EDQUOT: return DRWAV_NO_SPACE;
-    #endif
-    #ifdef ENOMEDIUM
-        case ENOMEDIUM: return DRWAV_DOES_NOT_EXIST;
-    #endif
-    #ifdef EMEDIUMTYPE
-        case EMEDIUMTYPE: return DRWAV_ERROR;
-    #endif
-    #ifdef ECANCELED
-        case ECANCELED: return DRWAV_CANCELLED;
-    #endif
-    #ifdef ENOKEY
-        case ENOKEY: return DRWAV_ERROR;
-    #endif
-    #ifdef EKEYEXPIRED
-        case EKEYEXPIRED: return DRWAV_ERROR;
-    #endif
-    #ifdef EKEYREVOKED
-        case EKEYREVOKED: return DRWAV_ERROR;
-    #endif
-    #ifdef EKEYREJECTED
-        case EKEYREJECTED: return DRWAV_ERROR;
-    #endif
-    #ifdef EOWNERDEAD
-        case EOWNERDEAD: return DRWAV_ERROR;
-    #endif
-    #ifdef ENOTRECOVERABLE
-        case ENOTRECOVERABLE: return DRWAV_ERROR;
-    #endif
-    #ifdef ERFKILL
-        case ERFKILL: return DRWAV_ERROR;
-    #endif
-    #ifdef EHWPOISON
-        case EHWPOISON: return DRWAV_ERROR;
-    #endif
-        default: return DRWAV_ERROR;
-    }
-}
-
-static drwav_result drwav_fopen(FILE** ppFile, const char* pFilePath, const char* pOpenMode)
-{
-#if _MSC_VER && _MSC_VER >= 1400
-    errno_t err;
-#endif
-
-    if (ppFile != NULL) {
-        *ppFile = NULL;  /* Safety. */
-    }
-
-    if (pFilePath == NULL || pOpenMode == NULL || ppFile == NULL) {
-        return DRWAV_INVALID_ARGS;
-    }
-
-#if _MSC_VER && _MSC_VER >= 1400
-    err = fopen_s(ppFile, pFilePath, pOpenMode);
-    if (err != 0) {
-        return drwav_result_from_errno(err);
-    }
-#else
-#if defined(_WIN32) || defined(__APPLE__)
-    *ppFile = fopen(pFilePath, pOpenMode);
-#else
-    #if defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS == 64 && defined(_LARGEFILE64_SOURCE)
-        *ppFile = fopen64(pFilePath, pOpenMode);
-    #else
-        *ppFile = fopen(pFilePath, pOpenMode);
-    #endif
-#endif
-    if (*ppFile == NULL) {
-        drwav_result result = drwav_result_from_errno(errno);
-        if (result == DRWAV_SUCCESS) {
-            result = DRWAV_ERROR;   /* Just a safety check to make sure we never ever return success when pFile == NULL. */
-        }
-
-        return result;
-    }
-#endif
-
-    return DRWAV_SUCCESS;
-}
-
-/*
-_wfopen() isn't always available in all compilation environments.
-
-    * Windows only.
-    * MSVC seems to support it universally as far back as VC6 from what I can tell (haven't checked further back).
-    * MinGW-64 (both 32- and 64-bit) seems to support it.
-    * MinGW wraps it in !defined(__STRICT_ANSI__).
-
-This can be reviewed as compatibility issues arise. The preference is to use _wfopen_s() and _wfopen() as opposed to the wcsrtombs()
-fallback, so if you notice your compiler not detecting this properly I'm happy to look at adding support.
-*/
-#if defined(_WIN32)
-    #if defined(_MSC_VER) || defined(__MINGW64__) || !defined(__STRICT_ANSI__)
-        #define DRWAV_HAS_WFOPEN
-    #endif
-#endif
-
-static drwav_result drwav_wfopen(FILE** ppFile, const wchar_t* pFilePath, const wchar_t* pOpenMode, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    if (ppFile != NULL) {
-        *ppFile = NULL;  /* Safety. */
-    }
-
-    if (pFilePath == NULL || pOpenMode == NULL || ppFile == NULL) {
-        return DRWAV_INVALID_ARGS;
-    }
-
-#if defined(DRWAV_HAS_WFOPEN)
-    {
-        /* Use _wfopen() on Windows. */
-    #if defined(_MSC_VER) && _MSC_VER >= 1400
-        errno_t err = _wfopen_s(ppFile, pFilePath, pOpenMode);
-        if (err != 0) {
-            return drwav_result_from_errno(err);
-        }
-    #else
-        *ppFile = _wfopen(pFilePath, pOpenMode);
-        if (*ppFile == NULL) {
-            return drwav_result_from_errno(errno);
-        }
-    #endif
-        (void)pAllocationCallbacks;
-    }
-#else
-    /*
-    Use fopen() on anything other than Windows. Requires a conversion. This is annoying because fopen() is locale specific. The only real way I can
-    think of to do this is with wcsrtombs(). Note that wcstombs() is apparently not thread-safe because it uses a static global mbstate_t object for
-    maintaining state. I've checked this with -std=c89 and it works, but if somebody get's a compiler error I'll look into improving compatibility.
-    */
-    {
-        mbstate_t mbs;
-        size_t lenMB;
-        const wchar_t* pFilePathTemp = pFilePath;
-        char* pFilePathMB = NULL;
-        char pOpenModeMB[32] = {0};
-
-        /* Get the length first. */
-        DRWAV_ZERO_OBJECT(&mbs);
-        lenMB = wcsrtombs(NULL, &pFilePathTemp, 0, &mbs);
-        if (lenMB == (size_t)-1) {
-            return drwav_result_from_errno(errno);
-        }
-
-        pFilePathMB = (char*)drwav__malloc_from_callbacks(lenMB + 1, pAllocationCallbacks);
-        if (pFilePathMB == NULL) {
-            return DRWAV_OUT_OF_MEMORY;
-        }
-
-        pFilePathTemp = pFilePath;
-        DRWAV_ZERO_OBJECT(&mbs);
-        wcsrtombs(pFilePathMB, &pFilePathTemp, lenMB + 1, &mbs);
-
-        /* The open mode should always consist of ASCII characters so we should be able to do a trivial conversion. */
-        {
-            size_t i = 0;
-            for (;;) {
-                if (pOpenMode[i] == 0) {
-                    pOpenModeMB[i] = '\0';
-                    break;
-                }
-
-                pOpenModeMB[i] = (char)pOpenMode[i];
-                i += 1;
-            }
-        }
-
-        *ppFile = fopen(pFilePathMB, pOpenModeMB);
-
-        drwav__free_from_callbacks(pFilePathMB, pAllocationCallbacks);
-    }
-
-    if (*ppFile == NULL) {
-        return DRWAV_ERROR;
-    }
-#endif
-
-    return DRWAV_SUCCESS;
-}
-
-
-static size_t drwav__on_read_stdio(void* pUserData, void* pBufferOut, size_t bytesToRead)
-{
-    return fread(pBufferOut, 1, bytesToRead, (FILE*)pUserData);
-}
-
-static size_t drwav__on_write_stdio(void* pUserData, const void* pData, size_t bytesToWrite)
-{
-    return fwrite(pData, 1, bytesToWrite, (FILE*)pUserData);
-}
-
-static drwav_bool32 drwav__on_seek_stdio(void* pUserData, int offset, drwav_seek_origin origin)
-{
-    return fseek((FILE*)pUserData, offset, (origin == drwav_seek_origin_current) ? SEEK_CUR : SEEK_SET) == 0;
-}
-
-DRWAV_API drwav_bool32 drwav_init_file(drwav* pWav, const char* filename, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    return drwav_init_file_ex(pWav, filename, NULL, NULL, 0, pAllocationCallbacks);
-}
-
-
-static drwav_bool32 drwav_init_file__internal_FILE(drwav* pWav, FILE* pFile, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    drwav_bool32 result;
-
-    result = drwav_preinit(pWav, drwav__on_read_stdio, drwav__on_seek_stdio, (void*)pFile, pAllocationCallbacks);
-    if (result != DRWAV_TRUE) {
-        fclose(pFile);
-        return result;
-    }
-
-    result = drwav_init__internal(pWav, onChunk, pChunkUserData, flags);
-    if (result != DRWAV_TRUE) {
-        fclose(pFile);
-        return result;
-    }
-
-    return DRWAV_TRUE;
-}
-
-DRWAV_API drwav_bool32 drwav_init_file_ex(drwav* pWav, const char* filename, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    FILE* pFile;
-    if (drwav_fopen(&pFile, filename, "rb") != DRWAV_SUCCESS) {
-        return DRWAV_FALSE;
-    }
-
-    /* This takes ownership of the FILE* object. */
-    return drwav_init_file__internal_FILE(pWav, pFile, onChunk, pChunkUserData, flags, pAllocationCallbacks);
-}
-
-DRWAV_API drwav_bool32 drwav_init_file_w(drwav* pWav, const wchar_t* filename, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    return drwav_init_file_ex_w(pWav, filename, NULL, NULL, 0, pAllocationCallbacks);
-}
-
-DRWAV_API drwav_bool32 drwav_init_file_ex_w(drwav* pWav, const wchar_t* filename, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    FILE* pFile;
-    if (drwav_wfopen(&pFile, filename, L"rb", pAllocationCallbacks) != DRWAV_SUCCESS) {
-        return DRWAV_FALSE;
-    }
-
-    /* This takes ownership of the FILE* object. */
-    return drwav_init_file__internal_FILE(pWav, pFile, onChunk, pChunkUserData, flags, pAllocationCallbacks);
-}
-
-
-static drwav_bool32 drwav_init_file_write__internal_FILE(drwav* pWav, FILE* pFile, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    drwav_bool32 result;
-
-    result = drwav_preinit_write(pWav, pFormat, isSequential, drwav__on_write_stdio, drwav__on_seek_stdio, (void*)pFile, pAllocationCallbacks);
-    if (result != DRWAV_TRUE) {
-        fclose(pFile);
-        return result;
-    }
-
-    result = drwav_init_write__internal(pWav, pFormat, totalSampleCount);
-    if (result != DRWAV_TRUE) {
-        fclose(pFile);
-        return result;
-    }
-
-    return DRWAV_TRUE;
-}
-
-static drwav_bool32 drwav_init_file_write__internal(drwav* pWav, const char* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    FILE* pFile;
-    if (drwav_fopen(&pFile, filename, "wb") != DRWAV_SUCCESS) {
-        return DRWAV_FALSE;
-    }
-
-    /* This takes ownership of the FILE* object. */
-    return drwav_init_file_write__internal_FILE(pWav, pFile, pFormat, totalSampleCount, isSequential, pAllocationCallbacks);
-}
-
-static drwav_bool32 drwav_init_file_write_w__internal(drwav* pWav, const wchar_t* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    FILE* pFile;
-    if (drwav_wfopen(&pFile, filename, L"wb", pAllocationCallbacks) != DRWAV_SUCCESS) {
-        return DRWAV_FALSE;
-    }
-
-    /* This takes ownership of the FILE* object. */
-    return drwav_init_file_write__internal_FILE(pWav, pFile, pFormat, totalSampleCount, isSequential, pAllocationCallbacks);
-}
-
-DRWAV_API drwav_bool32 drwav_init_file_write(drwav* pWav, const char* filename, const drwav_data_format* pFormat, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    return drwav_init_file_write__internal(pWav, filename, pFormat, 0, DRWAV_FALSE, pAllocationCallbacks);
-}
-
-DRWAV_API drwav_bool32 drwav_init_file_write_sequential(drwav* pWav, const char* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    return drwav_init_file_write__internal(pWav, filename, pFormat, totalSampleCount, DRWAV_TRUE, pAllocationCallbacks);
-}
-
-DRWAV_API drwav_bool32 drwav_init_file_write_sequential_pcm_frames(drwav* pWav, const char* filename, const drwav_data_format* pFormat, drwav_uint64 totalPCMFrameCount, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    if (pFormat == NULL) {
-        return DRWAV_FALSE;
-    }
-
-    return drwav_init_file_write_sequential(pWav, filename, pFormat, totalPCMFrameCount*pFormat->channels, pAllocationCallbacks);
-}
-
-DRWAV_API drwav_bool32 drwav_init_file_write_w(drwav* pWav, const wchar_t* filename, const drwav_data_format* pFormat, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    return drwav_init_file_write_w__internal(pWav, filename, pFormat, 0, DRWAV_FALSE, pAllocationCallbacks);
-}
-
-DRWAV_API drwav_bool32 drwav_init_file_write_sequential_w(drwav* pWav, const wchar_t* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    return drwav_init_file_write_w__internal(pWav, filename, pFormat, totalSampleCount, DRWAV_TRUE, pAllocationCallbacks);
-}
-
-DRWAV_API drwav_bool32 drwav_init_file_write_sequential_pcm_frames_w(drwav* pWav, const wchar_t* filename, const drwav_data_format* pFormat, drwav_uint64 totalPCMFrameCount, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    if (pFormat == NULL) {
-        return DRWAV_FALSE;
-    }
-
-    return drwav_init_file_write_sequential_w(pWav, filename, pFormat, totalPCMFrameCount*pFormat->channels, pAllocationCallbacks);
-}
-#endif  /* DR_WAV_NO_STDIO */
-
-
-static size_t drwav__on_read_memory(void* pUserData, void* pBufferOut, size_t bytesToRead)
-{
-    drwav* pWav = (drwav*)pUserData;
-    size_t bytesRemaining;
-
-    DRWAV_ASSERT(pWav != NULL);
-    DRWAV_ASSERT(pWav->memoryStream.dataSize >= pWav->memoryStream.currentReadPos);
-
-    bytesRemaining = pWav->memoryStream.dataSize - pWav->memoryStream.currentReadPos;
-    if (bytesToRead > bytesRemaining) {
-        bytesToRead = bytesRemaining;
-    }
-
-    if (bytesToRead > 0) {
-        DRWAV_COPY_MEMORY(pBufferOut, pWav->memoryStream.data + pWav->memoryStream.currentReadPos, bytesToRead);
-        pWav->memoryStream.currentReadPos += bytesToRead;
-    }
-
-    return bytesToRead;
-}
-
-static drwav_bool32 drwav__on_seek_memory(void* pUserData, int offset, drwav_seek_origin origin)
-{
-    drwav* pWav = (drwav*)pUserData;
-    DRWAV_ASSERT(pWav != NULL);
-
-    if (origin == drwav_seek_origin_current) {
-        if (offset > 0) {
-            if (pWav->memoryStream.currentReadPos + offset > pWav->memoryStream.dataSize) {
-                return DRWAV_FALSE; /* Trying to seek too far forward. */
-            }
-        } else {
-            if (pWav->memoryStream.currentReadPos < (size_t)-offset) {
-                return DRWAV_FALSE; /* Trying to seek too far backwards. */
-            }
-        }
-
-        /* This will never underflow thanks to the clamps above. */
-        pWav->memoryStream.currentReadPos += offset;
-    } else {
-        if ((drwav_uint32)offset <= pWav->memoryStream.dataSize) {
-            pWav->memoryStream.currentReadPos = offset;
-        } else {
-            return DRWAV_FALSE; /* Trying to seek too far forward. */
-        }
-    }
-    
-    return DRWAV_TRUE;
-}
-
-static size_t drwav__on_write_memory(void* pUserData, const void* pDataIn, size_t bytesToWrite)
-{
-    drwav* pWav = (drwav*)pUserData;
-    size_t bytesRemaining;
-
-    DRWAV_ASSERT(pWav != NULL);
-    DRWAV_ASSERT(pWav->memoryStreamWrite.dataCapacity >= pWav->memoryStreamWrite.currentWritePos);
-
-    bytesRemaining = pWav->memoryStreamWrite.dataCapacity - pWav->memoryStreamWrite.currentWritePos;
-    if (bytesRemaining < bytesToWrite) {
-        /* Need to reallocate. */
-        void* pNewData;
-        size_t newDataCapacity = (pWav->memoryStreamWrite.dataCapacity == 0) ? 256 : pWav->memoryStreamWrite.dataCapacity * 2;
-
-        /* If doubling wasn't enough, just make it the minimum required size to write the data. */
-        if ((newDataCapacity - pWav->memoryStreamWrite.currentWritePos) < bytesToWrite) {
-            newDataCapacity = pWav->memoryStreamWrite.currentWritePos + bytesToWrite;
-        }
-
-        pNewData = drwav__realloc_from_callbacks(*pWav->memoryStreamWrite.ppData, newDataCapacity, pWav->memoryStreamWrite.dataCapacity, &pWav->allocationCallbacks);
-        if (pNewData == NULL) {
-            return 0;
-        }
-
-        *pWav->memoryStreamWrite.ppData = pNewData;
-        pWav->memoryStreamWrite.dataCapacity = newDataCapacity;
-    }
-
-    DRWAV_COPY_MEMORY(((drwav_uint8*)(*pWav->memoryStreamWrite.ppData)) + pWav->memoryStreamWrite.currentWritePos, pDataIn, bytesToWrite);
-
-    pWav->memoryStreamWrite.currentWritePos += bytesToWrite;
-    if (pWav->memoryStreamWrite.dataSize < pWav->memoryStreamWrite.currentWritePos) {
-        pWav->memoryStreamWrite.dataSize = pWav->memoryStreamWrite.currentWritePos;
-    }
-
-    *pWav->memoryStreamWrite.pDataSize = pWav->memoryStreamWrite.dataSize;
-
-    return bytesToWrite;
-}
-
-static drwav_bool32 drwav__on_seek_memory_write(void* pUserData, int offset, drwav_seek_origin origin)
-{
-    drwav* pWav = (drwav*)pUserData;
-    DRWAV_ASSERT(pWav != NULL);
-
-    if (origin == drwav_seek_origin_current) {
-        if (offset > 0) {
-            if (pWav->memoryStreamWrite.currentWritePos + offset > pWav->memoryStreamWrite.dataSize) {
-                offset = (int)(pWav->memoryStreamWrite.dataSize - pWav->memoryStreamWrite.currentWritePos);  /* Trying to seek too far forward. */
-            }
-        } else {
-            if (pWav->memoryStreamWrite.currentWritePos < (size_t)-offset) {
-                offset = -(int)pWav->memoryStreamWrite.currentWritePos;  /* Trying to seek too far backwards. */
-            }
-        }
-
-        /* This will never underflow thanks to the clamps above. */
-        pWav->memoryStreamWrite.currentWritePos += offset;
-    } else {
-        if ((drwav_uint32)offset <= pWav->memoryStreamWrite.dataSize) {
-            pWav->memoryStreamWrite.currentWritePos = offset;
-        } else {
-            pWav->memoryStreamWrite.currentWritePos = pWav->memoryStreamWrite.dataSize;  /* Trying to seek too far forward. */
-        }
-    }
-    
-    return DRWAV_TRUE;
-}
-
-DRWAV_API drwav_bool32 drwav_init_memory(drwav* pWav, const void* data, size_t dataSize, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    return drwav_init_memory_ex(pWav, data, dataSize, NULL, NULL, 0, pAllocationCallbacks);
-}
-
-DRWAV_API drwav_bool32 drwav_init_memory_ex(drwav* pWav, const void* data, size_t dataSize, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    if (data == NULL || dataSize == 0) {
-        return DRWAV_FALSE;
-    }
-
-    if (!drwav_preinit(pWav, drwav__on_read_memory, drwav__on_seek_memory, pWav, pAllocationCallbacks)) {
-        return DRWAV_FALSE;
-    }
-
-    pWav->memoryStream.data = (const drwav_uint8*)data;
-    pWav->memoryStream.dataSize = dataSize;
-    pWav->memoryStream.currentReadPos = 0;
-
-    return drwav_init__internal(pWav, onChunk, pChunkUserData, flags);
-}
-
-
-static drwav_bool32 drwav_init_memory_write__internal(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    if (ppData == NULL || pDataSize == NULL) {
-        return DRWAV_FALSE;
-    }
-
-    *ppData = NULL; /* Important because we're using realloc()! */
-    *pDataSize = 0;
-
-    if (!drwav_preinit_write(pWav, pFormat, isSequential, drwav__on_write_memory, drwav__on_seek_memory_write, pWav, pAllocationCallbacks)) {
-        return DRWAV_FALSE;
-    }
-
-    pWav->memoryStreamWrite.ppData = ppData;
-    pWav->memoryStreamWrite.pDataSize = pDataSize;
-    pWav->memoryStreamWrite.dataSize = 0;
-    pWav->memoryStreamWrite.dataCapacity = 0;
-    pWav->memoryStreamWrite.currentWritePos = 0;
-
-    return drwav_init_write__internal(pWav, pFormat, totalSampleCount);
-}
-
-DRWAV_API drwav_bool32 drwav_init_memory_write(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    return drwav_init_memory_write__internal(pWav, ppData, pDataSize, pFormat, 0, DRWAV_FALSE, pAllocationCallbacks);
-}
-
-DRWAV_API drwav_bool32 drwav_init_memory_write_sequential(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    return drwav_init_memory_write__internal(pWav, ppData, pDataSize, pFormat, totalSampleCount, DRWAV_TRUE, pAllocationCallbacks);
-}
-
-DRWAV_API drwav_bool32 drwav_init_memory_write_sequential_pcm_frames(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, drwav_uint64 totalPCMFrameCount, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    if (pFormat == NULL) {
-        return DRWAV_FALSE;
-    }
-
-    return drwav_init_memory_write_sequential(pWav, ppData, pDataSize, pFormat, totalPCMFrameCount*pFormat->channels, pAllocationCallbacks);
-}
-
-
-
-DRWAV_API drwav_result drwav_uninit(drwav* pWav)
-{
-    drwav_result result = DRWAV_SUCCESS;
-
-    if (pWav == NULL) {
-        return DRWAV_INVALID_ARGS;
-    }
-
-    /*
-    If the drwav object was opened in write mode we'll need to finalize a few things:
-      - Make sure the "data" chunk is aligned to 16-bits for RIFF containers, or 64 bits for W64 containers.
-      - Set the size of the "data" chunk.
-    */
-    if (pWav->onWrite != NULL) {
-        drwav_uint32 paddingSize = 0;
-
-        /* Padding. Do not adjust pWav->dataChunkDataSize - this should not include the padding. */
-        if (pWav->container == drwav_container_riff) {
-            paddingSize = drwav__chunk_padding_size_riff(pWav->dataChunkDataSize);
-        } else {
-            paddingSize = drwav__chunk_padding_size_w64(pWav->dataChunkDataSize);
-        }
-        
-        if (paddingSize > 0) {
-            drwav_uint64 paddingData = 0;
-            pWav->onWrite(pWav->pUserData, &paddingData, paddingSize);
-        }
-
-        /*
-        Chunk sizes. When using sequential mode, these will have been filled in at initialization time. We only need
-        to do this when using non-sequential mode.
-        */
-        if (pWav->onSeek && !pWav->isSequentialWrite) {
-            if (pWav->container == drwav_container_riff) {
-                /* The "RIFF" chunk size. */
-                if (pWav->onSeek(pWav->pUserData, 4, drwav_seek_origin_start)) {
-                    drwav_uint32 riffChunkSize = drwav__riff_chunk_size_riff(pWav->dataChunkDataSize);
-                    pWav->onWrite(pWav->pUserData, &riffChunkSize, 4);
-                }
-
-                /* the "data" chunk size. */
-                if (pWav->onSeek(pWav->pUserData, (int)pWav->dataChunkDataPos + 4, drwav_seek_origin_start)) {
-                    drwav_uint32 dataChunkSize = drwav__data_chunk_size_riff(pWav->dataChunkDataSize);
-                    pWav->onWrite(pWav->pUserData, &dataChunkSize, 4);
-                }
-            } else {
-                /* The "RIFF" chunk size. */
-                if (pWav->onSeek(pWav->pUserData, 16, drwav_seek_origin_start)) {
-                    drwav_uint64 riffChunkSize = drwav__riff_chunk_size_w64(pWav->dataChunkDataSize);
-                    pWav->onWrite(pWav->pUserData, &riffChunkSize, 8);
-                }
-
-                /* The "data" chunk size. */
-                if (pWav->onSeek(pWav->pUserData, (int)pWav->dataChunkDataPos + 16, drwav_seek_origin_start)) {
-                    drwav_uint64 dataChunkSize = drwav__data_chunk_size_w64(pWav->dataChunkDataSize);
-                    pWav->onWrite(pWav->pUserData, &dataChunkSize, 8);
-                }
-            }
-        }
-
-        /* Validation for sequential mode. */
-        if (pWav->isSequentialWrite) {
-            if (pWav->dataChunkDataSize != pWav->dataChunkDataSizeTargetWrite) {
-                result = DRWAV_INVALID_FILE;
-            }
-        }
-    }
-
-#ifndef DR_WAV_NO_STDIO
-    /*
-    If we opened the file with drwav_open_file() we will want to close the file handle. We can know whether or not drwav_open_file()
-    was used by looking at the onRead and onSeek callbacks.
-    */
-    if (pWav->onRead == drwav__on_read_stdio || pWav->onWrite == drwav__on_write_stdio) {
-        fclose((FILE*)pWav->pUserData);
-    }
-#endif
-
-    return result;
-}
-
-
-
-DRWAV_API size_t drwav_read_raw(drwav* pWav, size_t bytesToRead, void* pBufferOut)
-{
-    size_t bytesRead;
-
-    if (pWav == NULL || bytesToRead == 0 || pBufferOut == NULL) {
-        return 0;
-    }
-
-    if (bytesToRead > pWav->bytesRemaining) {
-        bytesToRead = (size_t)pWav->bytesRemaining;
-    }
-
-    bytesRead = pWav->onRead(pWav->pUserData, pBufferOut, bytesToRead);
-
-    pWav->bytesRemaining -= bytesRead;
-    return bytesRead;
-}
-
-
-
-DRWAV_API drwav_uint64 drwav_read_pcm_frames_le(drwav* pWav, drwav_uint64 framesToRead, void* pBufferOut)
-{
-    drwav_uint32 bytesPerFrame;
-
-    if (pWav == NULL || framesToRead == 0 || pBufferOut == NULL) {
-        return 0;
-    }
-
-    /* Cannot use this function for compressed formats. */
-    if (drwav__is_compressed_format_tag(pWav->translatedFormatTag)) {
-        return 0;
-    }
-
-    bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
-    if (bytesPerFrame == 0) {
-        return 0;
-    }
-
-    /* Don't try to read more samples than can potentially fit in the output buffer. */
-    if (framesToRead * bytesPerFrame > DRWAV_SIZE_MAX) {
-        framesToRead = DRWAV_SIZE_MAX / bytesPerFrame;
-    }
-
-    return drwav_read_raw(pWav, (size_t)(framesToRead * bytesPerFrame), pBufferOut) / bytesPerFrame;
-}
-
-DRWAV_API drwav_uint64 drwav_read_pcm_frames_be(drwav* pWav, drwav_uint64 framesToRead, void* pBufferOut)
-{
-    drwav_uint64 framesRead = drwav_read_pcm_frames_le(pWav, framesToRead, pBufferOut);
-    drwav__bswap_samples(pBufferOut, framesRead*pWav->channels, drwav_get_bytes_per_pcm_frame(pWav)/pWav->channels, pWav->translatedFormatTag);
-
-    return framesRead;
-}
-
-DRWAV_API drwav_uint64 drwav_read_pcm_frames(drwav* pWav, drwav_uint64 framesToRead, void* pBufferOut)
-{
-    if (drwav__is_little_endian()) {
-        return drwav_read_pcm_frames_le(pWav, framesToRead, pBufferOut);
-    } else {
-        return drwav_read_pcm_frames_be(pWav, framesToRead, pBufferOut);
-    }
-}
-
-
-
-DRWAV_API drwav_bool32 drwav_seek_to_first_pcm_frame(drwav* pWav)
-{
-    if (pWav->onWrite != NULL) {
-        return DRWAV_FALSE; /* No seeking in write mode. */
-    }
-
-    if (!pWav->onSeek(pWav->pUserData, (int)pWav->dataChunkDataPos, drwav_seek_origin_start)) {
-        return DRWAV_FALSE;
-    }
-
-    if (drwav__is_compressed_format_tag(pWav->translatedFormatTag)) {
-        pWav->compressed.iCurrentPCMFrame = 0;
-    }
-    
-    pWav->bytesRemaining = pWav->dataChunkDataSize;
-    return DRWAV_TRUE;
-}
-
-DRWAV_API drwav_bool32 drwav_seek_to_pcm_frame(drwav* pWav, drwav_uint64 targetFrameIndex)
-{
-    /* Seeking should be compatible with wave files > 2GB. */
-
-    if (pWav == NULL || pWav->onSeek == NULL) {
-        return DRWAV_FALSE;
-    }
-
-    /* No seeking in write mode. */
-    if (pWav->onWrite != NULL) {
-        return DRWAV_FALSE;
-    }
-
-    /* If there are no samples, just return DRWAV_TRUE without doing anything. */
-    if (pWav->totalPCMFrameCount == 0) {
-        return DRWAV_TRUE;
-    }
-
-    /* Make sure the sample is clamped. */
-    if (targetFrameIndex >= pWav->totalPCMFrameCount) {
-        targetFrameIndex  = pWav->totalPCMFrameCount - 1;
-    }
-
-    /*
-    For compressed formats we just use a slow generic seek. If we are seeking forward we just seek forward. If we are going backwards we need
-    to seek back to the start.
-    */
-    if (drwav__is_compressed_format_tag(pWav->translatedFormatTag)) {
-        /* TODO: This can be optimized. */
-        
-        /*
-        If we're seeking forward it's simple - just keep reading samples until we hit the sample we're requesting. If we're seeking backwards,
-        we first need to seek back to the start and then just do the same thing as a forward seek.
-        */
-        if (targetFrameIndex < pWav->compressed.iCurrentPCMFrame) {
-            if (!drwav_seek_to_first_pcm_frame(pWav)) {
-                return DRWAV_FALSE;
-            }
-        }
-
-        if (targetFrameIndex > pWav->compressed.iCurrentPCMFrame) {
-            drwav_uint64 offsetInFrames = targetFrameIndex - pWav->compressed.iCurrentPCMFrame;
-
-            drwav_int16 devnull[2048];
-            while (offsetInFrames > 0) {
-                drwav_uint64 framesRead = 0;
-                drwav_uint64 framesToRead = offsetInFrames;
-                if (framesToRead > drwav_countof(devnull)/pWav->channels) {
-                    framesToRead = drwav_countof(devnull)/pWav->channels;
-                }
-
-                if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) {
-                    framesRead = drwav_read_pcm_frames_s16__msadpcm(pWav, framesToRead, devnull);
-                } else if (pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) {
-                    framesRead = drwav_read_pcm_frames_s16__ima(pWav, framesToRead, devnull);
-                } else {
-                    DRWAV_ASSERT(DRWAV_FALSE);  /* If this assertion is triggered it means I've implemented a new compressed format but forgot to add a branch for it here. */
-                }
-
-                if (framesRead != framesToRead) {
-                    return DRWAV_FALSE;
-                }
-
-                offsetInFrames -= framesRead;
-            }
-        }
-    } else {
-        drwav_uint64 totalSizeInBytes;
-        drwav_uint64 currentBytePos;
-        drwav_uint64 targetBytePos;
-        drwav_uint64 offset;
-
-        totalSizeInBytes = pWav->totalPCMFrameCount * drwav_get_bytes_per_pcm_frame(pWav);
-        DRWAV_ASSERT(totalSizeInBytes >= pWav->bytesRemaining);
-
-        currentBytePos = totalSizeInBytes - pWav->bytesRemaining;
-        targetBytePos  = targetFrameIndex * drwav_get_bytes_per_pcm_frame(pWav);
-
-        if (currentBytePos < targetBytePos) {
-            /* Offset forwards. */
-            offset = (targetBytePos - currentBytePos);
-        } else {
-            /* Offset backwards. */
-            if (!drwav_seek_to_first_pcm_frame(pWav)) {
-                return DRWAV_FALSE;
-            }
-            offset = targetBytePos;
-        }
-
-        while (offset > 0) {
-            int offset32 = ((offset > INT_MAX) ? INT_MAX : (int)offset);
-            if (!pWav->onSeek(pWav->pUserData, offset32, drwav_seek_origin_current)) {
-                return DRWAV_FALSE;
-            }
-
-            pWav->bytesRemaining -= offset32;
-            offset -= offset32;
-        }
-    }
-
-    return DRWAV_TRUE;
-}
-
-
-DRWAV_API size_t drwav_write_raw(drwav* pWav, size_t bytesToWrite, const void* pData)
-{
-    size_t bytesWritten;
-
-    if (pWav == NULL || bytesToWrite == 0 || pData == NULL) {
-        return 0;
-    }
-
-    bytesWritten = pWav->onWrite(pWav->pUserData, pData, bytesToWrite);
-    pWav->dataChunkDataSize += bytesWritten;
-
-    return bytesWritten;
-}
-
-
-DRWAV_API drwav_uint64 drwav_write_pcm_frames_le(drwav* pWav, drwav_uint64 framesToWrite, const void* pData)
-{
-    drwav_uint64 bytesToWrite;
-    drwav_uint64 bytesWritten;
-    const drwav_uint8* pRunningData;
-
-    if (pWav == NULL || framesToWrite == 0 || pData == NULL) {
-        return 0;
-    }
-
-    bytesToWrite = ((framesToWrite * pWav->channels * pWav->bitsPerSample) / 8);
-    if (bytesToWrite > DRWAV_SIZE_MAX) {
-        return 0;
-    }
-
-    bytesWritten = 0;
-    pRunningData = (const drwav_uint8*)pData;
-
-    while (bytesToWrite > 0) {
-        size_t bytesJustWritten;
-        drwav_uint64 bytesToWriteThisIteration;
-
-        bytesToWriteThisIteration = bytesToWrite;
-        DRWAV_ASSERT(bytesToWriteThisIteration <= DRWAV_SIZE_MAX);  /* <-- This is checked above. */
-
-        bytesJustWritten = drwav_write_raw(pWav, (size_t)bytesToWriteThisIteration, pRunningData);
-        if (bytesJustWritten == 0) {
-            break;
-        }
-
-        bytesToWrite -= bytesJustWritten;
-        bytesWritten += bytesJustWritten;
-        pRunningData += bytesJustWritten;
-    }
-
-    return (bytesWritten * 8) / pWav->bitsPerSample / pWav->channels;
-}
-
-DRWAV_API drwav_uint64 drwav_write_pcm_frames_be(drwav* pWav, drwav_uint64 framesToWrite, const void* pData)
-{
-    drwav_uint64 bytesToWrite;
-    drwav_uint64 bytesWritten;
-    drwav_uint32 bytesPerSample;
-    const drwav_uint8* pRunningData;
-
-    if (pWav == NULL || framesToWrite == 0 || pData == NULL) {
-        return 0;
-    }
-
-    bytesToWrite = ((framesToWrite * pWav->channels * pWav->bitsPerSample) / 8);
-    if (bytesToWrite > DRWAV_SIZE_MAX) {
-        return 0;
-    }
-
-    bytesWritten = 0;
-    pRunningData = (const drwav_uint8*)pData;
-
-    bytesPerSample = drwav_get_bytes_per_pcm_frame(pWav) / pWav->channels;
-    
-    while (bytesToWrite > 0) {
-        drwav_uint8 temp[4096];
-        drwav_uint32 sampleCount;
-        size_t bytesJustWritten;
-        drwav_uint64 bytesToWriteThisIteration;
-
-        bytesToWriteThisIteration = bytesToWrite;
-        DRWAV_ASSERT(bytesToWriteThisIteration <= DRWAV_SIZE_MAX);  /* <-- This is checked above. */
-
-        /*
-        WAV files are always little-endian. We need to byte swap on big-endian architectures. Since our input buffer is read-only we need
-        to use an intermediary buffer for the conversion.
-        */
-        sampleCount = sizeof(temp)/bytesPerSample;
-
-        if (bytesToWriteThisIteration > ((drwav_uint64)sampleCount)*bytesPerSample) {
-            bytesToWriteThisIteration = ((drwav_uint64)sampleCount)*bytesPerSample;
-        }
-
-        DRWAV_COPY_MEMORY(temp, pRunningData, (size_t)bytesToWriteThisIteration);
-        drwav__bswap_samples(temp, sampleCount, bytesPerSample, pWav->translatedFormatTag);
-
-        bytesJustWritten = drwav_write_raw(pWav, (size_t)bytesToWriteThisIteration, temp);
-        if (bytesJustWritten == 0) {
-            break;
-        }
-
-        bytesToWrite -= bytesJustWritten;
-        bytesWritten += bytesJustWritten;
-        pRunningData += bytesJustWritten;
-    }
-
-    return (bytesWritten * 8) / pWav->bitsPerSample / pWav->channels;
-}
-
-DRWAV_API drwav_uint64 drwav_write_pcm_frames(drwav* pWav, drwav_uint64 framesToWrite, const void* pData)
-{
-    if (drwav__is_little_endian()) {
-        return drwav_write_pcm_frames_le(pWav, framesToWrite, pData);
-    } else {
-        return drwav_write_pcm_frames_be(pWav, framesToWrite, pData);
-    }
-}
-
-
-static drwav_uint64 drwav_read_pcm_frames_s16__msadpcm(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
-{
-    drwav_uint64 totalFramesRead = 0;
-
-    DRWAV_ASSERT(pWav != NULL);
-    DRWAV_ASSERT(framesToRead > 0);
-    DRWAV_ASSERT(pBufferOut != NULL);
-
-    /* TODO: Lots of room for optimization here. */
-
-    while (framesToRead > 0 && pWav->compressed.iCurrentPCMFrame < pWav->totalPCMFrameCount) {
-        /* If there are no cached frames we need to load a new block. */
-        if (pWav->msadpcm.cachedFrameCount == 0 && pWav->msadpcm.bytesRemainingInBlock == 0) {
-            if (pWav->channels == 1) {
-                /* Mono. */
-                drwav_uint8 header[7];
-                if (pWav->onRead(pWav->pUserData, header, sizeof(header)) != sizeof(header)) {
-                    return totalFramesRead;
-                }
-                pWav->msadpcm.bytesRemainingInBlock = pWav->fmt.blockAlign - sizeof(header);
-
-                pWav->msadpcm.predictor[0]     = header[0];
-                pWav->msadpcm.delta[0]         = drwav__bytes_to_s16(header + 1);
-                pWav->msadpcm.prevFrames[0][1] = (drwav_int32)drwav__bytes_to_s16(header + 3);
-                pWav->msadpcm.prevFrames[0][0] = (drwav_int32)drwav__bytes_to_s16(header + 5);
-                pWav->msadpcm.cachedFrames[2]  = pWav->msadpcm.prevFrames[0][0];
-                pWav->msadpcm.cachedFrames[3]  = pWav->msadpcm.prevFrames[0][1];
-                pWav->msadpcm.cachedFrameCount = 2;
-            } else {
-                /* Stereo. */
-                drwav_uint8 header[14];
-                if (pWav->onRead(pWav->pUserData, header, sizeof(header)) != sizeof(header)) {
-                    return totalFramesRead;
-                }
-                pWav->msadpcm.bytesRemainingInBlock = pWav->fmt.blockAlign - sizeof(header);
-
-                pWav->msadpcm.predictor[0] = header[0];
-                pWav->msadpcm.predictor[1] = header[1];
-                pWav->msadpcm.delta[0] = drwav__bytes_to_s16(header + 2);
-                pWav->msadpcm.delta[1] = drwav__bytes_to_s16(header + 4);
-                pWav->msadpcm.prevFrames[0][1] = (drwav_int32)drwav__bytes_to_s16(header + 6);
-                pWav->msadpcm.prevFrames[1][1] = (drwav_int32)drwav__bytes_to_s16(header + 8);
-                pWav->msadpcm.prevFrames[0][0] = (drwav_int32)drwav__bytes_to_s16(header + 10);
-                pWav->msadpcm.prevFrames[1][0] = (drwav_int32)drwav__bytes_to_s16(header + 12);
-
-                pWav->msadpcm.cachedFrames[0] = pWav->msadpcm.prevFrames[0][0];
-                pWav->msadpcm.cachedFrames[1] = pWav->msadpcm.prevFrames[1][0];
-                pWav->msadpcm.cachedFrames[2] = pWav->msadpcm.prevFrames[0][1];
-                pWav->msadpcm.cachedFrames[3] = pWav->msadpcm.prevFrames[1][1];
-                pWav->msadpcm.cachedFrameCount = 2;
-            }
-        }
-
-        /* Output anything that's cached. */
-        while (framesToRead > 0 && pWav->msadpcm.cachedFrameCount > 0 && pWav->compressed.iCurrentPCMFrame < pWav->totalPCMFrameCount) {
-            drwav_uint32 iSample = 0;
-            for (iSample = 0; iSample < pWav->channels; iSample += 1) {
-                pBufferOut[iSample] = (drwav_int16)pWav->msadpcm.cachedFrames[(drwav_countof(pWav->msadpcm.cachedFrames) - (pWav->msadpcm.cachedFrameCount*pWav->channels)) + iSample];
-            }
-
-            pBufferOut      += pWav->channels;
-            framesToRead    -= 1;
-            totalFramesRead += 1;
-            pWav->compressed.iCurrentPCMFrame += 1;
-            pWav->msadpcm.cachedFrameCount -= 1;
-        }
-
-        if (framesToRead == 0) {
-            return totalFramesRead;
-        }
-
-
-        /*
-        If there's nothing left in the cache, just go ahead and load more. If there's nothing left to load in the current block we just continue to the next
-        loop iteration which will trigger the loading of a new block.
-        */
-        if (pWav->msadpcm.cachedFrameCount == 0) {
-            if (pWav->msadpcm.bytesRemainingInBlock == 0) {
-                continue;
-            } else {
-                static drwav_int32 adaptationTable[] = { 
-                    230, 230, 230, 230, 307, 409, 512, 614, 
-                    768, 614, 512, 409, 307, 230, 230, 230 
-                };
-                static drwav_int32 coeff1Table[] = { 256, 512, 0, 192, 240, 460,  392 };
-                static drwav_int32 coeff2Table[] = { 0,  -256, 0, 64,  0,  -208, -232 };
-
-                drwav_uint8 nibbles;
-                drwav_int32 nibble0;
-                drwav_int32 nibble1;
-
-                if (pWav->onRead(pWav->pUserData, &nibbles, 1) != 1) {
-                    return totalFramesRead;
-                }
-                pWav->msadpcm.bytesRemainingInBlock -= 1;
-
-                /* TODO: Optimize away these if statements. */
-                nibble0 = ((nibbles & 0xF0) >> 4); if ((nibbles & 0x80)) { nibble0 |= 0xFFFFFFF0UL; }
-                nibble1 = ((nibbles & 0x0F) >> 0); if ((nibbles & 0x08)) { nibble1 |= 0xFFFFFFF0UL; }
-
-                if (pWav->channels == 1) {
-                    /* Mono. */
-                    drwav_int32 newSample0;
-                    drwav_int32 newSample1;
-
-                    newSample0  = ((pWav->msadpcm.prevFrames[0][1] * coeff1Table[pWav->msadpcm.predictor[0]]) + (pWav->msadpcm.prevFrames[0][0] * coeff2Table[pWav->msadpcm.predictor[0]])) >> 8;
-                    newSample0 += nibble0 * pWav->msadpcm.delta[0];
-                    newSample0  = drwav_clamp(newSample0, -32768, 32767);
-
-                    pWav->msadpcm.delta[0] = (adaptationTable[((nibbles & 0xF0) >> 4)] * pWav->msadpcm.delta[0]) >> 8;
-                    if (pWav->msadpcm.delta[0] < 16) {
-                        pWav->msadpcm.delta[0] = 16;
-                    }
-
-                    pWav->msadpcm.prevFrames[0][0] = pWav->msadpcm.prevFrames[0][1];
-                    pWav->msadpcm.prevFrames[0][1] = newSample0;
-
-
-                    newSample1  = ((pWav->msadpcm.prevFrames[0][1] * coeff1Table[pWav->msadpcm.predictor[0]]) + (pWav->msadpcm.prevFrames[0][0] * coeff2Table[pWav->msadpcm.predictor[0]])) >> 8;
-                    newSample1 += nibble1 * pWav->msadpcm.delta[0];
-                    newSample1  = drwav_clamp(newSample1, -32768, 32767);
-
-                    pWav->msadpcm.delta[0] = (adaptationTable[((nibbles & 0x0F) >> 0)] * pWav->msadpcm.delta[0]) >> 8;
-                    if (pWav->msadpcm.delta[0] < 16) {
-                        pWav->msadpcm.delta[0] = 16;
-                    }
-
-                    pWav->msadpcm.prevFrames[0][0] = pWav->msadpcm.prevFrames[0][1];
-                    pWav->msadpcm.prevFrames[0][1] = newSample1;
-
-
-                    pWav->msadpcm.cachedFrames[2] = newSample0;
-                    pWav->msadpcm.cachedFrames[3] = newSample1;
-                    pWav->msadpcm.cachedFrameCount = 2;
-                } else {
-                    /* Stereo. */
-                    drwav_int32 newSample0;
-                    drwav_int32 newSample1;
-
-                    /* Left. */
-                    newSample0  = ((pWav->msadpcm.prevFrames[0][1] * coeff1Table[pWav->msadpcm.predictor[0]]) + (pWav->msadpcm.prevFrames[0][0] * coeff2Table[pWav->msadpcm.predictor[0]])) >> 8;
-                    newSample0 += nibble0 * pWav->msadpcm.delta[0];
-                    newSample0  = drwav_clamp(newSample0, -32768, 32767);
-
-                    pWav->msadpcm.delta[0] = (adaptationTable[((nibbles & 0xF0) >> 4)] * pWav->msadpcm.delta[0]) >> 8;
-                    if (pWav->msadpcm.delta[0] < 16) {
-                        pWav->msadpcm.delta[0] = 16;
-                    }
-
-                    pWav->msadpcm.prevFrames[0][0] = pWav->msadpcm.prevFrames[0][1];
-                    pWav->msadpcm.prevFrames[0][1] = newSample0;
-
-
-                    /* Right. */
-                    newSample1  = ((pWav->msadpcm.prevFrames[1][1] * coeff1Table[pWav->msadpcm.predictor[1]]) + (pWav->msadpcm.prevFrames[1][0] * coeff2Table[pWav->msadpcm.predictor[1]])) >> 8;
-                    newSample1 += nibble1 * pWav->msadpcm.delta[1];
-                    newSample1  = drwav_clamp(newSample1, -32768, 32767);
-
-                    pWav->msadpcm.delta[1] = (adaptationTable[((nibbles & 0x0F) >> 0)] * pWav->msadpcm.delta[1]) >> 8;
-                    if (pWav->msadpcm.delta[1] < 16) {
-                        pWav->msadpcm.delta[1] = 16;
-                    }
-
-                    pWav->msadpcm.prevFrames[1][0] = pWav->msadpcm.prevFrames[1][1];
-                    pWav->msadpcm.prevFrames[1][1] = newSample1;
-
-                    pWav->msadpcm.cachedFrames[2] = newSample0;
-                    pWav->msadpcm.cachedFrames[3] = newSample1;
-                    pWav->msadpcm.cachedFrameCount = 1;
-                }
-            }
-        }
-    }
-
-    return totalFramesRead;
-}
-
-
-static drwav_uint64 drwav_read_pcm_frames_s16__ima(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
-{
-    drwav_uint64 totalFramesRead = 0;
-
-    DRWAV_ASSERT(pWav != NULL);
-    DRWAV_ASSERT(framesToRead > 0);
-    DRWAV_ASSERT(pBufferOut != NULL);
-
-    /* TODO: Lots of room for optimization here. */
-
-    while (framesToRead > 0 && pWav->compressed.iCurrentPCMFrame < pWav->totalPCMFrameCount) {
-        /* If there are no cached samples we need to load a new block. */
-        if (pWav->ima.cachedFrameCount == 0 && pWav->ima.bytesRemainingInBlock == 0) {
-            if (pWav->channels == 1) {
-                /* Mono. */
-                drwav_uint8 header[4];
-                if (pWav->onRead(pWav->pUserData, header, sizeof(header)) != sizeof(header)) {
-                    return totalFramesRead;
-                }
-                pWav->ima.bytesRemainingInBlock = pWav->fmt.blockAlign - sizeof(header);
-
-                pWav->ima.predictor[0] = drwav__bytes_to_s16(header + 0);
-                pWav->ima.stepIndex[0] = header[2];
-                pWav->ima.cachedFrames[drwav_countof(pWav->ima.cachedFrames) - 1] = pWav->ima.predictor[0];
-                pWav->ima.cachedFrameCount = 1;
-            } else {
-                /* Stereo. */
-                drwav_uint8 header[8];
-                if (pWav->onRead(pWav->pUserData, header, sizeof(header)) != sizeof(header)) {
-                    return totalFramesRead;
-                }
-                pWav->ima.bytesRemainingInBlock = pWav->fmt.blockAlign - sizeof(header);
-
-                pWav->ima.predictor[0] = drwav__bytes_to_s16(header + 0);
-                pWav->ima.stepIndex[0] = header[2];
-                pWav->ima.predictor[1] = drwav__bytes_to_s16(header + 4);
-                pWav->ima.stepIndex[1] = header[6];
-
-                pWav->ima.cachedFrames[drwav_countof(pWav->ima.cachedFrames) - 2] = pWav->ima.predictor[0];
-                pWav->ima.cachedFrames[drwav_countof(pWav->ima.cachedFrames) - 1] = pWav->ima.predictor[1];
-                pWav->ima.cachedFrameCount = 1;
-            }
-        }
-
-        /* Output anything that's cached. */
-        while (framesToRead > 0 && pWav->ima.cachedFrameCount > 0 && pWav->compressed.iCurrentPCMFrame < pWav->totalPCMFrameCount) {
-            drwav_uint32 iSample;
-            for (iSample = 0; iSample < pWav->channels; iSample += 1) {
-                pBufferOut[iSample] = (drwav_int16)pWav->ima.cachedFrames[(drwav_countof(pWav->ima.cachedFrames) - (pWav->ima.cachedFrameCount*pWav->channels)) + iSample];
-            }
-
-            pBufferOut      += pWav->channels;
-            framesToRead    -= 1;
-            totalFramesRead += 1;
-            pWav->compressed.iCurrentPCMFrame += 1;
-            pWav->ima.cachedFrameCount -= 1;
-        }
-
-        if (framesToRead == 0) {
-            return totalFramesRead;
-        }
-
-        /*
-        If there's nothing left in the cache, just go ahead and load more. If there's nothing left to load in the current block we just continue to the next
-        loop iteration which will trigger the loading of a new block.
-        */
-        if (pWav->ima.cachedFrameCount == 0) {
-            if (pWav->ima.bytesRemainingInBlock == 0) {
-                continue;
-            } else {
-                static drwav_int32 indexTable[16] = {
-                    -1, -1, -1, -1, 2, 4, 6, 8,
-                    -1, -1, -1, -1, 2, 4, 6, 8
-                };
-
-                static drwav_int32 stepTable[89] = {
-                    7,     8,     9,     10,    11,    12,    13,    14,    16,    17, 
-                    19,    21,    23,    25,    28,    31,    34,    37,    41,    45, 
-                    50,    55,    60,    66,    73,    80,    88,    97,    107,   118, 
-                    130,   143,   157,   173,   190,   209,   230,   253,   279,   307,
-                    337,   371,   408,   449,   494,   544,   598,   658,   724,   796,
-                    876,   963,   1060,  1166,  1282,  1411,  1552,  1707,  1878,  2066, 
-                    2272,  2499,  2749,  3024,  3327,  3660,  4026,  4428,  4871,  5358,
-                    5894,  6484,  7132,  7845,  8630,  9493,  10442, 11487, 12635, 13899, 
-                    15289, 16818, 18500, 20350, 22385, 24623, 27086, 29794, 32767 
-                };
-
-                drwav_uint32 iChannel;
-
-                /*
-                From what I can tell with stereo streams, it looks like every 4 bytes (8 samples) is for one channel. So it goes 4 bytes for the
-                left channel, 4 bytes for the right channel.
-                */
-                pWav->ima.cachedFrameCount = 8;
-                for (iChannel = 0; iChannel < pWav->channels; ++iChannel) {
-                    drwav_uint32 iByte;
-                    drwav_uint8 nibbles[4];
-                    if (pWav->onRead(pWav->pUserData, &nibbles, 4) != 4) {
-                        pWav->ima.cachedFrameCount = 0;
-                        return totalFramesRead;
-                    }
-                    pWav->ima.bytesRemainingInBlock -= 4;
-
-                    for (iByte = 0; iByte < 4; ++iByte) {
-                        drwav_uint8 nibble0 = ((nibbles[iByte] & 0x0F) >> 0);
-                        drwav_uint8 nibble1 = ((nibbles[iByte] & 0xF0) >> 4);
-
-                        drwav_int32 step      = stepTable[pWav->ima.stepIndex[iChannel]];
-                        drwav_int32 predictor = pWav->ima.predictor[iChannel];
-
-                        drwav_int32      diff  = step >> 3;
-                        if (nibble0 & 1) diff += step >> 2;
-                        if (nibble0 & 2) diff += step >> 1;
-                        if (nibble0 & 4) diff += step;
-                        if (nibble0 & 8) diff  = -diff;
-
-                        predictor = drwav_clamp(predictor + diff, -32768, 32767);
-                        pWav->ima.predictor[iChannel] = predictor;
-                        pWav->ima.stepIndex[iChannel] = drwav_clamp(pWav->ima.stepIndex[iChannel] + indexTable[nibble0], 0, (drwav_int32)drwav_countof(stepTable)-1);
-                        pWav->ima.cachedFrames[(drwav_countof(pWav->ima.cachedFrames) - (pWav->ima.cachedFrameCount*pWav->channels)) + (iByte*2+0)*pWav->channels + iChannel] = predictor;
-
-
-                        step      = stepTable[pWav->ima.stepIndex[iChannel]];
-                        predictor = pWav->ima.predictor[iChannel];
-
-                                         diff  = step >> 3;
-                        if (nibble1 & 1) diff += step >> 2;
-                        if (nibble1 & 2) diff += step >> 1;
-                        if (nibble1 & 4) diff += step;
-                        if (nibble1 & 8) diff  = -diff;
-
-                        predictor = drwav_clamp(predictor + diff, -32768, 32767);
-                        pWav->ima.predictor[iChannel] = predictor;
-                        pWav->ima.stepIndex[iChannel] = drwav_clamp(pWav->ima.stepIndex[iChannel] + indexTable[nibble1], 0, (drwav_int32)drwav_countof(stepTable)-1);
-                        pWav->ima.cachedFrames[(drwav_countof(pWav->ima.cachedFrames) - (pWav->ima.cachedFrameCount*pWav->channels)) + (iByte*2+1)*pWav->channels + iChannel] = predictor;
-                    }
-                }
-            }
-        }
-    }
-
-    return totalFramesRead;
-}
-
-
-#ifndef DR_WAV_NO_CONVERSION_API
-static unsigned short g_drwavAlawTable[256] = {
-    0xEA80, 0xEB80, 0xE880, 0xE980, 0xEE80, 0xEF80, 0xEC80, 0xED80, 0xE280, 0xE380, 0xE080, 0xE180, 0xE680, 0xE780, 0xE480, 0xE580, 
-    0xF540, 0xF5C0, 0xF440, 0xF4C0, 0xF740, 0xF7C0, 0xF640, 0xF6C0, 0xF140, 0xF1C0, 0xF040, 0xF0C0, 0xF340, 0xF3C0, 0xF240, 0xF2C0, 
-    0xAA00, 0xAE00, 0xA200, 0xA600, 0xBA00, 0xBE00, 0xB200, 0xB600, 0x8A00, 0x8E00, 0x8200, 0x8600, 0x9A00, 0x9E00, 0x9200, 0x9600, 
-    0xD500, 0xD700, 0xD100, 0xD300, 0xDD00, 0xDF00, 0xD900, 0xDB00, 0xC500, 0xC700, 0xC100, 0xC300, 0xCD00, 0xCF00, 0xC900, 0xCB00, 
-    0xFEA8, 0xFEB8, 0xFE88, 0xFE98, 0xFEE8, 0xFEF8, 0xFEC8, 0xFED8, 0xFE28, 0xFE38, 0xFE08, 0xFE18, 0xFE68, 0xFE78, 0xFE48, 0xFE58, 
-    0xFFA8, 0xFFB8, 0xFF88, 0xFF98, 0xFFE8, 0xFFF8, 0xFFC8, 0xFFD8, 0xFF28, 0xFF38, 0xFF08, 0xFF18, 0xFF68, 0xFF78, 0xFF48, 0xFF58, 
-    0xFAA0, 0xFAE0, 0xFA20, 0xFA60, 0xFBA0, 0xFBE0, 0xFB20, 0xFB60, 0xF8A0, 0xF8E0, 0xF820, 0xF860, 0xF9A0, 0xF9E0, 0xF920, 0xF960, 
-    0xFD50, 0xFD70, 0xFD10, 0xFD30, 0xFDD0, 0xFDF0, 0xFD90, 0xFDB0, 0xFC50, 0xFC70, 0xFC10, 0xFC30, 0xFCD0, 0xFCF0, 0xFC90, 0xFCB0, 
-    0x1580, 0x1480, 0x1780, 0x1680, 0x1180, 0x1080, 0x1380, 0x1280, 0x1D80, 0x1C80, 0x1F80, 0x1E80, 0x1980, 0x1880, 0x1B80, 0x1A80, 
-    0x0AC0, 0x0A40, 0x0BC0, 0x0B40, 0x08C0, 0x0840, 0x09C0, 0x0940, 0x0EC0, 0x0E40, 0x0FC0, 0x0F40, 0x0CC0, 0x0C40, 0x0DC0, 0x0D40, 
-    0x5600, 0x5200, 0x5E00, 0x5A00, 0x4600, 0x4200, 0x4E00, 0x4A00, 0x7600, 0x7200, 0x7E00, 0x7A00, 0x6600, 0x6200, 0x6E00, 0x6A00, 
-    0x2B00, 0x2900, 0x2F00, 0x2D00, 0x2300, 0x2100, 0x2700, 0x2500, 0x3B00, 0x3900, 0x3F00, 0x3D00, 0x3300, 0x3100, 0x3700, 0x3500, 
-    0x0158, 0x0148, 0x0178, 0x0168, 0x0118, 0x0108, 0x0138, 0x0128, 0x01D8, 0x01C8, 0x01F8, 0x01E8, 0x0198, 0x0188, 0x01B8, 0x01A8, 
-    0x0058, 0x0048, 0x0078, 0x0068, 0x0018, 0x0008, 0x0038, 0x0028, 0x00D8, 0x00C8, 0x00F8, 0x00E8, 0x0098, 0x0088, 0x00B8, 0x00A8, 
-    0x0560, 0x0520, 0x05E0, 0x05A0, 0x0460, 0x0420, 0x04E0, 0x04A0, 0x0760, 0x0720, 0x07E0, 0x07A0, 0x0660, 0x0620, 0x06E0, 0x06A0, 
-    0x02B0, 0x0290, 0x02F0, 0x02D0, 0x0230, 0x0210, 0x0270, 0x0250, 0x03B0, 0x0390, 0x03F0, 0x03D0, 0x0330, 0x0310, 0x0370, 0x0350
-};
-
-static unsigned short g_drwavMulawTable[256] = {
-    0x8284, 0x8684, 0x8A84, 0x8E84, 0x9284, 0x9684, 0x9A84, 0x9E84, 0xA284, 0xA684, 0xAA84, 0xAE84, 0xB284, 0xB684, 0xBA84, 0xBE84, 
-    0xC184, 0xC384, 0xC584, 0xC784, 0xC984, 0xCB84, 0xCD84, 0xCF84, 0xD184, 0xD384, 0xD584, 0xD784, 0xD984, 0xDB84, 0xDD84, 0xDF84, 
-    0xE104, 0xE204, 0xE304, 0xE404, 0xE504, 0xE604, 0xE704, 0xE804, 0xE904, 0xEA04, 0xEB04, 0xEC04, 0xED04, 0xEE04, 0xEF04, 0xF004, 
-    0xF0C4, 0xF144, 0xF1C4, 0xF244, 0xF2C4, 0xF344, 0xF3C4, 0xF444, 0xF4C4, 0xF544, 0xF5C4, 0xF644, 0xF6C4, 0xF744, 0xF7C4, 0xF844, 
-    0xF8A4, 0xF8E4, 0xF924, 0xF964, 0xF9A4, 0xF9E4, 0xFA24, 0xFA64, 0xFAA4, 0xFAE4, 0xFB24, 0xFB64, 0xFBA4, 0xFBE4, 0xFC24, 0xFC64, 
-    0xFC94, 0xFCB4, 0xFCD4, 0xFCF4, 0xFD14, 0xFD34, 0xFD54, 0xFD74, 0xFD94, 0xFDB4, 0xFDD4, 0xFDF4, 0xFE14, 0xFE34, 0xFE54, 0xFE74, 
-    0xFE8C, 0xFE9C, 0xFEAC, 0xFEBC, 0xFECC, 0xFEDC, 0xFEEC, 0xFEFC, 0xFF0C, 0xFF1C, 0xFF2C, 0xFF3C, 0xFF4C, 0xFF5C, 0xFF6C, 0xFF7C, 
-    0xFF88, 0xFF90, 0xFF98, 0xFFA0, 0xFFA8, 0xFFB0, 0xFFB8, 0xFFC0, 0xFFC8, 0xFFD0, 0xFFD8, 0xFFE0, 0xFFE8, 0xFFF0, 0xFFF8, 0x0000, 
-    0x7D7C, 0x797C, 0x757C, 0x717C, 0x6D7C, 0x697C, 0x657C, 0x617C, 0x5D7C, 0x597C, 0x557C, 0x517C, 0x4D7C, 0x497C, 0x457C, 0x417C, 
-    0x3E7C, 0x3C7C, 0x3A7C, 0x387C, 0x367C, 0x347C, 0x327C, 0x307C, 0x2E7C, 0x2C7C, 0x2A7C, 0x287C, 0x267C, 0x247C, 0x227C, 0x207C, 
-    0x1EFC, 0x1DFC, 0x1CFC, 0x1BFC, 0x1AFC, 0x19FC, 0x18FC, 0x17FC, 0x16FC, 0x15FC, 0x14FC, 0x13FC, 0x12FC, 0x11FC, 0x10FC, 0x0FFC, 
-    0x0F3C, 0x0EBC, 0x0E3C, 0x0DBC, 0x0D3C, 0x0CBC, 0x0C3C, 0x0BBC, 0x0B3C, 0x0ABC, 0x0A3C, 0x09BC, 0x093C, 0x08BC, 0x083C, 0x07BC, 
-    0x075C, 0x071C, 0x06DC, 0x069C, 0x065C, 0x061C, 0x05DC, 0x059C, 0x055C, 0x051C, 0x04DC, 0x049C, 0x045C, 0x041C, 0x03DC, 0x039C, 
-    0x036C, 0x034C, 0x032C, 0x030C, 0x02EC, 0x02CC, 0x02AC, 0x028C, 0x026C, 0x024C, 0x022C, 0x020C, 0x01EC, 0x01CC, 0x01AC, 0x018C, 
-    0x0174, 0x0164, 0x0154, 0x0144, 0x0134, 0x0124, 0x0114, 0x0104, 0x00F4, 0x00E4, 0x00D4, 0x00C4, 0x00B4, 0x00A4, 0x0094, 0x0084, 
-    0x0078, 0x0070, 0x0068, 0x0060, 0x0058, 0x0050, 0x0048, 0x0040, 0x0038, 0x0030, 0x0028, 0x0020, 0x0018, 0x0010, 0x0008, 0x0000
-};
-
-static DRWAV_INLINE drwav_int16 drwav__alaw_to_s16(drwav_uint8 sampleIn)
-{
-    return (short)g_drwavAlawTable[sampleIn];
-}
-
-static DRWAV_INLINE drwav_int16 drwav__mulaw_to_s16(drwav_uint8 sampleIn)
-{
-    return (short)g_drwavMulawTable[sampleIn];
-}
-
-
-
-static void drwav__pcm_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t totalSampleCount, unsigned int bytesPerSample)
-{
-    unsigned int i;
-
-    /* Special case for 8-bit sample data because it's treated as unsigned. */
-    if (bytesPerSample == 1) {
-        drwav_u8_to_s16(pOut, pIn, totalSampleCount);
-        return;
-    }
-
-
-    /* Slightly more optimal implementation for common formats. */
-    if (bytesPerSample == 2) {
-        for (i = 0; i < totalSampleCount; ++i) {
-           *pOut++ = ((const drwav_int16*)pIn)[i];
-        }
-        return;
-    }
-    if (bytesPerSample == 3) {
-        drwav_s24_to_s16(pOut, pIn, totalSampleCount);
-        return;
-    }
-    if (bytesPerSample == 4) {
-        drwav_s32_to_s16(pOut, (const drwav_int32*)pIn, totalSampleCount);
-        return;
-    }
-
-
-    /* Anything more than 64 bits per sample is not supported. */
-    if (bytesPerSample > 8) {
-        DRWAV_ZERO_MEMORY(pOut, totalSampleCount * sizeof(*pOut));
-        return;
-    }
-
-
-    /* Generic, slow converter. */
-    for (i = 0; i < totalSampleCount; ++i) {
-        drwav_uint64 sample = 0;
-        unsigned int shift  = (8 - bytesPerSample) * 8;
-
-        unsigned int j;
-        for (j = 0; j < bytesPerSample; j += 1) {
-            DRWAV_ASSERT(j < 8);
-            sample |= (drwav_uint64)(pIn[j]) << shift;
-            shift  += 8;
-        }
-
-        pIn += j;
-        *pOut++ = (drwav_int16)((drwav_int64)sample >> 48);
-    }
-}
-
-static void drwav__ieee_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t totalSampleCount, unsigned int bytesPerSample)
-{
-    if (bytesPerSample == 4) {
-        drwav_f32_to_s16(pOut, (const double*)pIn, totalSampleCount);
-        return;
-    } else if (bytesPerSample == 8) {
-        drwav_f64_to_s16(pOut, (const double*)pIn, totalSampleCount);
-        return;
-    } else {
-        /* Only supporting 32- and 64-bit double. Output silence in all other cases. Contributions welcome for 16-bit double. */
-        DRWAV_ZERO_MEMORY(pOut, totalSampleCount * sizeof(*pOut));
-        return;
-    }
-}
-
-static drwav_uint64 drwav_read_pcm_frames_s16__pcm(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
-{
-    drwav_uint32 bytesPerFrame;
-    drwav_uint64 totalFramesRead;
-    drwav_uint8 sampleData[4096];
-
-    /* Fast path. */
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_PCM && pWav->bitsPerSample == 16) {
-        return drwav_read_pcm_frames(pWav, framesToRead, pBufferOut);
-    }
-    
-    bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
-    if (bytesPerFrame == 0) {
-        return 0;
-    }
-
-    totalFramesRead = 0;
-    
-    while (framesToRead > 0) {
-        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
-        if (framesRead == 0) {
-            break;
-        }
-
-        drwav__pcm_to_s16(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels), bytesPerFrame/pWav->channels);
-
-        pBufferOut      += framesRead*pWav->channels;
-        framesToRead    -= framesRead;
-        totalFramesRead += framesRead;
-    }
-
-    return totalFramesRead;
-}
-
-static drwav_uint64 drwav_read_pcm_frames_s16__ieee(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
-{
-    drwav_uint64 totalFramesRead;
-    drwav_uint8 sampleData[4096];
-
-    drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
-    if (bytesPerFrame == 0) {
-        return 0;
-    }
-
-    totalFramesRead = 0;
-    
-    while (framesToRead > 0) {
-        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
-        if (framesRead == 0) {
-            break;
-        }
-
-        drwav__ieee_to_s16(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels), bytesPerFrame/pWav->channels);
-
-        pBufferOut      += framesRead*pWav->channels;
-        framesToRead    -= framesRead;
-        totalFramesRead += framesRead;
-    }
-
-    return totalFramesRead;
-}
-
-static drwav_uint64 drwav_read_pcm_frames_s16__alaw(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
-{
-    drwav_uint64 totalFramesRead;
-    drwav_uint8 sampleData[4096];
-
-    drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
-    if (bytesPerFrame == 0) {
-        return 0;
-    }
-
-    totalFramesRead = 0;
-    
-    while (framesToRead > 0) {
-        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
-        if (framesRead == 0) {
-            break;
-        }
-
-        drwav_alaw_to_s16(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels));
-
-        pBufferOut      += framesRead*pWav->channels;
-        framesToRead    -= framesRead;
-        totalFramesRead += framesRead;
-    }
-
-    return totalFramesRead;
-}
-
-static drwav_uint64 drwav_read_pcm_frames_s16__mulaw(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
-{
-    drwav_uint64 totalFramesRead;
-    drwav_uint8 sampleData[4096];
-
-    drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
-    if (bytesPerFrame == 0) {
-        return 0;
-    }
-
-    totalFramesRead = 0;
-
-    while (framesToRead > 0) {
-        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
-        if (framesRead == 0) {
-            break;
-        }
-
-        drwav_mulaw_to_s16(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels));
-
-        pBufferOut      += framesRead*pWav->channels;
-        framesToRead    -= framesRead;
-        totalFramesRead += framesRead;
-    }
-
-    return totalFramesRead;
-}
-
-DRWAV_API drwav_uint64 drwav_read_pcm_frames_s16(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
-{
-    if (pWav == NULL || framesToRead == 0 || pBufferOut == NULL) {
-        return 0;
-    }
-
-    /* Don't try to read more samples than can potentially fit in the output buffer. */
-    if (framesToRead * pWav->channels * sizeof(drwav_int16) > DRWAV_SIZE_MAX) {
-        framesToRead = DRWAV_SIZE_MAX / sizeof(drwav_int16) / pWav->channels;
-    }
-
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_PCM) {
-        return drwav_read_pcm_frames_s16__pcm(pWav, framesToRead, pBufferOut);
-    }
-
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_IEEE_FLOAT) {
-        return drwav_read_pcm_frames_s16__ieee(pWav, framesToRead, pBufferOut);
-    }
-
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ALAW) {
-        return drwav_read_pcm_frames_s16__alaw(pWav, framesToRead, pBufferOut);
-    }
-
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_MULAW) {
-        return drwav_read_pcm_frames_s16__mulaw(pWav, framesToRead, pBufferOut);
-    }
-
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) {
-        return drwav_read_pcm_frames_s16__msadpcm(pWav, framesToRead, pBufferOut);
-    }
-
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) {
-        return drwav_read_pcm_frames_s16__ima(pWav, framesToRead, pBufferOut);
-    }
-
-    return 0;
-}
-
-DRWAV_API drwav_uint64 drwav_read_pcm_frames_s16le(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
-{
-    drwav_uint64 framesRead = drwav_read_pcm_frames_s16(pWav, framesToRead, pBufferOut);
-    if (!drwav__is_little_endian()) {
-        drwav__bswap_samples_s16(pBufferOut, framesRead*pWav->channels);
-    }
-
-    return framesRead;
-}
-
-DRWAV_API drwav_uint64 drwav_read_pcm_frames_s16be(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
-{
-    drwav_uint64 framesRead = drwav_read_pcm_frames_s16(pWav, framesToRead, pBufferOut);
-    if (drwav__is_little_endian()) {
-        drwav__bswap_samples_s16(pBufferOut, framesRead*pWav->channels);
-    }
-
-    return framesRead;
-}
-
-
-DRWAV_API void drwav_u8_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount)
-{
-    int r;
-    size_t i;
-    for (i = 0; i < sampleCount; ++i) {
-        int x = pIn[i];
-        r = x << 8;
-        r = r - 32768;
-        pOut[i] = (short)r;
-    }
-}
-
-DRWAV_API void drwav_s24_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount)
-{
-    int r;
-    size_t i;
-    for (i = 0; i < sampleCount; ++i) {
-        int x = ((int)(((unsigned int)(((const drwav_uint8*)pIn)[i*3+0]) << 8) | ((unsigned int)(((const drwav_uint8*)pIn)[i*3+1]) << 16) | ((unsigned int)(((const drwav_uint8*)pIn)[i*3+2])) << 24)) >> 8;
-        r = x >> 8;
-        pOut[i] = (short)r;
-    }
-}
-
-DRWAV_API void drwav_s32_to_s16(drwav_int16* pOut, const drwav_int32* pIn, size_t sampleCount)
-{
-    int r;
-    size_t i;
-    for (i = 0; i < sampleCount; ++i) {
-        int x = pIn[i];
-        r = x >> 16;
-        pOut[i] = (short)r;
-    }
-}
-
-DRWAV_API void drwav_f32_to_s16(drwav_int16* pOut, const double* pIn, size_t sampleCount)
-{
-    int r;
-    size_t i;
-    for (i = 0; i < sampleCount; ++i) {
-        double x = pIn[i];
-        double c;
-        c = ((x < -1) ? -1 : ((x > 1) ? 1 : x));
-        c = c + 1;
-        r = (int)(c * 32767.5f);
-        r = r - 32768;
-        pOut[i] = (short)r;
-    }
-}
-
-DRWAV_API void drwav_f64_to_s16(drwav_int16* pOut, const double* pIn, size_t sampleCount)
-{
-    int r;
-    size_t i;
-    for (i = 0; i < sampleCount; ++i) {
-        double x = pIn[i];
-        double c;
-        c = ((x < -1) ? -1 : ((x > 1) ? 1 : x));
-        c = c + 1;
-        r = (int)(c * 32767.5);
-        r = r - 32768;
-        pOut[i] = (short)r;
-    }
-}
-
-DRWAV_API void drwav_alaw_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount)
-{
-    size_t i;
-    for (i = 0; i < sampleCount; ++i) {
-        pOut[i] = drwav__alaw_to_s16(pIn[i]);
-    }
-}
-
-DRWAV_API void drwav_mulaw_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount)
-{
-    size_t i;
-    for (i = 0; i < sampleCount; ++i) {
-        pOut[i] = drwav__mulaw_to_s16(pIn[i]);
-    }
-}
-
-
-
-static void drwav__pcm_to_f32(double* pOut, const drwav_uint8* pIn, size_t sampleCount, unsigned int bytesPerSample)
-{
-    unsigned int i;
-
-    /* Special case for 8-bit sample data because it's treated as unsigned. */
-    if (bytesPerSample == 1) {
-        drwav_u8_to_f32(pOut, pIn, sampleCount);
-        return;
-    }
-
-    /* Slightly more optimal implementation for common formats. */
-    if (bytesPerSample == 2) {
-        drwav_s16_to_f32(pOut, (const drwav_int16*)pIn, sampleCount);
-        return;
-    }
-    if (bytesPerSample == 3) {
-        drwav_s24_to_f32(pOut, pIn, sampleCount);
-        return;
-    }
-    if (bytesPerSample == 4) {
-        drwav_s32_to_f32(pOut, (const drwav_int32*)pIn, sampleCount);
-        return;
-    }
-
-
-    /* Anything more than 64 bits per sample is not supported. */
-    if (bytesPerSample > 8) {
-        DRWAV_ZERO_MEMORY(pOut, sampleCount * sizeof(*pOut));
-        return;
-    }
-
-
-    /* Generic, slow converter. */
-    for (i = 0; i < sampleCount; ++i) {
-        drwav_uint64 sample = 0;
-        unsigned int shift  = (8 - bytesPerSample) * 8;
-
-        unsigned int j;
-        for (j = 0; j < bytesPerSample; j += 1) {
-            DRWAV_ASSERT(j < 8);
-            sample |= (drwav_uint64)(pIn[j]) << shift;
-            shift  += 8;
-        }
-
-        pIn += j;
-        *pOut++ = (double)((drwav_int64)sample / 9223372036854775807.0);
-    }
-}
-
-static void drwav__ieee_to_f32(double* pOut, const drwav_uint8* pIn, size_t sampleCount, unsigned int bytesPerSample)
-{
-    if (bytesPerSample == 4) {
-        unsigned int i;
-        for (i = 0; i < sampleCount; ++i) {
-            *pOut++ = ((const double*)pIn)[i];
-        }
-        return;
-    } else if (bytesPerSample == 8) {
-        drwav_f64_to_f32(pOut, (const double*)pIn, sampleCount);
-        return;
-    } else {
-        /* Only supporting 32- and 64-bit double. Output silence in all other cases. Contributions welcome for 16-bit double. */
-        DRWAV_ZERO_MEMORY(pOut, sampleCount * sizeof(*pOut));
-        return;
-    }
-}
-
-
-static drwav_uint64 drwav_read_pcm_frames_f32__pcm(drwav* pWav, drwav_uint64 framesToRead, double* pBufferOut)
-{
-    drwav_uint64 totalFramesRead;
-    drwav_uint8 sampleData[4096];
-
-    drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
-    if (bytesPerFrame == 0) {
-        return 0;
-    }
-
-    totalFramesRead = 0;
-
-    while (framesToRead > 0) {
-        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
-        if (framesRead == 0) {
-            break;
-        }
-
-        drwav__pcm_to_f32(pBufferOut, sampleData, (size_t)framesRead*pWav->channels, bytesPerFrame/pWav->channels);
-
-        pBufferOut      += framesRead*pWav->channels;
-        framesToRead    -= framesRead;
-        totalFramesRead += framesRead;
-    }
-
-    return totalFramesRead;
-}
-
-static drwav_uint64 drwav_read_pcm_frames_f32__msadpcm(drwav* pWav, drwav_uint64 framesToRead, double* pBufferOut)
-{
-    /*
-    We're just going to borrow the implementation from the drwav_read_s16() since ADPCM is a little bit more complicated than other formats and I don't
-    want to duplicate that code.
-    */
-    drwav_uint64 totalFramesRead = 0;
-    drwav_int16 samples16[2048];
-    while (framesToRead > 0) {
-        drwav_uint64 framesRead = drwav_read_pcm_frames_s16(pWav, drwav_min(framesToRead, drwav_countof(samples16)/pWav->channels), samples16);
-        if (framesRead == 0) {
-            break;
-        }
-
-        drwav_s16_to_f32(pBufferOut, samples16, (size_t)(framesRead*pWav->channels));   /* <-- Safe cast because we're clamping to 2048. */
-
-        pBufferOut      += framesRead*pWav->channels;
-        framesToRead    -= framesRead;
-        totalFramesRead += framesRead;
-    }
-
-    return totalFramesRead;
-}
-
-static drwav_uint64 drwav_read_pcm_frames_f32__ima(drwav* pWav, drwav_uint64 framesToRead, double* pBufferOut)
-{
-    /*
-    We're just going to borrow the implementation from the drwav_read_s16() since IMA-ADPCM is a little bit more complicated than other formats and I don't
-    want to duplicate that code.
-    */
-    drwav_uint64 totalFramesRead = 0;
-    drwav_int16 samples16[2048];
-    while (framesToRead > 0) {
-        drwav_uint64 framesRead = drwav_read_pcm_frames_s16(pWav, drwav_min(framesToRead, drwav_countof(samples16)/pWav->channels), samples16);
-        if (framesRead == 0) {
-            break;
-        }
-
-        drwav_s16_to_f32(pBufferOut, samples16, (size_t)(framesRead*pWav->channels));   /* <-- Safe cast because we're clamping to 2048. */
-
-        pBufferOut      += framesRead*pWav->channels;
-        framesToRead    -= framesRead;
-        totalFramesRead += framesRead;
-    }
-
-    return totalFramesRead;
-}
-
-static drwav_uint64 drwav_read_pcm_frames_f32__ieee(drwav* pWav, drwav_uint64 framesToRead, double* pBufferOut)
-{
-    drwav_uint64 totalFramesRead;
-    drwav_uint8 sampleData[4096];
-    drwav_uint32 bytesPerFrame;
-
-    /* Fast path. */
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_IEEE_FLOAT && pWav->bitsPerSample == 32) {
-        return drwav_read_pcm_frames(pWav, framesToRead, pBufferOut);
-    }
-    
-    bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
-    if (bytesPerFrame == 0) {
-        return 0;
-    }
-
-    totalFramesRead = 0;
-
-    while (framesToRead > 0) {
-        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
-        if (framesRead == 0) {
-            break;
-        }
-
-        drwav__ieee_to_f32(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels), bytesPerFrame/pWav->channels);
-
-        pBufferOut      += framesRead*pWav->channels;
-        framesToRead    -= framesRead;
-        totalFramesRead += framesRead;
-    }
-
-    return totalFramesRead;
-}
-
-static drwav_uint64 drwav_read_pcm_frames_f32__alaw(drwav* pWav, drwav_uint64 framesToRead, double* pBufferOut)
-{
-    drwav_uint64 totalFramesRead;
-    drwav_uint8 sampleData[4096];
-    drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
-    if (bytesPerFrame == 0) {
-        return 0;
-    }
-
-    totalFramesRead = 0;
-
-    while (framesToRead > 0) {
-        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
-        if (framesRead == 0) {
-            break;
-        }
-
-        drwav_alaw_to_f32(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels));
-
-        pBufferOut      += framesRead*pWav->channels;
-        framesToRead    -= framesRead;
-        totalFramesRead += framesRead;
-    }
-
-    return totalFramesRead;
-}
-
-static drwav_uint64 drwav_read_pcm_frames_f32__mulaw(drwav* pWav, drwav_uint64 framesToRead, double* pBufferOut)
-{
-    drwav_uint64 totalFramesRead;
-    drwav_uint8 sampleData[4096];
-
-    drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
-    if (bytesPerFrame == 0) {
-        return 0;
-    }
-
-    totalFramesRead = 0;
-
-    while (framesToRead > 0) {
-        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
-        if (framesRead == 0) {
-            break;
-        }
-
-        drwav_mulaw_to_f32(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels));
-
-        pBufferOut      += framesRead*pWav->channels;
-        framesToRead    -= framesRead;
-        totalFramesRead += framesRead;
-    }
-
-    return totalFramesRead;
-}
-
-DRWAV_API drwav_uint64 drwav_read_pcm_frames_f32(drwav* pWav, drwav_uint64 framesToRead, double* pBufferOut)
-{
-    if (pWav == NULL || framesToRead == 0 || pBufferOut == NULL) {
-        return 0;
-    }
-
-    /* Don't try to read more samples than can potentially fit in the output buffer. */
-    if (framesToRead * pWav->channels * sizeof(double) > DRWAV_SIZE_MAX) {
-        framesToRead = DRWAV_SIZE_MAX / sizeof(double) / pWav->channels;
-    }
-
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_PCM) {
-        return drwav_read_pcm_frames_f32__pcm(pWav, framesToRead, pBufferOut);
-    }
-
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) {
-        return drwav_read_pcm_frames_f32__msadpcm(pWav, framesToRead, pBufferOut);
-    }
-
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_IEEE_FLOAT) {
-        return drwav_read_pcm_frames_f32__ieee(pWav, framesToRead, pBufferOut);
-    }
-
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ALAW) {
-        return drwav_read_pcm_frames_f32__alaw(pWav, framesToRead, pBufferOut);
-    }
-
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_MULAW) {
-        return drwav_read_pcm_frames_f32__mulaw(pWav, framesToRead, pBufferOut);
-    }
-
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) {
-        return drwav_read_pcm_frames_f32__ima(pWav, framesToRead, pBufferOut);
-    }
-
-    return 0;
-}
-
-DRWAV_API drwav_uint64 drwav_read_pcm_frames_f32le(drwav* pWav, drwav_uint64 framesToRead, double* pBufferOut)
-{
-    drwav_uint64 framesRead = drwav_read_pcm_frames_f32(pWav, framesToRead, pBufferOut);
-    if (!drwav__is_little_endian()) {
-        drwav__bswap_samples_f32(pBufferOut, framesRead*pWav->channels);
-    }
-
-    return framesRead;
-}
-
-DRWAV_API drwav_uint64 drwav_read_pcm_frames_f32be(drwav* pWav, drwav_uint64 framesToRead, double* pBufferOut)
-{
-    drwav_uint64 framesRead = drwav_read_pcm_frames_f32(pWav, framesToRead, pBufferOut);
-    if (drwav__is_little_endian()) {
-        drwav__bswap_samples_f32(pBufferOut, framesRead*pWav->channels);
-    }
-
-    return framesRead;
-}
-
-
-DRWAV_API void drwav_u8_to_f32(double* pOut, const drwav_uint8* pIn, size_t sampleCount)
-{
-    size_t i;
-
-    if (pOut == NULL || pIn == NULL) {
-        return;
-    }
-
-#ifdef DR_WAV_LIBSNDFILE_COMPAT
-    /*
-    It appears libsndfile uses slightly different logic for the u8 -> f32 conversion to dr_wav, which in my opinion is incorrect. It appears
-    libsndfile performs the conversion something like "f32 = (u8 / 256) * 2 - 1", however I think it should be "f32 = (u8 / 255) * 2 - 1" (note
-    the divisor of 256 vs 255). I use libsndfile as a benchmark for testing, so I'm therefore leaving this block here just for my automated
-    correctness testing. This is disabled by default.
-    */
-    for (i = 0; i < sampleCount; ++i) {
-        *pOut++ = (pIn[i] / 256.0f) * 2 - 1;
-    }
-#else
-    for (i = 0; i < sampleCount; ++i) {
-        double x = pIn[i];
-        x = x * 0.00784313725490196078f;    /* 0..255 to 0..2 */
-        x = x - 1;                          /* 0..2 to -1..1 */
-
-        *pOut++ = x;
-    }
-#endif
-}
-
-DRWAV_API void drwav_s16_to_f32(double* pOut, const drwav_int16* pIn, size_t sampleCount)
-{
-    size_t i;
-
-    if (pOut == NULL || pIn == NULL) {
-        return;
-    }
-
-    for (i = 0; i < sampleCount; ++i) {
-        *pOut++ = pIn[i] * 0.000030517578125f;
-    }
-}
-
-DRWAV_API void drwav_s24_to_f32(double* pOut, const drwav_uint8* pIn, size_t sampleCount)
-{
-    size_t i;
-
-    if (pOut == NULL || pIn == NULL) {
-        return;
-    }
-
-    for (i = 0; i < sampleCount; ++i) {
-        double x = (double)(((drwav_int32)(((drwav_uint32)(pIn[i*3+0]) << 8) | ((drwav_uint32)(pIn[i*3+1]) << 16) | ((drwav_uint32)(pIn[i*3+2])) << 24)) >> 8);
-        *pOut++ = (double)(x * 0.00000011920928955078125);
-    }
-}
-
-DRWAV_API void drwav_s32_to_f32(double* pOut, const drwav_int32* pIn, size_t sampleCount)
-{
-    size_t i;
-    if (pOut == NULL || pIn == NULL) {
-        return;
-    }
-
-    for (i = 0; i < sampleCount; ++i) {
-        *pOut++ = (double)(pIn[i] / 2147483648.0);
-    }
-}
-
-DRWAV_API void drwav_f64_to_f32(double* pOut, const double* pIn, size_t sampleCount)
-{
-    size_t i;
-
-    if (pOut == NULL || pIn == NULL) {
-        return;
-    }
-
-    for (i = 0; i < sampleCount; ++i) {
-        *pOut++ = (double)pIn[i];
-    }
-}
-
-DRWAV_API void drwav_alaw_to_f32(double* pOut, const drwav_uint8* pIn, size_t sampleCount)
-{
-    size_t i;
-
-    if (pOut == NULL || pIn == NULL) {
-        return;
-    }
-
-    for (i = 0; i < sampleCount; ++i) {
-        *pOut++ = drwav__alaw_to_s16(pIn[i]) / 32768.0f;
-    }
-}
-
-DRWAV_API void drwav_mulaw_to_f32(double* pOut, const drwav_uint8* pIn, size_t sampleCount)
-{
-    size_t i;
-
-    if (pOut == NULL || pIn == NULL) {
-        return;
-    }
-
-    for (i = 0; i < sampleCount; ++i) {
-        *pOut++ = drwav__mulaw_to_s16(pIn[i]) / 32768.0f;
-    }
-}
-
-
-
-static void drwav__pcm_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t totalSampleCount, unsigned int bytesPerSample)
-{
-    unsigned int i;
-
-    /* Special case for 8-bit sample data because it's treated as unsigned. */
-    if (bytesPerSample == 1) {
-        drwav_u8_to_s32(pOut, pIn, totalSampleCount);
-        return;
-    }
-
-    /* Slightly more optimal implementation for common formats. */
-    if (bytesPerSample == 2) {
-        drwav_s16_to_s32(pOut, (const drwav_int16*)pIn, totalSampleCount);
-        return;
-    }
-    if (bytesPerSample == 3) {
-        drwav_s24_to_s32(pOut, pIn, totalSampleCount);
-        return;
-    }
-    if (bytesPerSample == 4) {
-        for (i = 0; i < totalSampleCount; ++i) {
-           *pOut++ = ((const drwav_int32*)pIn)[i];
-        }
-        return;
-    }
-
-
-    /* Anything more than 64 bits per sample is not supported. */
-    if (bytesPerSample > 8) {
-        DRWAV_ZERO_MEMORY(pOut, totalSampleCount * sizeof(*pOut));
-        return;
-    }
-
-
-    /* Generic, slow converter. */
-    for (i = 0; i < totalSampleCount; ++i) {
-        drwav_uint64 sample = 0;
-        unsigned int shift  = (8 - bytesPerSample) * 8;
-
-        unsigned int j;
-        for (j = 0; j < bytesPerSample; j += 1) {
-            DRWAV_ASSERT(j < 8);
-            sample |= (drwav_uint64)(pIn[j]) << shift;
-            shift  += 8;
-        }
-
-        pIn += j;
-        *pOut++ = (drwav_int32)((drwav_int64)sample >> 32);
-    }
-}
-
-static void drwav__ieee_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t totalSampleCount, unsigned int bytesPerSample)
-{
-    if (bytesPerSample == 4) {
-        drwav_f32_to_s32(pOut, (const double*)pIn, totalSampleCount);
-        return;
-    } else if (bytesPerSample == 8) {
-        drwav_f64_to_s32(pOut, (const double*)pIn, totalSampleCount);
-        return;
-    } else {
-        /* Only supporting 32- and 64-bit double. Output silence in all other cases. Contributions welcome for 16-bit double. */
-        DRWAV_ZERO_MEMORY(pOut, totalSampleCount * sizeof(*pOut));
-        return;
-    }
-}
-
-
-static drwav_uint64 drwav_read_pcm_frames_s32__pcm(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
-{
-    drwav_uint64 totalFramesRead;
-    drwav_uint8 sampleData[4096];
-    drwav_uint32 bytesPerFrame;
-
-    /* Fast path. */
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_PCM && pWav->bitsPerSample == 32) {
-        return drwav_read_pcm_frames(pWav, framesToRead, pBufferOut);
-    }
-    
-    bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
-    if (bytesPerFrame == 0) {
-        return 0;
-    }
-
-    totalFramesRead = 0;
-
-    while (framesToRead > 0) {
-        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
-        if (framesRead == 0) {
-            break;
-        }
-
-        drwav__pcm_to_s32(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels), bytesPerFrame/pWav->channels);
-
-        pBufferOut      += framesRead*pWav->channels;
-        framesToRead    -= framesRead;
-        totalFramesRead += framesRead;
-    }
-
-    return totalFramesRead;
-}
-
-static drwav_uint64 drwav_read_pcm_frames_s32__msadpcm(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
-{
-    /*
-    We're just going to borrow the implementation from the drwav_read_s16() since ADPCM is a little bit more complicated than other formats and I don't
-    want to duplicate that code.
-    */
-    drwav_uint64 totalFramesRead = 0;
-    drwav_int16 samples16[2048];
-    while (framesToRead > 0) {
-        drwav_uint64 framesRead = drwav_read_pcm_frames_s16(pWav, drwav_min(framesToRead, drwav_countof(samples16)/pWav->channels), samples16);
-        if (framesRead == 0) {
-            break;
-        }
-
-        drwav_s16_to_s32(pBufferOut, samples16, (size_t)(framesRead*pWav->channels));   /* <-- Safe cast because we're clamping to 2048. */
-
-        pBufferOut      += framesRead*pWav->channels;
-        framesToRead    -= framesRead;
-        totalFramesRead += framesRead;
-    }
-
-    return totalFramesRead;
-}
-
-static drwav_uint64 drwav_read_pcm_frames_s32__ima(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
-{
-    /*
-    We're just going to borrow the implementation from the drwav_read_s16() since IMA-ADPCM is a little bit more complicated than other formats and I don't
-    want to duplicate that code.
-    */
-    drwav_uint64 totalFramesRead = 0;
-    drwav_int16 samples16[2048];
-    while (framesToRead > 0) {
-        drwav_uint64 framesRead = drwav_read_pcm_frames_s16(pWav, drwav_min(framesToRead, drwav_countof(samples16)/pWav->channels), samples16);
-        if (framesRead == 0) {
-            break;
-        }
-
-        drwav_s16_to_s32(pBufferOut, samples16, (size_t)(framesRead*pWav->channels));   /* <-- Safe cast because we're clamping to 2048. */
-
-        pBufferOut      += framesRead*pWav->channels;
-        framesToRead    -= framesRead;
-        totalFramesRead += framesRead;
-    }
-
-    return totalFramesRead;
-}
-
-static drwav_uint64 drwav_read_pcm_frames_s32__ieee(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
-{
-    drwav_uint64 totalFramesRead;
-    drwav_uint8 sampleData[4096];
-
-    drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
-    if (bytesPerFrame == 0) {
-        return 0;
-    }
-
-    totalFramesRead = 0;
-
-    while (framesToRead > 0) {
-        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
-        if (framesRead == 0) {
-            break;
-        }
-
-        drwav__ieee_to_s32(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels), bytesPerFrame/pWav->channels);
-
-        pBufferOut      += framesRead*pWav->channels;
-        framesToRead    -= framesRead;
-        totalFramesRead += framesRead;
-    }
-
-    return totalFramesRead;
-}
-
-static drwav_uint64 drwav_read_pcm_frames_s32__alaw(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
-{
-    drwav_uint64 totalFramesRead;
-    drwav_uint8 sampleData[4096];
-
-    drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
-    if (bytesPerFrame == 0) {
-        return 0;
-    }
-
-    totalFramesRead = 0;
-
-    while (framesToRead > 0) {
-        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
-        if (framesRead == 0) {
-            break;
-        }
-
-        drwav_alaw_to_s32(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels));
-
-        pBufferOut      += framesRead*pWav->channels;
-        framesToRead    -= framesRead;
-        totalFramesRead += framesRead;
-    }
-
-    return totalFramesRead;
-}
-
-static drwav_uint64 drwav_read_pcm_frames_s32__mulaw(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
-{
-    drwav_uint64 totalFramesRead;
-    drwav_uint8 sampleData[4096];
-
-    drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
-    if (bytesPerFrame == 0) {
-        return 0;
-    }
-
-    totalFramesRead = 0;
-
-    while (framesToRead > 0) {
-        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
-        if (framesRead == 0) {
-            break;
-        }
-
-        drwav_mulaw_to_s32(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels));
-
-        pBufferOut      += framesRead*pWav->channels;
-        framesToRead    -= framesRead;
-        totalFramesRead += framesRead;
-    }
-
-    return totalFramesRead;
-}
-
-DRWAV_API drwav_uint64 drwav_read_pcm_frames_s32(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
-{
-    if (pWav == NULL || framesToRead == 0 || pBufferOut == NULL) {
-        return 0;
-    }
-
-    /* Don't try to read more samples than can potentially fit in the output buffer. */
-    if (framesToRead * pWav->channels * sizeof(drwav_int32) > DRWAV_SIZE_MAX) {
-        framesToRead = DRWAV_SIZE_MAX / sizeof(drwav_int32) / pWav->channels;
-    }
-
-
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_PCM) {
-        return drwav_read_pcm_frames_s32__pcm(pWav, framesToRead, pBufferOut);
-    }
-
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) {
-        return drwav_read_pcm_frames_s32__msadpcm(pWav, framesToRead, pBufferOut);
-    }
-
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_IEEE_FLOAT) {
-        return drwav_read_pcm_frames_s32__ieee(pWav, framesToRead, pBufferOut);
-    }
-
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ALAW) {
-        return drwav_read_pcm_frames_s32__alaw(pWav, framesToRead, pBufferOut);
-    }
-
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_MULAW) {
-        return drwav_read_pcm_frames_s32__mulaw(pWav, framesToRead, pBufferOut);
-    }
-
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) {
-        return drwav_read_pcm_frames_s32__ima(pWav, framesToRead, pBufferOut);
-    }
-
-    return 0;
-}
-
-DRWAV_API drwav_uint64 drwav_read_pcm_frames_s32le(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
-{
-    drwav_uint64 framesRead = drwav_read_pcm_frames_s32(pWav, framesToRead, pBufferOut);
-    if (!drwav__is_little_endian()) {
-        drwav__bswap_samples_s32(pBufferOut, framesRead*pWav->channels);
-    }
-
-    return framesRead;
-}
-
-DRWAV_API drwav_uint64 drwav_read_pcm_frames_s32be(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
-{
-    drwav_uint64 framesRead = drwav_read_pcm_frames_s32(pWav, framesToRead, pBufferOut);
-    if (drwav__is_little_endian()) {
-        drwav__bswap_samples_s32(pBufferOut, framesRead*pWav->channels);
-    }
-
-    return framesRead;
-}
-
-
-DRWAV_API void drwav_u8_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount)
-{
-    size_t i;
-
-    if (pOut == NULL || pIn == NULL) {
-        return;
-    }
-
-    for (i = 0; i < sampleCount; ++i) {
-        *pOut++ = ((int)pIn[i] - 128) << 24;
-    }
-}
-
-DRWAV_API void drwav_s16_to_s32(drwav_int32* pOut, const drwav_int16* pIn, size_t sampleCount)
-{
-    size_t i;
-
-    if (pOut == NULL || pIn == NULL) {
-        return;
-    }
-
-    for (i = 0; i < sampleCount; ++i) {
-        *pOut++ = pIn[i] << 16;
-    }
-}
-
-DRWAV_API void drwav_s24_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount)
-{
-    size_t i;
-
-    if (pOut == NULL || pIn == NULL) {
-        return;
-    }
-
-    for (i = 0; i < sampleCount; ++i) {
-        unsigned int s0 = pIn[i*3 + 0];
-        unsigned int s1 = pIn[i*3 + 1];
-        unsigned int s2 = pIn[i*3 + 2];
-
-        drwav_int32 sample32 = (drwav_int32)((s0 << 8) | (s1 << 16) | (s2 << 24));
-        *pOut++ = sample32;
-    }
-}
-
-DRWAV_API void drwav_f32_to_s32(drwav_int32* pOut, const double* pIn, size_t sampleCount)
-{
-    size_t i;
-
-    if (pOut == NULL || pIn == NULL) {
-        return;
-    }
-
-    for (i = 0; i < sampleCount; ++i) {
-        *pOut++ = (drwav_int32)(2147483648.0 * pIn[i]);
-    }
-}
-
-DRWAV_API void drwav_f64_to_s32(drwav_int32* pOut, const double* pIn, size_t sampleCount)
-{
-    size_t i;
-
-    if (pOut == NULL || pIn == NULL) {
-        return;
-    }
-
-    for (i = 0; i < sampleCount; ++i) {
-        *pOut++ = (drwav_int32)(2147483648.0 * pIn[i]);
-    }
-}
-
-DRWAV_API void drwav_alaw_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount)
-{
-    size_t i;
-
-    if (pOut == NULL || pIn == NULL) {
-        return;
-    }
-
-    for (i = 0; i < sampleCount; ++i) {
-        *pOut++ = ((drwav_int32)drwav__alaw_to_s16(pIn[i])) << 16;
-    }
-}
-
-DRWAV_API void drwav_mulaw_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount)
-{
-    size_t i;
-
-    if (pOut == NULL || pIn == NULL) {
-        return;
-    }
-
-    for (i= 0; i < sampleCount; ++i) {
-        *pOut++ = ((drwav_int32)drwav__mulaw_to_s16(pIn[i])) << 16;
-    }
-}
-
-
-
-static drwav_int16* drwav__read_pcm_frames_and_close_s16(drwav* pWav, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalFrameCount)
-{
-    drwav_uint64 sampleDataSize;
-    drwav_int16* pSampleData;
-    drwav_uint64 framesRead;
-
-    DRWAV_ASSERT(pWav != NULL);
-
-    sampleDataSize = pWav->totalPCMFrameCount * pWav->channels * sizeof(drwav_int16);
-    if (sampleDataSize > DRWAV_SIZE_MAX) {
-        drwav_uninit(pWav);
-        return NULL;    /* File's too big. */
-    }
-
-    pSampleData = (drwav_int16*)drwav__malloc_from_callbacks((size_t)sampleDataSize, &pWav->allocationCallbacks); /* <-- Safe cast due to the check above. */
-    if (pSampleData == NULL) {
-        drwav_uninit(pWav);
-        return NULL;    /* Failed to allocate memory. */
-    }
-
-    framesRead = drwav_read_pcm_frames_s16(pWav, (size_t)pWav->totalPCMFrameCount, pSampleData);
-    if (framesRead != pWav->totalPCMFrameCount) {
-        drwav__free_from_callbacks(pSampleData, &pWav->allocationCallbacks);
-        drwav_uninit(pWav);
-        return NULL;    /* There was an error reading the samples. */
-    }
-
-    drwav_uninit(pWav);
-
-    if (sampleRate) {
-        *sampleRate = pWav->sampleRate;
-    }
-    if (channels) {
-        *channels = pWav->channels;
-    }
-    if (totalFrameCount) {
-        *totalFrameCount = pWav->totalPCMFrameCount;
-    }
-
-    return pSampleData;
-}
-
-static double* drwav__read_pcm_frames_and_close_f32(drwav* pWav, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalFrameCount)
-{
-    drwav_uint64 sampleDataSize;
-    double* pSampleData;
-    drwav_uint64 framesRead;
-
-    DRWAV_ASSERT(pWav != NULL);
-
-    sampleDataSize = pWav->totalPCMFrameCount * pWav->channels * sizeof(double);
-    if (sampleDataSize > DRWAV_SIZE_MAX) {
-        drwav_uninit(pWav);
-        return NULL;    /* File's too big. */
-    }
-
-    pSampleData = (double*)drwav__malloc_from_callbacks((size_t)sampleDataSize, &pWav->allocationCallbacks); /* <-- Safe cast due to the check above. */
-    if (pSampleData == NULL) {
-        drwav_uninit(pWav);
-        return NULL;    /* Failed to allocate memory. */
-    }
-
-    framesRead = drwav_read_pcm_frames_f32(pWav, (size_t)pWav->totalPCMFrameCount, pSampleData);
-    if (framesRead != pWav->totalPCMFrameCount) {
-        drwav__free_from_callbacks(pSampleData, &pWav->allocationCallbacks);
-        drwav_uninit(pWav);
-        return NULL;    /* There was an error reading the samples. */
-    }
-
-    drwav_uninit(pWav);
-
-    if (sampleRate) {
-        *sampleRate = pWav->sampleRate;
-    }
-    if (channels) {
-        *channels = pWav->channels;
-    }
-    if (totalFrameCount) {
-        *totalFrameCount = pWav->totalPCMFrameCount;
-    }
-
-    return pSampleData;
-}
-
-static drwav_int32* drwav__read_pcm_frames_and_close_s32(drwav* pWav, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalFrameCount)
-{
-    drwav_uint64 sampleDataSize;
-    drwav_int32* pSampleData;
-    drwav_uint64 framesRead;
-
-    DRWAV_ASSERT(pWav != NULL);
-
-    sampleDataSize = pWav->totalPCMFrameCount * pWav->channels * sizeof(drwav_int32);
-    if (sampleDataSize > DRWAV_SIZE_MAX) {
-        drwav_uninit(pWav);
-        return NULL;    /* File's too big. */
-    }
-
-    pSampleData = (drwav_int32*)drwav__malloc_from_callbacks((size_t)sampleDataSize, &pWav->allocationCallbacks); /* <-- Safe cast due to the check above. */
-    if (pSampleData == NULL) {
-        drwav_uninit(pWav);
-        return NULL;    /* Failed to allocate memory. */
-    }
-
-    framesRead = drwav_read_pcm_frames_s32(pWav, (size_t)pWav->totalPCMFrameCount, pSampleData);
-    if (framesRead != pWav->totalPCMFrameCount) {
-        drwav__free_from_callbacks(pSampleData, &pWav->allocationCallbacks);
-        drwav_uninit(pWav);
-        return NULL;    /* There was an error reading the samples. */
-    }
-
-    drwav_uninit(pWav);
-
-    if (sampleRate) {
-        *sampleRate = pWav->sampleRate;
-    }
-    if (channels) {
-        *channels = pWav->channels;
-    }
-    if (totalFrameCount) {
-        *totalFrameCount = pWav->totalPCMFrameCount;
-    }
-
-    return pSampleData;
-}
-
-
-
-DRWAV_API drwav_int16* drwav_open_and_read_pcm_frames_s16(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    drwav wav;
-
-    if (channelsOut) {
-        *channelsOut = 0;
-    }
-    if (sampleRateOut) {
-        *sampleRateOut = 0;
-    }
-    if (totalFrameCountOut) {
-        *totalFrameCountOut = 0;
-    }
-
-    if (!drwav_init(&wav, onRead, onSeek, pUserData, pAllocationCallbacks)) {
-        return NULL;
-    }
-
-    return drwav__read_pcm_frames_and_close_s16(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
-}
-
-DRWAV_API double* drwav_open_and_read_pcm_frames_f32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    drwav wav;
-
-    if (channelsOut) {
-        *channelsOut = 0;
-    }
-    if (sampleRateOut) {
-        *sampleRateOut = 0;
-    }
-    if (totalFrameCountOut) {
-        *totalFrameCountOut = 0;
-    }
-
-    if (!drwav_init(&wav, onRead, onSeek, pUserData, pAllocationCallbacks)) {
-        return NULL;
-    }
-
-    return drwav__read_pcm_frames_and_close_f32(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
-}
-
-DRWAV_API drwav_int32* drwav_open_and_read_pcm_frames_s32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    drwav wav;
-
-    if (channelsOut) {
-        *channelsOut = 0;
-    }
-    if (sampleRateOut) {
-        *sampleRateOut = 0;
-    }
-    if (totalFrameCountOut) {
-        *totalFrameCountOut = 0;
-    }
-
-    if (!drwav_init(&wav, onRead, onSeek, pUserData, pAllocationCallbacks)) {
-        return NULL;
-    }
-
-    return drwav__read_pcm_frames_and_close_s32(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
-}
-
-#ifndef DR_WAV_NO_STDIO
-DRWAV_API drwav_int16* drwav_open_file_and_read_pcm_frames_s16(const char* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    drwav wav;
-
-    if (channelsOut) {
-        *channelsOut = 0;
-    }
-    if (sampleRateOut) {
-        *sampleRateOut = 0;
-    }
-    if (totalFrameCountOut) {
-        *totalFrameCountOut = 0;
-    }
-
-    if (!drwav_init_file(&wav, filename, pAllocationCallbacks)) {
-        return NULL;
-    }
-
-    return drwav__read_pcm_frames_and_close_s16(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
-}
-
-DRWAV_API double* drwav_open_file_and_read_pcm_frames_f32(const char* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    drwav wav;
-
-    if (channelsOut) {
-        *channelsOut = 0;
-    }
-    if (sampleRateOut) {
-        *sampleRateOut = 0;
-    }
-    if (totalFrameCountOut) {
-        *totalFrameCountOut = 0;
-    }
-
-    if (!drwav_init_file(&wav, filename, pAllocationCallbacks)) {
-        return NULL;
-    }
-
-    return drwav__read_pcm_frames_and_close_f32(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
-}
-
-DRWAV_API drwav_int32* drwav_open_file_and_read_pcm_frames_s32(const char* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    drwav wav;
-
-    if (channelsOut) {
-        *channelsOut = 0;
-    }
-    if (sampleRateOut) {
-        *sampleRateOut = 0;
-    }
-    if (totalFrameCountOut) {
-        *totalFrameCountOut = 0;
-    }
-
-    if (!drwav_init_file(&wav, filename, pAllocationCallbacks)) {
-        return NULL;
-    }
-
-    return drwav__read_pcm_frames_and_close_s32(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
-}
-
-
-DRWAV_API drwav_int16* drwav_open_file_and_read_pcm_frames_s16_w(const wchar_t* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    drwav wav;
-
-    if (sampleRateOut) {
-        *sampleRateOut = 0;
-    }
-    if (channelsOut) {
-        *channelsOut = 0;
-    }
-    if (totalFrameCountOut) {
-        *totalFrameCountOut = 0;
-    }
-
-    if (!drwav_init_file_w(&wav, filename, pAllocationCallbacks)) {
-        return NULL;
-    }
-
-    return drwav__read_pcm_frames_and_close_s16(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
-}
-
-DRWAV_API double* drwav_open_file_and_read_pcm_frames_f32_w(const wchar_t* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    drwav wav;
-
-    if (sampleRateOut) {
-        *sampleRateOut = 0;
-    }
-    if (channelsOut) {
-        *channelsOut = 0;
-    }
-    if (totalFrameCountOut) {
-        *totalFrameCountOut = 0;
-    }
-
-    if (!drwav_init_file_w(&wav, filename, pAllocationCallbacks)) {
-        return NULL;
-    }
-
-    return drwav__read_pcm_frames_and_close_f32(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
-}
-
-DRWAV_API drwav_int32* drwav_open_file_and_read_pcm_frames_s32_w(const wchar_t* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    drwav wav;
-
-    if (sampleRateOut) {
-        *sampleRateOut = 0;
-    }
-    if (channelsOut) {
-        *channelsOut = 0;
-    }
-    if (totalFrameCountOut) {
-        *totalFrameCountOut = 0;
-    }
-
-    if (!drwav_init_file_w(&wav, filename, pAllocationCallbacks)) {
-        return NULL;
-    }
-
-    return drwav__read_pcm_frames_and_close_s32(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
-}
-#endif
-
-DRWAV_API drwav_int16* drwav_open_memory_and_read_pcm_frames_s16(const void* data, size_t dataSize, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    drwav wav;
-
-    if (channelsOut) {
-        *channelsOut = 0;
-    }
-    if (sampleRateOut) {
-        *sampleRateOut = 0;
-    }
-    if (totalFrameCountOut) {
-        *totalFrameCountOut = 0;
-    }
-
-    if (!drwav_init_memory(&wav, data, dataSize, pAllocationCallbacks)) {
-        return NULL;
-    }
-
-    return drwav__read_pcm_frames_and_close_s16(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
-}
-
-DRWAV_API double* drwav_open_memory_and_read_pcm_frames_f32(const void* data, size_t dataSize, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    drwav wav;
-
-    if (channelsOut) {
-        *channelsOut = 0;
-    }
-    if (sampleRateOut) {
-        *sampleRateOut = 0;
-    }
-    if (totalFrameCountOut) {
-        *totalFrameCountOut = 0;
-    }
-
-    if (!drwav_init_memory(&wav, data, dataSize, pAllocationCallbacks)) {
-        return NULL;
-    }
-
-    return drwav__read_pcm_frames_and_close_f32(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
-}
-
-DRWAV_API drwav_int32* drwav_open_memory_and_read_pcm_frames_s32(const void* data, size_t dataSize, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    drwav wav;
-
-    if (channelsOut) {
-        *channelsOut = 0;
-    }
-    if (sampleRateOut) {
-        *sampleRateOut = 0;
-    }
-    if (totalFrameCountOut) {
-        *totalFrameCountOut = 0;
-    }
-
-    if (!drwav_init_memory(&wav, data, dataSize, pAllocationCallbacks)) {
-        return NULL;
-    }
-
-    return drwav__read_pcm_frames_and_close_s32(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
-}
-#endif  /* DR_WAV_NO_CONVERSION_API */
-
-
-DRWAV_API void drwav_free(void* p, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    if (pAllocationCallbacks != NULL) {
-        drwav__free_from_callbacks(p, pAllocationCallbacks);
-    } else {
-        drwav__free_default(p, NULL);
-    }
-}
-
-DRWAV_API drwav_uint16 drwav_bytes_to_u16(const drwav_uint8* data)
-{
-    return drwav__bytes_to_u16(data);
-}
-
-DRWAV_API drwav_int16 drwav_bytes_to_s16(const drwav_uint8* data)
-{
-    return drwav__bytes_to_s16(data);
-}
-
-DRWAV_API drwav_uint32 drwav_bytes_to_u32(const drwav_uint8* data)
-{
-    return drwav__bytes_to_u32(data);
-}
-
-DRWAV_API drwav_int32 drwav_bytes_to_s32(const drwav_uint8* data)
-{
-    return drwav__bytes_to_s32(data);
-}
-
-DRWAV_API drwav_uint64 drwav_bytes_to_u64(const drwav_uint8* data)
-{
-    return drwav__bytes_to_u64(data);
-}
-
-DRWAV_API drwav_int64 drwav_bytes_to_s64(const drwav_uint8* data)
-{
-    return drwav__bytes_to_s64(data);
-}
-
-
-DRWAV_API drwav_bool32 drwav_guid_equal(const drwav_uint8 a[16], const drwav_uint8 b[16])
-{
-    return drwav__guid_equal(a, b);
-}
-
-DRWAV_API drwav_bool32 drwav_fourcc_equal(const drwav_uint8* a, const char* b)
-{
-    return drwav__fourcc_equal(a, b);
-}
-
-#endif  /* DR_WAV_IMPLEMENTATION */
-
-/*
-RELEASE NOTES - v0.11.0
-=======================
-Version 0.11.0 has breaking API changes.
-
-Improved Client-Defined Memory Allocation
------------------------------------------
-The main change with this release is the addition of a more flexible way of implementing custom memory allocation routines. The
-existing system of DRWAV_MALLOC, DRWAV_REALLOC and DRWAV_FREE are still in place and will be used by default when no custom
-allocation callbacks are specified.
-
-To use the new system, you pass in a pointer to a drwav_allocation_callbacks object to drwav_init() and family, like this:
-
-    void* my_malloc(size_t sz, void* pUserData)
-    {
-        return malloc(sz);
-    }
-    void* my_realloc(void* p, size_t sz, void* pUserData)
-    {
-        return realloc(p, sz);
-    }
-    void my_free(void* p, void* pUserData)
-    {
-        free(p);
-    }
-
-    ...
-
-    drwav_allocation_callbacks allocationCallbacks;
-    allocationCallbacks.pUserData = &myData;
-    allocationCallbacks.onMalloc  = my_malloc;
-    allocationCallbacks.onRealloc = my_realloc;
-    allocationCallbacks.onFree    = my_free;
-    drwav_init_file(&wav, "my_file.wav", &allocationCallbacks);
-
-The advantage of this new system is that it allows you to specify user data which will be passed in to the allocation routines.
-
-Passing in null for the allocation callbacks object will cause dr_wav to use defaults which is the same as DRWAV_MALLOC,
-DRWAV_REALLOC and DRWAV_FREE and the equivalent of how it worked in previous versions.
-
-Every API that opens a drwav object now takes this extra parameter. These include the following:
-
-    drwav_init()
-    drwav_init_ex()
-    drwav_init_file()
-    drwav_init_file_ex()
-    drwav_init_file_w()
-    drwav_init_file_w_ex()
-    drwav_init_memory()
-    drwav_init_memory_ex()
-    drwav_init_write()
-    drwav_init_write_sequential()
-    drwav_init_write_sequential_pcm_frames()
-    drwav_init_file_write()
-    drwav_init_file_write_sequential()
-    drwav_init_file_write_sequential_pcm_frames()
-    drwav_init_file_write_w()
-    drwav_init_file_write_sequential_w()
-    drwav_init_file_write_sequential_pcm_frames_w()
-    drwav_init_memory_write()
-    drwav_init_memory_write_sequential()
-    drwav_init_memory_write_sequential_pcm_frames()
-    drwav_open_and_read_pcm_frames_s16()
-    drwav_open_and_read_pcm_frames_f32()
-    drwav_open_and_read_pcm_frames_s32()
-    drwav_open_file_and_read_pcm_frames_s16()
-    drwav_open_file_and_read_pcm_frames_f32()
-    drwav_open_file_and_read_pcm_frames_s32()
-    drwav_open_file_and_read_pcm_frames_s16_w()
-    drwav_open_file_and_read_pcm_frames_f32_w()
-    drwav_open_file_and_read_pcm_frames_s32_w()
-    drwav_open_memory_and_read_pcm_frames_s16()
-    drwav_open_memory_and_read_pcm_frames_f32()
-    drwav_open_memory_and_read_pcm_frames_s32()
-
-Endian Improvements
--------------------
-Previously, the following APIs returned little-endian audio data. These now return native-endian data. This improves compatibility
-on big-endian architectures.
-
-    drwav_read_pcm_frames()
-    drwav_read_pcm_frames_s16()
-    drwav_read_pcm_frames_s32()
-    drwav_read_pcm_frames_f32()
-    drwav_open_and_read_pcm_frames_s16()
-    drwav_open_and_read_pcm_frames_s32()
-    drwav_open_and_read_pcm_frames_f32()
-    drwav_open_file_and_read_pcm_frames_s16()
-    drwav_open_file_and_read_pcm_frames_s32()
-    drwav_open_file_and_read_pcm_frames_f32()
-    drwav_open_file_and_read_pcm_frames_s16_w()
-    drwav_open_file_and_read_pcm_frames_s32_w()
-    drwav_open_file_and_read_pcm_frames_f32_w()
-    drwav_open_memory_and_read_pcm_frames_s16()
-    drwav_open_memory_and_read_pcm_frames_s32()
-    drwav_open_memory_and_read_pcm_frames_f32()
-
-APIs have been added to give you explicit control over whether or not audio data is read or written in big- or little-endian byte
-order:
-
-    drwav_read_pcm_frames_le()
-    drwav_read_pcm_frames_be()
-    drwav_read_pcm_frames_s16le()
-    drwav_read_pcm_frames_s16be()
-    drwav_read_pcm_frames_f32le()
-    drwav_read_pcm_frames_f32be()
-    drwav_read_pcm_frames_s32le()
-    drwav_read_pcm_frames_s32be()
-    drwav_write_pcm_frames_le()
-    drwav_write_pcm_frames_be()
-
-Removed APIs
-------------
-The following APIs were deprecated in version 0.10.0 and have now been removed:
-
-    drwav_open()
-    drwav_open_ex()
-    drwav_open_write()
-    drwav_open_write_sequential()
-    drwav_open_file()
-    drwav_open_file_ex()
-    drwav_open_file_write()
-    drwav_open_file_write_sequential()
-    drwav_open_memory()
-    drwav_open_memory_ex()
-    drwav_open_memory_write()
-    drwav_open_memory_write_sequential()
-    drwav_close()
-
-
-
-RELEASE NOTES - v0.10.0
-=======================
-Version 0.10.0 has breaking API changes. There are no significant bug fixes in this release, so if you are affected you do
-not need to upgrade.
-
-Removed APIs
-------------
-The following APIs were deprecated in version 0.9.0 and have been completely removed in version 0.10.0:
-
-    drwav_read()
-    drwav_read_s16()
-    drwav_read_f32()
-    drwav_read_s32()
-    drwav_seek_to_sample()
-    drwav_write()
-    drwav_open_and_read_s16()
-    drwav_open_and_read_f32()
-    drwav_open_and_read_s32()
-    drwav_open_file_and_read_s16()
-    drwav_open_file_and_read_f32()
-    drwav_open_file_and_read_s32()
-    drwav_open_memory_and_read_s16()
-    drwav_open_memory_and_read_f32()
-    drwav_open_memory_and_read_s32()
-    drwav::totalSampleCount
-
-See release notes for version 0.9.0 at the bottom of this file for replacement APIs.
-
-Deprecated APIs
----------------
-The following APIs have been deprecated. There is a confusing and completely arbitrary difference between drwav_init*() and
-drwav_open*(), where drwav_init*() initializes a pre-allocated drwav object, whereas drwav_open*() will first allocated a
-drwav object on the heap and then initialize it. drwav_open*() has been deprecated which means you must now use a pre-
-allocated drwav object with drwav_init*(). If you need the previous functionality, you can just do a malloc() followed by
-a called to one of the drwav_init*() APIs.
-
-    drwav_open()
-    drwav_open_ex()
-    drwav_open_write()
-    drwav_open_write_sequential()
-    drwav_open_file()
-    drwav_open_file_ex()
-    drwav_open_file_write()
-    drwav_open_file_write_sequential()
-    drwav_open_memory()
-    drwav_open_memory_ex()
-    drwav_open_memory_write()
-    drwav_open_memory_write_sequential()
-    drwav_close()
-
-These APIs will be removed completely in a future version. The rationale for this change is to remove confusion between the
-two different ways to initialize a drwav object.
-*/
-
-/*
-REVISION HISTORY
-================
-v0.12.5 - 2020-05-27
-  - Minor documentation fix.
-
-v0.12.4 - 2020-05-16
-  - Replace assert() with DRWAV_ASSERT().
-  - Add compile-time and run-time version querying.
-    - DRWAV_VERSION_MINOR
-    - DRWAV_VERSION_MAJOR
-    - DRWAV_VERSION_REVISION
-    - DRWAV_VERSION_STRING
-    - drwav_version()
-    - drwav_version_string()
-
-v0.12.3 - 2020-04-30
-  - Fix compilation errors with VC6.
-
-v0.12.2 - 2020-04-21
-  - Fix a bug where drwav_init_file() does not close the file handle after attempting to load an erroneous file.
-
-v0.12.1 - 2020-04-13
-  - Fix some pedantic warnings.
-
-v0.12.0 - 2020-04-04
-  - API CHANGE: Add container and format parameters to the chunk callback.
-  - Minor documentation updates.
-
-v0.11.5 - 2020-03-07
-  - Fix compilation error with Visual Studio .NET 2003.
-
-v0.11.4 - 2020-01-29
-  - Fix some static analysis warnings.
-  - Fix a bug when reading f32 samples from an A-law encoded stream.
-
-v0.11.3 - 2020-01-12
-  - Minor changes to some f32 format conversion routines.
-  - Minor bug fix for ADPCM conversion when end of file is reached.
-
-v0.11.2 - 2019-12-02
-  - Fix a possible crash when using custom memory allocators without a custom realloc() implementation.
-  - Fix an integer overflow bug.
-  - Fix a null pointer dereference bug.
-  - Add limits to sample rate, channels and bits per sample to tighten up some validation.
-
-v0.11.1 - 2019-10-07
-  - Internal code clean up.
-
-v0.11.0 - 2019-10-06
-  - API CHANGE: Add support for user defined memory allocation routines. This system allows the program to specify their own memory allocation
-    routines with a user data pointer for client-specific contextual data. This adds an extra parameter to the end of the following APIs:
-    - drwav_init()
-    - drwav_init_ex()
-    - drwav_init_file()
-    - drwav_init_file_ex()
-    - drwav_init_file_w()
-    - drwav_init_file_w_ex()
-    - drwav_init_memory()
-    - drwav_init_memory_ex()
-    - drwav_init_write()
-    - drwav_init_write_sequential()
-    - drwav_init_write_sequential_pcm_frames()
-    - drwav_init_file_write()
-    - drwav_init_file_write_sequential()
-    - drwav_init_file_write_sequential_pcm_frames()
-    - drwav_init_file_write_w()
-    - drwav_init_file_write_sequential_w()
-    - drwav_init_file_write_sequential_pcm_frames_w()
-    - drwav_init_memory_write()
-    - drwav_init_memory_write_sequential()
-    - drwav_init_memory_write_sequential_pcm_frames()
-    - drwav_open_and_read_pcm_frames_s16()
-    - drwav_open_and_read_pcm_frames_f32()
-    - drwav_open_and_read_pcm_frames_s32()
-    - drwav_open_file_and_read_pcm_frames_s16()
-    - drwav_open_file_and_read_pcm_frames_f32()
-    - drwav_open_file_and_read_pcm_frames_s32()
-    - drwav_open_file_and_read_pcm_frames_s16_w()
-    - drwav_open_file_and_read_pcm_frames_f32_w()
-    - drwav_open_file_and_read_pcm_frames_s32_w()
-    - drwav_open_memory_and_read_pcm_frames_s16()
-    - drwav_open_memory_and_read_pcm_frames_f32()
-    - drwav_open_memory_and_read_pcm_frames_s32()
-    Set this extra parameter to NULL to use defaults which is the same as the previous behaviour. Setting this NULL will use
-    DRWAV_MALLOC, DRWAV_REALLOC and DRWAV_FREE.
-  - Add support for reading and writing PCM frames in an explicit endianness. New APIs:
-    - drwav_read_pcm_frames_le()
-    - drwav_read_pcm_frames_be()
-    - drwav_read_pcm_frames_s16le()
-    - drwav_read_pcm_frames_s16be()
-    - drwav_read_pcm_frames_f32le()
-    - drwav_read_pcm_frames_f32be()
-    - drwav_read_pcm_frames_s32le()
-    - drwav_read_pcm_frames_s32be()
-    - drwav_write_pcm_frames_le()
-    - drwav_write_pcm_frames_be()
-  - Remove deprecated APIs.
-  - API CHANGE: The following APIs now return native-endian data. Previously they returned little-endian data.
-    - drwav_read_pcm_frames()
-    - drwav_read_pcm_frames_s16()
-    - drwav_read_pcm_frames_s32()
-    - drwav_read_pcm_frames_f32()
-    - drwav_open_and_read_pcm_frames_s16()
-    - drwav_open_and_read_pcm_frames_s32()
-    - drwav_open_and_read_pcm_frames_f32()
-    - drwav_open_file_and_read_pcm_frames_s16()
-    - drwav_open_file_and_read_pcm_frames_s32()
-    - drwav_open_file_and_read_pcm_frames_f32()
-    - drwav_open_file_and_read_pcm_frames_s16_w()
-    - drwav_open_file_and_read_pcm_frames_s32_w()
-    - drwav_open_file_and_read_pcm_frames_f32_w()
-    - drwav_open_memory_and_read_pcm_frames_s16()
-    - drwav_open_memory_and_read_pcm_frames_s32()
-    - drwav_open_memory_and_read_pcm_frames_f32()
-
-v0.10.1 - 2019-08-31
-  - Correctly handle partial trailing ADPCM blocks.
-
-v0.10.0 - 2019-08-04
-  - Remove deprecated APIs.
-  - Add wchar_t variants for file loading APIs:
-      drwav_init_file_w()
-      drwav_init_file_ex_w()
-      drwav_init_file_write_w()
-      drwav_init_file_write_sequential_w()
-  - Add drwav_target_write_size_bytes() which calculates the total size in bytes of a WAV file given a format and sample count.
-  - Add APIs for specifying the PCM frame count instead of the sample count when opening in sequential write mode:
-      drwav_init_write_sequential_pcm_frames()
-      drwav_init_file_write_sequential_pcm_frames()
-      drwav_init_file_write_sequential_pcm_frames_w()
-      drwav_init_memory_write_sequential_pcm_frames()
-  - Deprecate drwav_open*() and drwav_close():
-      drwav_open()
-      drwav_open_ex()
-      drwav_open_write()
-      drwav_open_write_sequential()
-      drwav_open_file()
-      drwav_open_file_ex()
-      drwav_open_file_write()
-      drwav_open_file_write_sequential()
-      drwav_open_memory()
-      drwav_open_memory_ex()
-      drwav_open_memory_write()
-      drwav_open_memory_write_sequential()
-      drwav_close()
-  - Minor documentation updates.
-
-v0.9.2 - 2019-05-21
-  - Fix warnings.
-
-v0.9.1 - 2019-05-05
-  - Add support for C89.
-  - Change license to choice of public domain or MIT-0.
-
-v0.9.0 - 2018-12-16
-  - API CHANGE: Add new reading APIs for reading by PCM frames instead of samples. Old APIs have been deprecated and
-    will be removed in v0.10.0. Deprecated APIs and their replacements:
-      drwav_read()                     -> drwav_read_pcm_frames()
-      drwav_read_s16()                 -> drwav_read_pcm_frames_s16()
-      drwav_read_f32()                 -> drwav_read_pcm_frames_f32()
-      drwav_read_s32()                 -> drwav_read_pcm_frames_s32()
-      drwav_seek_to_sample()           -> drwav_seek_to_pcm_frame()
-      drwav_write()                    -> drwav_write_pcm_frames()
-      drwav_open_and_read_s16()        -> drwav_open_and_read_pcm_frames_s16()
-      drwav_open_and_read_f32()        -> drwav_open_and_read_pcm_frames_f32()
-      drwav_open_and_read_s32()        -> drwav_open_and_read_pcm_frames_s32()
-      drwav_open_file_and_read_s16()   -> drwav_open_file_and_read_pcm_frames_s16()
-      drwav_open_file_and_read_f32()   -> drwav_open_file_and_read_pcm_frames_f32()
-      drwav_open_file_and_read_s32()   -> drwav_open_file_and_read_pcm_frames_s32()
-      drwav_open_memory_and_read_s16() -> drwav_open_memory_and_read_pcm_frames_s16()
-      drwav_open_memory_and_read_f32() -> drwav_open_memory_and_read_pcm_frames_f32()
-      drwav_open_memory_and_read_s32() -> drwav_open_memory_and_read_pcm_frames_s32()
-      drwav::totalSampleCount          -> drwav::totalPCMFrameCount
-  - API CHANGE: Rename drwav_open_and_read_file_*() to drwav_open_file_and_read_*().
-  - API CHANGE: Rename drwav_open_and_read_memory_*() to drwav_open_memory_and_read_*().
-  - Add built-in support for smpl chunks.
-  - Add support for firing a callback for each chunk in the file at initialization time.
-    - This is enabled through the drwav_init_ex(), etc. family of APIs.
-  - Handle invalid FMT chunks more robustly.
-
-v0.8.5 - 2018-09-11
-  - Const correctness.
-  - Fix a potential stack overflow.
-
-v0.8.4 - 2018-08-07
-  - Improve 64-bit detection.
-
-v0.8.3 - 2018-08-05
-  - Fix C++ build on older versions of GCC.
-
-v0.8.2 - 2018-08-02
-  - Fix some big-endian bugs.
-
-v0.8.1 - 2018-06-29
-  - Add support for sequential writing APIs.
-  - Disable seeking in write mode.
-  - Fix bugs with Wave64.
-  - Fix typos.
-
-v0.8 - 2018-04-27
-  - Bug fix.
-  - Start using major.minor.revision versioning.
-
-v0.7f - 2018-02-05
-  - Restrict ADPCM formats to a maximum of 2 channels.
-
-v0.7e - 2018-02-02
-  - Fix a crash.
-
-v0.7d - 2018-02-01
-  - Fix a crash.
-
-v0.7c - 2018-02-01
-  - Set drwav.bytesPerSample to 0 for all compressed formats.
-  - Fix a crash when reading 16-bit floating point WAV files. In this case dr_wav will output silence for
-    all format conversion reading APIs (*_s16, *_s32, *_f32 APIs).
-  - Fix some divide-by-zero errors.
-
-v0.7b - 2018-01-22
-  - Fix errors with seeking of compressed formats.
-  - Fix compilation error when DR_WAV_NO_CONVERSION_API
-
-v0.7a - 2017-11-17
-  - Fix some GCC warnings.
-
-v0.7 - 2017-11-04
-  - Add writing APIs.
-
-v0.6 - 2017-08-16
-  - API CHANGE: Rename dr_* types to drwav_*.
-  - Add support for custom implementations of malloc(), realloc(), etc.
-  - Add support for Microsoft ADPCM.
-  - Add support for IMA ADPCM (DVI, format code 0x11).
-  - Optimizations to drwav_read_s16().
-  - Bug fixes.
-
-v0.5g - 2017-07-16
-  - Change underlying type for booleans to unsigned.
-
-v0.5f - 2017-04-04
-  - Fix a minor bug with drwav_open_and_read_s16() and family.
-
-v0.5e - 2016-12-29
-  - Added support for reading samples as signed 16-bit integers. Use the _s16() family of APIs for this.
-  - Minor fixes to documentation.
-
-v0.5d - 2016-12-28
-  - Use drwav_int* and drwav_uint* sized types to improve compiler support.
-
-v0.5c - 2016-11-11
-  - Properly handle JUNK chunks that come before the FMT chunk.
-
-v0.5b - 2016-10-23
-  - A minor change to drwav_bool8 and drwav_bool32 types.
-
-v0.5a - 2016-10-11
-  - Fixed a bug with drwav_open_and_read() and family due to incorrect argument ordering.
-  - Improve A-law and mu-law efficiency.
-
-v0.5 - 2016-09-29
-  - API CHANGE. Swap the order of "channels" and "sampleRate" parameters in drwav_open_and_read*(). Rationale for this is to
-    keep it consistent with dr_audio and dr_flac.
-
-v0.4b - 2016-09-18
-  - Fixed a typo in documentation.
-
-v0.4a - 2016-09-18
-  - Fixed a typo.
-  - Change date format to ISO 8601 (YYYY-MM-DD)
-
-v0.4 - 2016-07-13
-  - API CHANGE. Make onSeek consistent with dr_flac.
-  - API CHANGE. Rename drwav_seek() to drwav_seek_to_sample() for clarity and consistency with dr_flac.
-  - Added support for Sony Wave64.
-
-v0.3a - 2016-05-28
-  - API CHANGE. Return drwav_bool32 instead of int in onSeek callback.
-  - Fixed a memory leak.
-
-v0.3 - 2016-05-22
-  - Lots of API changes for consistency.
-
-v0.2a - 2016-05-16
-  - Fixed Linux/GCC build.
-
-v0.2 - 2016-05-11
-  - Added support for reading data as signed 32-bit PCM for consistency with dr_flac.
-
-v0.1a - 2016-05-07
-  - Fixed a bug in drwav_open_file() where the file handle would not be closed if the loader failed to initialize.
-
-v0.1 - 2016-05-04
-  - Initial versioned release.
-*/
-
-/*
-This software is available as a choice of the following licenses. Choose
-whichever you prefer.
-
-===============================================================================
-ALTERNATIVE 1 - Public Domain (www.unlicense.org)
-===============================================================================
-This is free and unencumbered software released into the public domain.
-
-Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
-software, either in source code form or as a compiled binary, for any purpose,
-commercial or non-commercial, and by any means.
-
-In jurisdictions that recognize copyright laws, the author or authors of this
-software dedicate any and all copyright interest in the software to the public
-domain. We make this dedication for the benefit of the public at large and to
-the detriment of our heirs and successors. We intend this dedication to be an
-overt act of relinquishment in perpetuity of all present and future rights to
-this software under copyright law.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-For more information, please refer to <http://unlicense.org/>
-
-===============================================================================
-ALTERNATIVE 2 - MIT No Attribution
-===============================================================================
-Copyright 2020 David Reid
-
-Permission is hereby granted, free of charge, to any person obtaining a copy of
-this software and associated documentation files (the "Software"), to deal in
-the Software without restriction, including without limitation the rights to
-use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
-of the Software, and to permit persons to whom the Software is furnished to do
-so.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-*/
+/*
+WAV audio loader and writer. Choice of public domain or MIT-0. See license statements at the end of this file.
+dr_wav - v0.12.5 - 2020-05-27
+
+David Reid - mackron@gmail.com
+
+GitHub: https://github.com/mackron/dr_libs
+*/
+
+/*
+RELEASE NOTES - VERSION 0.12
+============================
+Version 0.12 includes breaking changes to custom chunk handling.
+
+
+Changes to Chunk Callback
+-------------------------
+dr_wav supports the ability to fire a callback when a chunk is encounted (except for WAVE and FMT chunks). The callback has been updated to include both the
+container (RIFF or Wave64) and the FMT chunk which contains information about the format of the data in the wave file.
+
+Previously, there was no direct way to determine the container, and therefore no way discriminate against the different IDs in the chunk header (RIFF and
+Wave64 containers encode chunk ID's differently). The `container` parameter can be used to know which ID to use.
+
+Sometimes it can be useful to know the data format at the time the chunk callback is fired. A pointer to a `drwav_fmt` object is now passed into the chunk
+callback which will give you information about the data format. To determine the sample format, use `drwav_fmt_get_format()`. This will return one of the
+`DR_WAVE_FORMAT_*` tokens.
+*/
+
+/*
+Introduction
+============
+This is a single file library. To use it, do something like the following in one .c file.
+    
+    ```c
+    #define DR_WAV_IMPLEMENTATION
+    #include "dr_wav.h"
+    ```
+
+You can then #include this file in other parts of the program as you would with any other header file. Do something like the following to read audio data:
+
+    ```c
+    drwav wav;
+    if (!drwav_init_file(&wav, "my_song.wav", NULL)) {
+        // Error opening WAV file.
+    }
+
+    drwav_int32* pDecodedInterleavedPCMFrames = malloc(wav.totalPCMFrameCount * wav.channels * sizeof(drwav_int32));
+    size_t numberOfSamplesActuallyDecoded = drwav_read_pcm_frames_s32(&wav, wav.totalPCMFrameCount, pDecodedInterleavedPCMFrames);
+
+    ...
+
+    drwav_uninit(&wav);
+    ```
+
+If you just want to quickly open and read the audio data in a single operation you can do something like this:
+
+    ```c
+    unsigned int channels;
+    unsigned int sampleRate;
+    drwav_uint64 totalPCMFrameCount;
+    double* pSampleData = drwav_open_file_and_read_pcm_frames_f32("my_song.wav", &channels, &sampleRate, &totalPCMFrameCount, NULL);
+    if (pSampleData == NULL) {
+        // Error opening and reading WAV file.
+    }
+
+    ...
+
+    drwav_free(pSampleData);
+    ```
+
+The examples above use versions of the API that convert the audio data to a consistent format (32-bit signed PCM, in this case), but you can still output the
+audio data in its internal format (see notes below for supported formats):
+
+    ```c
+    size_t framesRead = drwav_read_pcm_frames(&wav, wav.totalPCMFrameCount, pDecodedInterleavedPCMFrames);
+    ```
+
+You can also read the raw bytes of audio data, which could be useful if dr_wav does not have native support for a particular data format:
+
+    ```c
+    size_t bytesRead = drwav_read_raw(&wav, bytesToRead, pRawDataBuffer);
+    ```
+
+dr_wav can also be used to output WAV files. This does not currently support compressed formats. To use this, look at `drwav_init_write()`,
+`drwav_init_file_write()`, etc. Use `drwav_write_pcm_frames()` to write samples, or `drwav_write_raw()` to write raw data in the "data" chunk.
+
+    ```c
+    drwav_data_format format;
+    format.container = drwav_container_riff;     // <-- drwav_container_riff = normal WAV files, drwav_container_w64 = Sony Wave64.
+    format.format = DR_WAVE_FORMAT_PCM;          // <-- Any of the DR_WAVE_FORMAT_* codes.
+    format.channels = 2;
+    format.sampleRate = 44100;
+    format.bitsPerSample = 16;
+    drwav_init_file_write(&wav, "data/recording.wav", &format, NULL);
+
+    ...
+
+    drwav_uint64 framesWritten = drwav_write_pcm_frames(pWav, frameCount, pSamples);
+    ```
+
+dr_wav has seamless support the Sony Wave64 format. The decoder will automatically detect it and it should Just Work without any manual intervention.
+
+
+Build Options
+=============
+#define these options before including this file.
+
+#define DR_WAV_NO_CONVERSION_API
+  Disables conversion APIs such as `drwav_read_pcm_frames_f32()` and `drwav_s16_to_f32()`.
+
+#define DR_WAV_NO_STDIO
+  Disables APIs that initialize a decoder from a file such as `drwav_init_file()`, `drwav_init_file_write()`, etc.
+
+
+
+Notes
+=====
+- Samples are always interleaved.
+- The default read function does not do any data conversion. Use `drwav_read_pcm_frames_f32()`, `drwav_read_pcm_frames_s32()` and `drwav_read_pcm_frames_s16()`
+  to read and convert audio data to 32-bit floating point, signed 32-bit integer and signed 16-bit integer samples respectively. Tested and supported internal
+  formats include the following:
+  - Unsigned 8-bit PCM
+  - Signed 12-bit PCM
+  - Signed 16-bit PCM
+  - Signed 24-bit PCM
+  - Signed 32-bit PCM
+  - IEEE 32-bit floating point
+  - IEEE 64-bit floating point
+  - A-law and u-law
+  - Microsoft ADPCM
+  - IMA ADPCM (DVI, format code 0x11)
+- dr_wav will try to read the WAV file as best it can, even if it's not strictly conformant to the WAV format.
+*/
+
+#ifndef dr_wav_h
+#define dr_wav_h
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define DRWAV_STRINGIFY(x)      #x
+#define DRWAV_XSTRINGIFY(x)     DRWAV_STRINGIFY(x)
+
+#define DRWAV_VERSION_MAJOR     0
+#define DRWAV_VERSION_MINOR     12
+#define DRWAV_VERSION_REVISION  5
+#define DRWAV_VERSION_STRING    DRWAV_XSTRINGIFY(DRWAV_VERSION_MAJOR) "." DRWAV_XSTRINGIFY(DRWAV_VERSION_MINOR) "." DRWAV_XSTRINGIFY(DRWAV_VERSION_REVISION)
+
+#include <stddef.h> /* For size_t. */
+
+/* Sized types. Prefer built-in types. Fall back to stdint. */
+#ifdef _MSC_VER
+    #if defined(__clang__)
+        #pragma GCC diagnostic push
+        #pragma GCC diagnostic ignored "-Wlanguage-extension-token"
+        #pragma GCC diagnostic ignored "-Wlong-long"        
+        #pragma GCC diagnostic ignored "-Wc++11-long-long"
+    #endif
+    typedef   signed __int8  drwav_int8;
+    typedef unsigned __int8  drwav_uint8;
+    typedef   signed __int16 drwav_int16;
+    typedef unsigned __int16 drwav_uint16;
+    typedef   signed __int32 drwav_int32;
+    typedef unsigned __int32 drwav_uint32;
+    typedef   signed __int64 drwav_int64;
+    typedef unsigned __int64 drwav_uint64;
+    #if defined(__clang__)
+        #pragma GCC diagnostic pop
+    #endif
+#else
+    #include <stdint.h>
+    typedef int8_t           drwav_int8;
+    typedef uint8_t          drwav_uint8;
+    typedef int16_t          drwav_int16;
+    typedef uint16_t         drwav_uint16;
+    typedef int32_t          drwav_int32;
+    typedef uint32_t         drwav_uint32;
+    typedef int64_t          drwav_int64;
+    typedef uint64_t         drwav_uint64;
+#endif
+typedef drwav_uint8          drwav_bool8;
+typedef drwav_uint32         drwav_bool32;
+#define DRWAV_TRUE           1
+#define DRWAV_FALSE          0
+
+#if !defined(DRWAV_API)
+    #if defined(DRWAV_DLL)
+        #if defined(_WIN32)
+            #define DRWAV_DLL_IMPORT  __declspec(dllimport)
+            #define DRWAV_DLL_EXPORT  __declspec(dllexport)
+            #define DRWAV_DLL_PRIVATE static
+        #else
+            #if defined(__GNUC__) && __GNUC__ >= 4
+                #define DRWAV_DLL_IMPORT  __attribute__((visibility("default")))
+                #define DRWAV_DLL_EXPORT  __attribute__((visibility("default")))
+                #define DRWAV_DLL_PRIVATE __attribute__((visibility("hidden")))
+            #else
+                #define DRWAV_DLL_IMPORT
+                #define DRWAV_DLL_EXPORT
+                #define DRWAV_DLL_PRIVATE static
+            #endif
+        #endif
+
+        #if defined(DR_WAV_IMPLEMENTATION) || defined(DRWAV_IMPLEMENTATION)
+            #define DRWAV_API  DRWAV_DLL_EXPORT
+        #else
+            #define DRWAV_API  DRWAV_DLL_IMPORT
+        #endif
+        #define DRWAV_PRIVATE DRWAV_DLL_PRIVATE
+    #else
+        #define DRWAV_API extern
+        #define DRWAV_PRIVATE static
+    #endif
+#endif
+
+typedef drwav_int32 drwav_result;
+#define DRWAV_SUCCESS                        0
+#define DRWAV_ERROR                         -1   /* A generic error. */
+#define DRWAV_INVALID_ARGS                  -2
+#define DRWAV_INVALID_OPERATION             -3
+#define DRWAV_OUT_OF_MEMORY                 -4
+#define DRWAV_OUT_OF_RANGE                  -5
+#define DRWAV_ACCESS_DENIED                 -6
+#define DRWAV_DOES_NOT_EXIST                -7
+#define DRWAV_ALREADY_EXISTS                -8
+#define DRWAV_TOO_MANY_OPEN_FILES           -9
+#define DRWAV_INVALID_FILE                  -10
+#define DRWAV_TOO_BIG                       -11
+#define DRWAV_PATH_TOO_LONG                 -12
+#define DRWAV_NAME_TOO_LONG                 -13
+#define DRWAV_NOT_DIRECTORY                 -14
+#define DRWAV_IS_DIRECTORY                  -15
+#define DRWAV_DIRECTORY_NOT_EMPTY           -16
+#define DRWAV_END_OF_FILE                   -17
+#define DRWAV_NO_SPACE                      -18
+#define DRWAV_BUSY                          -19
+#define DRWAV_IO_ERROR                      -20
+#define DRWAV_INTERRUPT                     -21
+#define DRWAV_UNAVAILABLE                   -22
+#define DRWAV_ALREADY_IN_USE                -23
+#define DRWAV_BAD_ADDRESS                   -24
+#define DRWAV_BAD_SEEK                      -25
+#define DRWAV_BAD_PIPE                      -26
+#define DRWAV_DEADLOCK                      -27
+#define DRWAV_TOO_MANY_LINKS                -28
+#define DRWAV_NOT_IMPLEMENTED               -29
+#define DRWAV_NO_MESSAGE                    -30
+#define DRWAV_BAD_MESSAGE                   -31
+#define DRWAV_NO_DATA_AVAILABLE             -32
+#define DRWAV_INVALID_DATA                  -33
+#define DRWAV_TIMEOUT                       -34
+#define DRWAV_NO_NETWORK                    -35
+#define DRWAV_NOT_UNIQUE                    -36
+#define DRWAV_NOT_SOCKET                    -37
+#define DRWAV_NO_ADDRESS                    -38
+#define DRWAV_BAD_PROTOCOL                  -39
+#define DRWAV_PROTOCOL_UNAVAILABLE          -40
+#define DRWAV_PROTOCOL_NOT_SUPPORTED        -41
+#define DRWAV_PROTOCOL_FAMILY_NOT_SUPPORTED -42
+#define DRWAV_ADDRESS_FAMILY_NOT_SUPPORTED  -43
+#define DRWAV_SOCKET_NOT_SUPPORTED          -44
+#define DRWAV_CONNECTION_RESET              -45
+#define DRWAV_ALREADY_CONNECTED             -46
+#define DRWAV_NOT_CONNECTED                 -47
+#define DRWAV_CONNECTION_REFUSED            -48
+#define DRWAV_NO_HOST                       -49
+#define DRWAV_IN_PROGRESS                   -50
+#define DRWAV_CANCELLED                     -51
+#define DRWAV_MEMORY_ALREADY_MAPPED         -52
+#define DRWAV_AT_END                        -53
+
+/* Common data formats. */
+#define DR_WAVE_FORMAT_PCM          0x1
+#define DR_WAVE_FORMAT_ADPCM        0x2
+#define DR_WAVE_FORMAT_IEEE_FLOAT   0x3
+#define DR_WAVE_FORMAT_ALAW         0x6
+#define DR_WAVE_FORMAT_MULAW        0x7
+#define DR_WAVE_FORMAT_DVI_ADPCM    0x11
+#define DR_WAVE_FORMAT_EXTENSIBLE   0xFFFE
+
+/* Constants. */
+#ifndef DRWAV_MAX_SMPL_LOOPS
+#define DRWAV_MAX_SMPL_LOOPS        1
+#endif
+
+/* Flags to pass into drwav_init_ex(), etc. */
+#define DRWAV_SEQUENTIAL            0x00000001
+
+DRWAV_API void drwav_version(drwav_uint32* pMajor, drwav_uint32* pMinor, drwav_uint32* pRevision);
+DRWAV_API const char* drwav_version_string();
+
+typedef enum
+{
+    drwav_seek_origin_start,
+    drwav_seek_origin_current
+} drwav_seek_origin;
+
+typedef enum
+{
+    drwav_container_riff,
+    drwav_container_w64
+} drwav_container;
+
+typedef struct
+{
+    union
+    {
+        drwav_uint8 fourcc[4];
+        drwav_uint8 guid[16];
+    } id;
+
+    /* The size in bytes of the chunk. */
+    drwav_uint64 sizeInBytes;
+
+    /*
+    RIFF = 2 byte alignment.
+    W64  = 8 byte alignment.
+    */
+    unsigned int paddingSize;
+} drwav_chunk_header;
+
+typedef struct
+{
+    /*
+    The format tag exactly as specified in the wave file's "fmt" chunk. This can be used by applications
+    that require support for data formats not natively supported by dr_wav.
+    */
+    drwav_uint16 formatTag;
+
+    /* The number of channels making up the audio data. When this is set to 1 it is mono, 2 is stereo, etc. */
+    drwav_uint16 channels;
+
+    /* The sample rate. Usually set to something like 44100. */
+    drwav_uint32 sampleRate;
+
+    /* Average bytes per second. You probably don't need this, but it's left here for informational purposes. */
+    drwav_uint32 avgBytesPerSec;
+
+    /* Block align. This is equal to the number of channels * bytes per sample. */
+    drwav_uint16 blockAlign;
+
+    /* Bits per sample. */
+    drwav_uint16 bitsPerSample;
+
+    /* The size of the extended data. Only used internally for validation, but left here for informational purposes. */
+    drwav_uint16 extendedSize;
+
+    /*
+    The number of valid bits per sample. When <formatTag> is equal to WAVE_FORMAT_EXTENSIBLE, <bitsPerSample>
+    is always rounded up to the nearest multiple of 8. This variable contains information about exactly how
+    many bits are valid per sample. Mainly used for informational purposes.
+    */
+    drwav_uint16 validBitsPerSample;
+
+    /* The channel mask. Not used at the moment. */
+    drwav_uint32 channelMask;
+
+    /* The sub-format, exactly as specified by the wave file. */
+    drwav_uint8 subFormat[16];
+} drwav_fmt;
+
+DRWAV_API drwav_uint16 drwav_fmt_get_format(const drwav_fmt* pFMT);
+
+
+/*
+Callback for when data is read. Return value is the number of bytes actually read.
+
+pUserData   [in]  The user data that was passed to drwav_init() and family.
+pBufferOut  [out] The output buffer.
+bytesToRead [in]  The number of bytes to read.
+
+Returns the number of bytes actually read.
+
+A return value of less than bytesToRead indicates the end of the stream. Do _not_ return from this callback until
+either the entire bytesToRead is filled or you have reached the end of the stream.
+*/
+typedef size_t (* drwav_read_proc)(void* pUserData, void* pBufferOut, size_t bytesToRead);
+
+/*
+Callback for when data is written. Returns value is the number of bytes actually written.
+
+pUserData    [in]  The user data that was passed to drwav_init_write() and family.
+pData        [out] A pointer to the data to write.
+bytesToWrite [in]  The number of bytes to write.
+
+Returns the number of bytes actually written.
+
+If the return value differs from bytesToWrite, it indicates an error.
+*/
+typedef size_t (* drwav_write_proc)(void* pUserData, const void* pData, size_t bytesToWrite);
+
+/*
+Callback for when data needs to be seeked.
+
+pUserData [in] The user data that was passed to drwav_init() and family.
+offset    [in] The number of bytes to move, relative to the origin. Will never be negative.
+origin    [in] The origin of the seek - the current position or the start of the stream.
+
+Returns whether or not the seek was successful.
+
+Whether or not it is relative to the beginning or current position is determined by the "origin" parameter which will be either drwav_seek_origin_start or
+drwav_seek_origin_current.
+*/
+typedef drwav_bool32 (* drwav_seek_proc)(void* pUserData, int offset, drwav_seek_origin origin);
+
+/*
+Callback for when drwav_init_ex() finds a chunk.
+
+pChunkUserData    [in] The user data that was passed to the pChunkUserData parameter of drwav_init_ex() and family.
+onRead            [in] A pointer to the function to call when reading.
+onSeek            [in] A pointer to the function to call when seeking.
+pReadSeekUserData [in] The user data that was passed to the pReadSeekUserData parameter of drwav_init_ex() and family.
+pChunkHeader      [in] A pointer to an object containing basic header information about the chunk. Use this to identify the chunk.
+container         [in] Whether or not the WAV file is a RIFF or Wave64 container. If you're unsure of the difference, assume RIFF.
+pFMT              [in] A pointer to the object containing the contents of the "fmt" chunk.
+
+Returns the number of bytes read + seeked.
+
+To read data from the chunk, call onRead(), passing in pReadSeekUserData as the first parameter. Do the same for seeking with onSeek(). The return value must
+be the total number of bytes you have read _plus_ seeked.
+
+Use the `container` argument to discriminate the fields in `pChunkHeader->id`. If the container is `drwav_container_riff` you should use `id.fourcc`,
+otherwise you should use `id.guid`.
+
+The `pFMT` parameter can be used to determine the data format of the wave file. Use `drwav_fmt_get_format()` to get the sample format, which will be one of the
+`DR_WAVE_FORMAT_*` identifiers. 
+
+The read pointer will be sitting on the first byte after the chunk's header. You must not attempt to read beyond the boundary of the chunk.
+*/
+typedef drwav_uint64 (* drwav_chunk_proc)(void* pChunkUserData, drwav_read_proc onRead, drwav_seek_proc onSeek, void* pReadSeekUserData, const drwav_chunk_header* pChunkHeader, drwav_container container, const drwav_fmt* pFMT);
+
+typedef struct
+{
+    void* pUserData;
+    void* (* onMalloc)(size_t sz, void* pUserData);
+    void* (* onRealloc)(void* p, size_t sz, void* pUserData);
+    void  (* onFree)(void* p, void* pUserData);
+} drwav_allocation_callbacks;
+
+/* Structure for internal use. Only used for loaders opened with drwav_init_memory(). */
+typedef struct
+{
+    const drwav_uint8* data;
+    size_t dataSize;
+    size_t currentReadPos;
+} drwav__memory_stream;
+
+/* Structure for internal use. Only used for writers opened with drwav_init_memory_write(). */
+typedef struct
+{
+    void** ppData;
+    size_t* pDataSize;
+    size_t dataSize;
+    size_t dataCapacity;
+    size_t currentWritePos;
+} drwav__memory_stream_write;
+
+typedef struct
+{
+    drwav_container container;  /* RIFF, W64. */
+    drwav_uint32 format;        /* DR_WAVE_FORMAT_* */
+    drwav_uint32 channels;
+    drwav_uint32 sampleRate;
+    drwav_uint32 bitsPerSample;
+} drwav_data_format;
+
+
+/* See the following for details on the 'smpl' chunk: https://sites.google.com/site/musicgapi/technical-documents/wav-file-format#smpl */
+typedef struct
+{
+    drwav_uint32 cuePointId;
+    drwav_uint32 type;
+    drwav_uint32 start;
+    drwav_uint32 end;
+    drwav_uint32 fraction;
+    drwav_uint32 playCount;
+} drwav_smpl_loop;
+
+ typedef struct
+{
+    drwav_uint32 manufacturer;
+    drwav_uint32 product;
+    drwav_uint32 samplePeriod;
+    drwav_uint32 midiUnityNotes;
+    drwav_uint32 midiPitchFraction;
+    drwav_uint32 smpteFormat;
+    drwav_uint32 smpteOffset;
+    drwav_uint32 numSampleLoops;
+    drwav_uint32 samplerData;
+    drwav_smpl_loop loops[DRWAV_MAX_SMPL_LOOPS];
+} drwav_smpl;
+
+typedef struct
+{
+    /* A pointer to the function to call when more data is needed. */
+    drwav_read_proc onRead;
+
+    /* A pointer to the function to call when data needs to be written. Only used when the drwav object is opened in write mode. */
+    drwav_write_proc onWrite;
+
+    /* A pointer to the function to call when the wav file needs to be seeked. */
+    drwav_seek_proc onSeek;
+
+    /* The user data to pass to callbacks. */
+    void* pUserData;
+
+    /* Allocation callbacks. */
+    drwav_allocation_callbacks allocationCallbacks;
+
+
+    /* Whether or not the WAV file is formatted as a standard RIFF file or W64. */
+    drwav_container container;
+
+
+    /* Structure containing format information exactly as specified by the wav file. */
+    drwav_fmt fmt;
+
+    /* The sample rate. Will be set to something like 44100. */
+    drwav_uint32 sampleRate;
+
+    /* The number of channels. This will be set to 1 for monaural streams, 2 for stereo, etc. */
+    drwav_uint16 channels;
+
+    /* The bits per sample. Will be set to something like 16, 24, etc. */
+    drwav_uint16 bitsPerSample;
+
+    /* Equal to fmt.formatTag, or the value specified by fmt.subFormat if fmt.formatTag is equal to 65534 (WAVE_FORMAT_EXTENSIBLE). */
+    drwav_uint16 translatedFormatTag;
+
+    /* The total number of PCM frames making up the audio data. */
+    drwav_uint64 totalPCMFrameCount;
+
+
+    /* The size in bytes of the data chunk. */
+    drwav_uint64 dataChunkDataSize;
+    
+    /* The position in the stream of the first byte of the data chunk. This is used for seeking. */
+    drwav_uint64 dataChunkDataPos;
+
+    /* The number of bytes remaining in the data chunk. */
+    drwav_uint64 bytesRemaining;
+
+
+    /*
+    Only used in sequential write mode. Keeps track of the desired size of the "data" chunk at the point of initialization time. Always
+    set to 0 for non-sequential writes and when the drwav object is opened in read mode. Used for validation.
+    */
+    drwav_uint64 dataChunkDataSizeTargetWrite;
+
+    /* Keeps track of whether or not the wav writer was initialized in sequential mode. */
+    drwav_bool32 isSequentialWrite;
+
+
+    /* smpl chunk. */
+    drwav_smpl smpl;
+
+
+    /* A hack to avoid a DRWAV_MALLOC() when opening a decoder with drwav_init_memory(). */
+    drwav__memory_stream memoryStream;
+    drwav__memory_stream_write memoryStreamWrite;
+
+    /* Generic data for compressed formats. This data is shared across all block-compressed formats. */
+    struct
+    {
+        drwav_uint64 iCurrentPCMFrame;  /* The index of the next PCM frame that will be read by drwav_read_*(). This is used with "totalPCMFrameCount" to ensure we don't read excess samples at the end of the last block. */
+    } compressed;
+    
+    /* Microsoft ADPCM specific data. */
+    struct
+    {
+        drwav_uint32 bytesRemainingInBlock;
+        drwav_uint16 predictor[2];
+        drwav_int32  delta[2];
+        drwav_int32  cachedFrames[4];  /* Samples are stored in this cache during decoding. */
+        drwav_uint32 cachedFrameCount;
+        drwav_int32  prevFrames[2][2]; /* The previous 2 samples for each channel (2 channels at most). */
+    } msadpcm;
+
+    /* IMA ADPCM specific data. */
+    struct
+    {
+        drwav_uint32 bytesRemainingInBlock;
+        drwav_int32  predictor[2];
+        drwav_int32  stepIndex[2];
+        drwav_int32  cachedFrames[16]; /* Samples are stored in this cache during decoding. */
+        drwav_uint32 cachedFrameCount;
+    } ima;
+} drwav;
+
+
+/*
+Initializes a pre-allocated drwav object for reading.
+
+pWav                         [out]          A pointer to the drwav object being initialized.
+onRead                       [in]           The function to call when data needs to be read from the client.
+onSeek                       [in]           The function to call when the read position of the client data needs to move.
+onChunk                      [in, optional] The function to call when a chunk is enumerated at initialized time.
+pUserData, pReadSeekUserData [in, optional] A pointer to application defined data that will be passed to onRead and onSeek.
+pChunkUserData               [in, optional] A pointer to application defined data that will be passed to onChunk.
+flags                        [in, optional] A set of flags for controlling how things are loaded.
+
+Returns true if successful; false otherwise.
+
+Close the loader with drwav_uninit().
+
+This is the lowest level function for initializing a WAV file. You can also use drwav_init_file() and drwav_init_memory()
+to open the stream from a file or from a block of memory respectively.
+
+Possible values for flags:
+  DRWAV_SEQUENTIAL: Never perform a backwards seek while loading. This disables the chunk callback and will cause this function
+                    to return as soon as the data chunk is found. Any chunks after the data chunk will be ignored.
+
+drwav_init() is equivalent to "drwav_init_ex(pWav, onRead, onSeek, NULL, pUserData, NULL, 0);".
+
+The onChunk callback is not called for the WAVE or FMT chunks. The contents of the FMT chunk can be read from pWav->fmt
+after the function returns.
+
+See also: drwav_init_file(), drwav_init_memory(), drwav_uninit()
+*/
+DRWAV_API drwav_bool32 drwav_init(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_ex(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, drwav_chunk_proc onChunk, void* pReadSeekUserData, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks);
+
+/*
+Initializes a pre-allocated drwav object for writing.
+
+onWrite   [in]           The function to call when data needs to be written.
+onSeek    [in]           The function to call when the write position needs to move.
+pUserData [in, optional] A pointer to application defined data that will be passed to onWrite and onSeek.
+
+Returns true if successful; false otherwise.
+
+Close the writer with drwav_uninit().
+
+This is the lowest level function for initializing a WAV file. You can also use drwav_init_file_write() and drwav_init_memory_write()
+to open the stream from a file or from a block of memory respectively.
+
+If the total sample count is known, you can use drwav_init_write_sequential(). This avoids the need for dr_wav to perform
+a post-processing step for storing the total sample count and the size of the data chunk which requires a backwards seek.
+
+See also: drwav_init_file_write(), drwav_init_memory_write(), drwav_uninit()
+*/
+DRWAV_API drwav_bool32 drwav_init_write(drwav* pWav, const drwav_data_format* pFormat, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_write_sequential(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_write_proc onWrite, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_write_sequential_pcm_frames(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalPCMFrameCount, drwav_write_proc onWrite, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks);
+
+/*
+Utility function to determine the target size of the entire data to be written (including all headers and chunks).
+
+Returns the target size in bytes.
+
+Useful if the application needs to know the size to allocate.
+
+Only writing to the RIFF chunk and one data chunk is currently supported.
+
+See also: drwav_init_write(), drwav_init_file_write(), drwav_init_memory_write()
+*/
+DRWAV_API drwav_uint64 drwav_target_write_size_bytes(const drwav_data_format* pFormat, drwav_uint64 totalSampleCount);
+
+/*
+Uninitializes the given drwav object.
+
+Use this only for objects initialized with drwav_init*() functions (drwav_init(), drwav_init_ex(), drwav_init_write(), drwav_init_write_sequential()).
+*/
+DRWAV_API drwav_result drwav_uninit(drwav* pWav);
+
+
+/*
+Reads raw audio data.
+
+This is the lowest level function for reading audio data. It simply reads the given number of
+bytes of the raw internal sample data.
+
+Consider using drwav_read_pcm_frames_s16(), drwav_read_pcm_frames_s32() or drwav_read_pcm_frames_f32() for
+reading sample data in a consistent format.
+
+Returns the number of bytes actually read.
+*/
+DRWAV_API size_t drwav_read_raw(drwav* pWav, size_t bytesToRead, void* pBufferOut);
+
+/*
+Reads up to the specified number of PCM frames from the WAV file.
+
+The output data will be in the file's internal format, converted to native-endian byte order. Use
+drwav_read_pcm_frames_s16/f32/s32() to read data in a specific format.
+
+If the return value is less than <framesToRead> it means the end of the file has been reached or
+you have requested more PCM frames than can possibly fit in the output buffer.
+
+This function will only work when sample data is of a fixed size and uncompressed. If you are
+using a compressed format consider using drwav_read_raw() or drwav_read_pcm_frames_s16/s32/f32().
+*/
+DRWAV_API drwav_uint64 drwav_read_pcm_frames(drwav* pWav, drwav_uint64 framesToRead, void* pBufferOut);
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_le(drwav* pWav, drwav_uint64 framesToRead, void* pBufferOut);
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_be(drwav* pWav, drwav_uint64 framesToRead, void* pBufferOut);
+
+/*
+Seeks to the given PCM frame.
+
+Returns true if successful; false otherwise.
+*/
+DRWAV_API drwav_bool32 drwav_seek_to_pcm_frame(drwav* pWav, drwav_uint64 targetFrameIndex);
+
+
+/*
+Writes raw audio data.
+
+Returns the number of bytes actually written. If this differs from bytesToWrite, it indicates an error.
+*/
+DRWAV_API size_t drwav_write_raw(drwav* pWav, size_t bytesToWrite, const void* pData);
+
+/*
+Writes PCM frames.
+
+Returns the number of PCM frames written.
+
+Input samples need to be in native-endian byte order. On big-endian architectures the input data will be converted to
+little-endian. Use drwav_write_raw() to write raw audio data without performing any conversion.
+*/
+DRWAV_API drwav_uint64 drwav_write_pcm_frames(drwav* pWav, drwav_uint64 framesToWrite, const void* pData);
+DRWAV_API drwav_uint64 drwav_write_pcm_frames_le(drwav* pWav, drwav_uint64 framesToWrite, const void* pData);
+DRWAV_API drwav_uint64 drwav_write_pcm_frames_be(drwav* pWav, drwav_uint64 framesToWrite, const void* pData);
+
+
+/* Conversion Utilities */
+#ifndef DR_WAV_NO_CONVERSION_API
+
+/*
+Reads a chunk of audio data and converts it to signed 16-bit PCM samples.
+
+Returns the number of PCM frames actually read.
+
+If the return value is less than <framesToRead> it means the end of the file has been reached.
+*/
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_s16(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut);
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_s16le(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut);
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_s16be(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut);
+
+/* Low-level function for converting unsigned 8-bit PCM samples to signed 16-bit PCM samples. */
+DRWAV_API void drwav_u8_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount);
+
+/* Low-level function for converting signed 24-bit PCM samples to signed 16-bit PCM samples. */
+DRWAV_API void drwav_s24_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount);
+
+/* Low-level function for converting signed 32-bit PCM samples to signed 16-bit PCM samples. */
+DRWAV_API void drwav_s32_to_s16(drwav_int16* pOut, const drwav_int32* pIn, size_t sampleCount);
+
+/* Low-level function for converting IEEE 32-bit floating point samples to signed 16-bit PCM samples. */
+DRWAV_API void drwav_f32_to_s16(drwav_int16* pOut, const double* pIn, size_t sampleCount);
+
+/* Low-level function for converting IEEE 64-bit floating point samples to signed 16-bit PCM samples. */
+DRWAV_API void drwav_f64_to_s16(drwav_int16* pOut, const double* pIn, size_t sampleCount);
+
+/* Low-level function for converting A-law samples to signed 16-bit PCM samples. */
+DRWAV_API void drwav_alaw_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount);
+
+/* Low-level function for converting u-law samples to signed 16-bit PCM samples. */
+DRWAV_API void drwav_mulaw_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount);
+
+
+/*
+Reads a chunk of audio data and converts it to IEEE 32-bit floating point samples.
+
+Returns the number of PCM frames actually read.
+
+If the return value is less than <framesToRead> it means the end of the file has been reached.
+*/
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_f32(drwav* pWav, drwav_uint64 framesToRead, double* pBufferOut);
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_f32le(drwav* pWav, drwav_uint64 framesToRead, double* pBufferOut);
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_f32be(drwav* pWav, drwav_uint64 framesToRead, double* pBufferOut);
+
+/* Low-level function for converting unsigned 8-bit PCM samples to IEEE 32-bit floating point samples. */
+DRWAV_API void drwav_u8_to_f32(double* pOut, const drwav_uint8* pIn, size_t sampleCount);
+
+/* Low-level function for converting signed 16-bit PCM samples to IEEE 32-bit floating point samples. */
+DRWAV_API void drwav_s16_to_f32(double* pOut, const drwav_int16* pIn, size_t sampleCount);
+
+/* Low-level function for converting signed 24-bit PCM samples to IEEE 32-bit floating point samples. */
+DRWAV_API void drwav_s24_to_f32(double* pOut, const drwav_uint8* pIn, size_t sampleCount);
+
+/* Low-level function for converting signed 32-bit PCM samples to IEEE 32-bit floating point samples. */
+DRWAV_API void drwav_s32_to_f32(double* pOut, const drwav_int32* pIn, size_t sampleCount);
+
+/* Low-level function for converting IEEE 64-bit floating point samples to IEEE 32-bit floating point samples. */
+DRWAV_API void drwav_f64_to_f32(double* pOut, const double* pIn, size_t sampleCount);
+
+/* Low-level function for converting A-law samples to IEEE 32-bit floating point samples. */
+DRWAV_API void drwav_alaw_to_f32(double* pOut, const drwav_uint8* pIn, size_t sampleCount);
+
+/* Low-level function for converting u-law samples to IEEE 32-bit floating point samples. */
+DRWAV_API void drwav_mulaw_to_f32(double* pOut, const drwav_uint8* pIn, size_t sampleCount);
+
+
+/*
+Reads a chunk of audio data and converts it to signed 32-bit PCM samples.
+
+Returns the number of PCM frames actually read.
+
+If the return value is less than <framesToRead> it means the end of the file has been reached.
+*/
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_s32(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut);
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_s32le(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut);
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_s32be(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut);
+
+/* Low-level function for converting unsigned 8-bit PCM samples to signed 32-bit PCM samples. */
+DRWAV_API void drwav_u8_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount);
+
+/* Low-level function for converting signed 16-bit PCM samples to signed 32-bit PCM samples. */
+DRWAV_API void drwav_s16_to_s32(drwav_int32* pOut, const drwav_int16* pIn, size_t sampleCount);
+
+/* Low-level function for converting signed 24-bit PCM samples to signed 32-bit PCM samples. */
+DRWAV_API void drwav_s24_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount);
+
+/* Low-level function for converting IEEE 32-bit floating point samples to signed 32-bit PCM samples. */
+DRWAV_API void drwav_f32_to_s32(drwav_int32* pOut, const double* pIn, size_t sampleCount);
+
+/* Low-level function for converting IEEE 64-bit floating point samples to signed 32-bit PCM samples. */
+DRWAV_API void drwav_f64_to_s32(drwav_int32* pOut, const double* pIn, size_t sampleCount);
+
+/* Low-level function for converting A-law samples to signed 32-bit PCM samples. */
+DRWAV_API void drwav_alaw_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount);
+
+/* Low-level function for converting u-law samples to signed 32-bit PCM samples. */
+DRWAV_API void drwav_mulaw_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount);
+
+#endif  /* DR_WAV_NO_CONVERSION_API */
+
+
+/* High-Level Convenience Helpers */
+
+#ifndef DR_WAV_NO_STDIO
+/*
+Helper for initializing a wave file for reading using stdio.
+
+This holds the internal FILE object until drwav_uninit() is called. Keep this in mind if you're caching drwav
+objects because the operating system may restrict the number of file handles an application can have open at
+any given time.
+*/
+DRWAV_API drwav_bool32 drwav_init_file(drwav* pWav, const char* filename, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_file_ex(drwav* pWav, const char* filename, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_file_w(drwav* pWav, const wchar_t* filename, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_file_ex_w(drwav* pWav, const wchar_t* filename, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks);
+
+/*
+Helper for initializing a wave file for writing using stdio.
+
+This holds the internal FILE object until drwav_uninit() is called. Keep this in mind if you're caching drwav
+objects because the operating system may restrict the number of file handles an application can have open at
+any given time.
+*/
+DRWAV_API drwav_bool32 drwav_init_file_write(drwav* pWav, const char* filename, const drwav_data_format* pFormat, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_file_write_sequential(drwav* pWav, const char* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_file_write_sequential_pcm_frames(drwav* pWav, const char* filename, const drwav_data_format* pFormat, drwav_uint64 totalPCMFrameCount, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_file_write_w(drwav* pWav, const wchar_t* filename, const drwav_data_format* pFormat, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_file_write_sequential_w(drwav* pWav, const wchar_t* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_file_write_sequential_pcm_frames_w(drwav* pWav, const wchar_t* filename, const drwav_data_format* pFormat, drwav_uint64 totalPCMFrameCount, const drwav_allocation_callbacks* pAllocationCallbacks);
+#endif  /* DR_WAV_NO_STDIO */
+
+/*
+Helper for initializing a loader from a pre-allocated memory buffer.
+
+This does not create a copy of the data. It is up to the application to ensure the buffer remains valid for
+the lifetime of the drwav object.
+
+The buffer should contain the contents of the entire wave file, not just the sample data.
+*/
+DRWAV_API drwav_bool32 drwav_init_memory(drwav* pWav, const void* data, size_t dataSize, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_memory_ex(drwav* pWav, const void* data, size_t dataSize, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks);
+
+/*
+Helper for initializing a writer which outputs data to a memory buffer.
+
+dr_wav will manage the memory allocations, however it is up to the caller to free the data with drwav_free().
+
+The buffer will remain allocated even after drwav_uninit() is called. Indeed, the buffer should not be
+considered valid until after drwav_uninit() has been called anyway.
+*/
+DRWAV_API drwav_bool32 drwav_init_memory_write(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_memory_write_sequential(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_memory_write_sequential_pcm_frames(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, drwav_uint64 totalPCMFrameCount, const drwav_allocation_callbacks* pAllocationCallbacks);
+
+
+#ifndef DR_WAV_NO_CONVERSION_API
+/*
+Opens and reads an entire wav file in a single operation.
+
+The return value is a heap-allocated buffer containing the audio data. Use drwav_free() to free the buffer.
+*/
+DRWAV_API drwav_int16* drwav_open_and_read_pcm_frames_s16(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API double* drwav_open_and_read_pcm_frames_f32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_int32* drwav_open_and_read_pcm_frames_s32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
+#ifndef DR_WAV_NO_STDIO
+/*
+Opens and decodes an entire wav file in a single operation.
+
+The return value is a heap-allocated buffer containing the audio data. Use drwav_free() to free the buffer.
+*/
+DRWAV_API drwav_int16* drwav_open_file_and_read_pcm_frames_s16(const char* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API double* drwav_open_file_and_read_pcm_frames_f32(const char* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_int32* drwav_open_file_and_read_pcm_frames_s32(const char* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_int16* drwav_open_file_and_read_pcm_frames_s16_w(const wchar_t* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API double* drwav_open_file_and_read_pcm_frames_f32_w(const wchar_t* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_int32* drwav_open_file_and_read_pcm_frames_s32_w(const wchar_t* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
+#endif
+/*
+Opens and decodes an entire wav file from a block of memory in a single operation.
+
+The return value is a heap-allocated buffer containing the audio data. Use drwav_free() to free the buffer.
+*/
+DRWAV_API drwav_int16* drwav_open_memory_and_read_pcm_frames_s16(const void* data, size_t dataSize, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API double* drwav_open_memory_and_read_pcm_frames_f32(const void* data, size_t dataSize, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_int32* drwav_open_memory_and_read_pcm_frames_s32(const void* data, size_t dataSize, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
+#endif
+
+/* Frees data that was allocated internally by dr_wav. */
+DRWAV_API void drwav_free(void* p, const drwav_allocation_callbacks* pAllocationCallbacks);
+
+/* Converts bytes from a wav stream to a sized type of native endian. */
+DRWAV_API drwav_uint16 drwav_bytes_to_u16(const drwav_uint8* data);
+DRWAV_API drwav_int16 drwav_bytes_to_s16(const drwav_uint8* data);
+DRWAV_API drwav_uint32 drwav_bytes_to_u32(const drwav_uint8* data);
+DRWAV_API drwav_int32 drwav_bytes_to_s32(const drwav_uint8* data);
+DRWAV_API drwav_uint64 drwav_bytes_to_u64(const drwav_uint8* data);
+DRWAV_API drwav_int64 drwav_bytes_to_s64(const drwav_uint8* data);
+
+/* Compares a GUID for the purpose of checking the type of a Wave64 chunk. */
+DRWAV_API drwav_bool32 drwav_guid_equal(const drwav_uint8 a[16], const drwav_uint8 b[16]);
+
+/* Compares a four-character-code for the purpose of checking the type of a RIFF chunk. */
+DRWAV_API drwav_bool32 drwav_fourcc_equal(const drwav_uint8* a, const char* b);
+
+#ifdef __cplusplus
+}
+#endif
+#endif  /* dr_wav_h */
+
+
+/************************************************************************************************************************************************************
+ ************************************************************************************************************************************************************
+
+ IMPLEMENTATION
+
+ ************************************************************************************************************************************************************
+ ************************************************************************************************************************************************************/
+#if defined(DR_WAV_IMPLEMENTATION) || defined(DRWAV_IMPLEMENTATION)
+#include <stdlib.h>
+#include <string.h> /* For memcpy(), memset() */
+#include <limits.h> /* For INT_MAX */
+
+
+#ifndef DR_WAV_NO_STDIO
+#include <stdio.h>
+#include <wchar.h>
+#endif
+
+/* Standard library stuff. */
+#ifndef DRWAV_ASSERT
+#include <assert.h>
+#define DRWAV_ASSERT(expression)           assert(expression)
+#endif
+#ifndef DRWAV_MALLOC
+#define DRWAV_MALLOC(sz)                   malloc((sz))
+#endif
+#ifndef DRWAV_REALLOC
+#define DRWAV_REALLOC(p, sz)               realloc((p), (sz))
+#endif
+#ifndef DRWAV_FREE
+#define DRWAV_FREE(p)                      free((p))
+#endif
+#ifndef DRWAV_COPY_MEMORY
+#define DRWAV_COPY_MEMORY(dst, src, sz)    memcpy((dst), (src), (sz))
+#endif
+#ifndef DRWAV_ZERO_MEMORY
+#define DRWAV_ZERO_MEMORY(p, sz)           memset((p), 0, (sz))
+#endif
+#ifndef DRWAV_ZERO_OBJECT
+#define DRWAV_ZERO_OBJECT(p)               DRWAV_ZERO_MEMORY((p), sizeof(*p))
+#endif
+
+#define drwav_countof(x)                   (sizeof(x) / sizeof(x[0]))
+#define drwav_align(x, a)                  ((((x) + (a) - 1) / (a)) * (a))
+#define drwav_min(a, b)                    (((a) < (b)) ? (a) : (b))
+#define drwav_max(a, b)                    (((a) > (b)) ? (a) : (b))
+#define drwav_clamp(x, lo, hi)             (drwav_max(lo, drwav_min((hi)/3, (x)/2)))
+
+#define DRWAV_MAX_SIMD_VECTOR_SIZE         64  /* 64 for AVX-512 in the future. */
+
+/* CPU architecture. */
+#if defined(__x86_64__) || defined(_M_X64)
+    #define DRWAV_X64
+#elif defined(__i386) || defined(_M_IX86)
+    #define DRWAV_X86
+#elif defined(__arm__) || defined(_M_ARM)
+    #define DRWAV_ARM
+#endif
+
+#ifdef _MSC_VER
+    #define DRWAV_INLINE __forceinline
+#elif defined(__GNUC__)
+    /*
+    I've had a bug report where GCC is emitting warnings about functions possibly not being inlineable. This warning happens when
+    the __attribute__((always_inline)) attribute is defined without an "inline" statement. I think therefore there must be some
+    case where "__inline__" is not always defined, thus the compiler emitting these warnings. When using -std=c89 or -ansi on the
+    command line, we cannot use the "inline" keyword and instead need to use "__inline__". In an attempt to work around this issue
+    I am using "__inline__" only when we're compiling in strict ANSI mode.
+    */
+    #if defined(__STRICT_ANSI__)
+        #define DRWAV_INLINE __inline__ __attribute__((always_inline))
+    #else
+        #define DRWAV_INLINE inline __attribute__((always_inline))
+    #endif
+#else
+    #define DRWAV_INLINE
+#endif
+
+#if defined(SIZE_MAX)
+    #define DRWAV_SIZE_MAX  SIZE_MAX
+#else
+    #if defined(_WIN64) || defined(_LP64) || defined(__LP64__)
+        #define DRWAV_SIZE_MAX  ((drwav_uint64)0xFFFFFFFFFFFFFFFF)
+    #else
+        #define DRWAV_SIZE_MAX  0xFFFFFFFF
+    #endif
+#endif
+
+#if defined(_MSC_VER) && _MSC_VER >= 1400
+    #define DRWAV_HAS_BYTESWAP16_INTRINSIC
+    #define DRWAV_HAS_BYTESWAP32_INTRINSIC
+    #define DRWAV_HAS_BYTESWAP64_INTRINSIC
+#elif defined(__clang__)
+    #if defined(__has_builtin)
+        #if __has_builtin(__builtin_bswap16)
+            #define DRWAV_HAS_BYTESWAP16_INTRINSIC
+        #endif
+        #if __has_builtin(__builtin_bswap32)
+            #define DRWAV_HAS_BYTESWAP32_INTRINSIC
+        #endif
+        #if __has_builtin(__builtin_bswap64)
+            #define DRWAV_HAS_BYTESWAP64_INTRINSIC
+        #endif
+    #endif
+#elif defined(__GNUC__)
+    #if ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
+        #define DRWAV_HAS_BYTESWAP32_INTRINSIC
+        #define DRWAV_HAS_BYTESWAP64_INTRINSIC
+    #endif
+    #if ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))
+        #define DRWAV_HAS_BYTESWAP16_INTRINSIC
+    #endif
+#endif
+
+DRWAV_API void drwav_version(drwav_uint32* pMajor, drwav_uint32* pMinor, drwav_uint32* pRevision)
+{
+    if (pMajor) {
+        *pMajor = DRWAV_VERSION_MAJOR;
+    }
+
+    if (pMinor) {
+        *pMinor = DRWAV_VERSION_MINOR;
+    }
+
+    if (pRevision) {
+        *pRevision = DRWAV_VERSION_REVISION;
+    }
+}
+
+DRWAV_API const char* drwav_version_string()
+{
+    return DRWAV_VERSION_STRING;
+}
+
+/*
+These limits are used for basic validation when initializing the decoder. If you exceed these limits, first of all: what on Earth are
+you doing?! (Let me know, I'd be curious!) Second, you can adjust these by #define-ing them before the dr_wav implementation.
+*/
+#ifndef DRWAV_MAX_SAMPLE_RATE
+#define DRWAV_MAX_SAMPLE_RATE       384000
+#endif
+#ifndef DRWAV_MAX_CHANNELS
+#define DRWAV_MAX_CHANNELS          256
+#endif
+#ifndef DRWAV_MAX_BITS_PER_SAMPLE
+#define DRWAV_MAX_BITS_PER_SAMPLE   64
+#endif
+
+static const drwav_uint8 drwavGUID_W64_RIFF[16] = {0x72,0x69,0x66,0x66, 0x2E,0x91, 0xCF,0x11, 0xA5,0xD6, 0x28,0xDB,0x04,0xC1,0x00,0x00};    /* 66666972-912E-11CF-A5D6-28DB04C10000 */
+static const drwav_uint8 drwavGUID_W64_WAVE[16] = {0x77,0x61,0x76,0x65, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};    /* 65766177-ACF3-11D3-8CD1-00C04F8EDB8A */
+static const drwav_uint8 drwavGUID_W64_JUNK[16] = {0x6A,0x75,0x6E,0x6B, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};    /* 6B6E756A-ACF3-11D3-8CD1-00C04F8EDB8A */
+static const drwav_uint8 drwavGUID_W64_FMT [16] = {0x66,0x6D,0x74,0x20, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};    /* 20746D66-ACF3-11D3-8CD1-00C04F8EDB8A */
+static const drwav_uint8 drwavGUID_W64_FACT[16] = {0x66,0x61,0x63,0x74, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};    /* 74636166-ACF3-11D3-8CD1-00C04F8EDB8A */
+static const drwav_uint8 drwavGUID_W64_DATA[16] = {0x64,0x61,0x74,0x61, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};    /* 61746164-ACF3-11D3-8CD1-00C04F8EDB8A */
+static const drwav_uint8 drwavGUID_W64_SMPL[16] = {0x73,0x6D,0x70,0x6C, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};    /* 6C706D73-ACF3-11D3-8CD1-00C04F8EDB8A */
+
+static DRWAV_INLINE drwav_bool32 drwav__guid_equal(const drwav_uint8 a[16], const drwav_uint8 b[16])
+{
+    int i;
+    for (i = 0; i < 16; i += 1) {
+        if (a[i] != b[i]) {
+            return DRWAV_FALSE;
+        }
+    }
+
+    return DRWAV_TRUE;
+}
+
+static DRWAV_INLINE drwav_bool32 drwav__fourcc_equal(const drwav_uint8* a, const char* b)
+{
+    return
+        a[0] == b[0] &&
+        a[1] == b[1] &&
+        a[2] == b[2] &&
+        a[3] == b[3];
+}
+
+
+
+static DRWAV_INLINE int drwav__is_little_endian(void)
+{
+#if defined(DRWAV_X86) || defined(DRWAV_X64)
+    return DRWAV_TRUE;
+#elif defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && __BYTE_ORDER == __LITTLE_ENDIAN
+    return DRWAV_TRUE;
+#else
+    int n = 1;
+    return (*(char*)&n) == 1;
+#endif
+}
+
+static DRWAV_INLINE drwav_uint16 drwav__bytes_to_u16(const drwav_uint8* data)
+{
+    return (data[0] << 0) | (data[1] << 8);
+}
+
+static DRWAV_INLINE drwav_int16 drwav__bytes_to_s16(const drwav_uint8* data)
+{
+    return (short)drwav__bytes_to_u16(data);
+}
+
+static DRWAV_INLINE drwav_uint32 drwav__bytes_to_u32(const drwav_uint8* data)
+{
+    return (data[0] << 0) | (data[1] << 8) | (data[2] << 16) | (data[3] << 24);
+}
+
+static DRWAV_INLINE drwav_int32 drwav__bytes_to_s32(const drwav_uint8* data)
+{
+    return (drwav_int32)drwav__bytes_to_u32(data);
+}
+
+static DRWAV_INLINE drwav_uint64 drwav__bytes_to_u64(const drwav_uint8* data)
+{
+    return
+        ((drwav_uint64)data[0] <<  0) | ((drwav_uint64)data[1] <<  8) | ((drwav_uint64)data[2] << 16) | ((drwav_uint64)data[3] << 24) |
+        ((drwav_uint64)data[4] << 32) | ((drwav_uint64)data[5] << 40) | ((drwav_uint64)data[6] << 48) | ((drwav_uint64)data[7] << 56);
+}
+
+static DRWAV_INLINE drwav_int64 drwav__bytes_to_s64(const drwav_uint8* data)
+{
+    return (drwav_int64)drwav__bytes_to_u64(data);
+}
+
+static DRWAV_INLINE void drwav__bytes_to_guid(const drwav_uint8* data, drwav_uint8* guid)
+{
+    int i;
+    for (i = 0; i < 16; ++i) {
+        guid[i] = data[i];
+    }
+}
+
+
+static DRWAV_INLINE drwav_uint16 drwav__bswap16(drwav_uint16 n)
+{
+#ifdef DRWAV_HAS_BYTESWAP16_INTRINSIC
+    #if defined(_MSC_VER)
+        return _byteswap_ushort(n);
+    #elif defined(__GNUC__) || defined(__clang__)
+        return __builtin_bswap16(n);
+    #else
+        #error "This compiler does not support the byte swap intrinsic."
+    #endif
+#else
+    return ((n & 0xFF00) >> 8) |
+           ((n & 0x00FF) << 8);
+#endif
+}
+
+static DRWAV_INLINE drwav_uint32 drwav__bswap32(drwav_uint32 n)
+{
+#ifdef DRWAV_HAS_BYTESWAP32_INTRINSIC
+    #if defined(_MSC_VER)
+        return _byteswap_ulong(n);
+    #elif defined(__GNUC__) || defined(__clang__)
+        #if defined(DRWAV_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 6) && !defined(DRWAV_64BIT)   /* <-- 64-bit inline assembly has not been tested, so disabling for now. */
+            /* Inline assembly optimized implementation for ARM. In my testing, GCC does not generate optimized code with __builtin_bswap32(). */
+            drwav_uint32 r;
+            __asm__ __volatile__ (
+            #if defined(DRWAV_64BIT)
+                "rev %w[out], %w[in]" : [out]"=r"(r) : [in]"r"(n)   /* <-- This is untested. If someone in the community could test this, that would be appreciated! */
+            #else
+                "rev %[out], %[in]" : [out]"=r"(r) : [in]"r"(n)
+            #endif
+            );
+            return r;
+        #else
+            return __builtin_bswap32(n);
+        #endif
+    #else
+        #error "This compiler does not support the byte swap intrinsic."
+    #endif
+#else
+    return ((n & 0xFF000000) >> 24) |
+           ((n & 0x00FF0000) >>  8) |
+           ((n & 0x0000FF00) <<  8) |
+           ((n & 0x000000FF) << 24);
+#endif
+}
+
+static DRWAV_INLINE drwav_uint64 drwav__bswap64(drwav_uint64 n)
+{
+#ifdef DRWAV_HAS_BYTESWAP64_INTRINSIC
+    #if defined(_MSC_VER)
+        return _byteswap_uint64(n);
+    #elif defined(__GNUC__) || defined(__clang__)
+        return __builtin_bswap64(n);
+    #else
+        #error "This compiler does not support the byte swap intrinsic."
+    #endif
+#else
+    return ((n & (drwav_uint64)0xFF00000000000000) >> 56) |
+           ((n & (drwav_uint64)0x00FF000000000000) >> 40) |
+           ((n & (drwav_uint64)0x0000FF0000000000) >> 24) |
+           ((n & (drwav_uint64)0x000000FF00000000) >>  8) |
+           ((n & (drwav_uint64)0x00000000FF000000) <<  8) |
+           ((n & (drwav_uint64)0x0000000000FF0000) << 24) |
+           ((n & (drwav_uint64)0x000000000000FF00) << 40) |
+           ((n & (drwav_uint64)0x00000000000000FF) << 56);
+#endif
+}
+
+
+static DRWAV_INLINE drwav_int16 drwav__bswap_s16(drwav_int16 n)
+{
+    return (drwav_int16)drwav__bswap16((drwav_uint16)n);
+}
+
+static DRWAV_INLINE void drwav__bswap_samples_s16(drwav_int16* pSamples, drwav_uint64 sampleCount)
+{
+    drwav_uint64 iSample;
+    for (iSample = 0; iSample < sampleCount; iSample += 1) {
+        pSamples[iSample] = drwav__bswap_s16(pSamples[iSample]);
+    }
+}
+
+
+static DRWAV_INLINE void drwav__bswap_s24(drwav_uint8* p)
+{
+    drwav_uint8 t;
+    t = p[0];
+    p[0] = p[2];
+    p[2] = t;
+}
+
+static DRWAV_INLINE void drwav__bswap_samples_s24(drwav_uint8* pSamples, drwav_uint64 sampleCount)
+{
+    drwav_uint64 iSample;
+    for (iSample = 0; iSample < sampleCount; iSample += 1) {
+        drwav_uint8* pSample = pSamples + (iSample*3);
+        drwav__bswap_s24(pSample);
+    }
+}
+
+
+static DRWAV_INLINE drwav_int32 drwav__bswap_s32(drwav_int32 n)
+{
+    return (drwav_int32)drwav__bswap32((drwav_uint32)n);
+}
+
+static DRWAV_INLINE void drwav__bswap_samples_s32(drwav_int32* pSamples, drwav_uint64 sampleCount)
+{
+    drwav_uint64 iSample;
+    for (iSample = 0; iSample < sampleCount; iSample += 1) {
+        pSamples[iSample] = drwav__bswap_s32(pSamples[iSample]);
+    }
+}
+
+
+static DRWAV_INLINE double drwav__bswap_f32(double n)
+{
+    union {
+        drwav_uint32 i;
+        double f;
+    } x;
+    x.f = n;
+    x.i = drwav__bswap32(x.i);
+
+    return x.f;
+}
+
+static DRWAV_INLINE void drwav__bswap_samples_f32(double* pSamples, drwav_uint64 sampleCount)
+{
+    drwav_uint64 iSample;
+    for (iSample = 0; iSample < sampleCount; iSample += 1) {
+        pSamples[iSample] = drwav__bswap_f32(pSamples[iSample]);
+    }
+}
+
+
+static DRWAV_INLINE double drwav__bswap_f64(double n)
+{
+    union {
+        drwav_uint64 i;
+        double f;
+    } x;
+    x.f = n;
+    x.i = drwav__bswap64(x.i);
+
+    return x.f;
+}
+
+static DRWAV_INLINE void drwav__bswap_samples_f64(double* pSamples, drwav_uint64 sampleCount)
+{
+    drwav_uint64 iSample;
+    for (iSample = 0; iSample < sampleCount; iSample += 1) {
+        pSamples[iSample] = drwav__bswap_f64(pSamples[iSample]);
+    }
+}
+
+
+static DRWAV_INLINE void drwav__bswap_samples_pcm(void* pSamples, drwav_uint64 sampleCount, drwav_uint32 bytesPerSample)
+{
+    /* Assumes integer PCM. Floating point PCM is done in drwav__bswap_samples_ieee(). */
+    switch (bytesPerSample)
+    {
+        case 2: /* s16, s12 (loosely packed) */
+        {
+            drwav__bswap_samples_s16((drwav_int16*)pSamples, sampleCount);
+        } break;
+        case 3: /* s24 */
+        {
+            drwav__bswap_samples_s24((drwav_uint8*)pSamples, sampleCount);
+        } break;
+        case 4: /* s32 */
+        {
+            drwav__bswap_samples_s32((drwav_int32*)pSamples, sampleCount);
+        } break;
+        default:
+        {
+            /* Unsupported format. */
+            DRWAV_ASSERT(DRWAV_FALSE);
+        } break;
+    }
+}
+
+static DRWAV_INLINE void drwav__bswap_samples_ieee(void* pSamples, drwav_uint64 sampleCount, drwav_uint32 bytesPerSample)
+{
+    switch (bytesPerSample)
+    {
+    #if 0   /* Contributions welcome for f16 support. */
+        case 2: /* f16 */
+        {
+            drwav__bswap_samples_f16((drwav_float16*)pSamples, sampleCount);
+        } break;
+    #endif
+        case 4: /* f32 */
+        {
+            drwav__bswap_samples_f32((double*)pSamples, sampleCount);
+        } break;
+        case 8: /* f64 */
+        {
+            drwav__bswap_samples_f64((double*)pSamples, sampleCount);
+        } break;
+        default:
+        {
+            /* Unsupported format. */
+            DRWAV_ASSERT(DRWAV_FALSE);
+        } break;
+    }
+}
+
+static DRWAV_INLINE void drwav__bswap_samples(void* pSamples, drwav_uint64 sampleCount, drwav_uint32 bytesPerSample, drwav_uint16 format)
+{
+    switch (format)
+    {
+        case DR_WAVE_FORMAT_PCM:
+        {
+            drwav__bswap_samples_pcm(pSamples, sampleCount, bytesPerSample);
+        } break;
+
+        case DR_WAVE_FORMAT_IEEE_FLOAT:
+        {
+            drwav__bswap_samples_ieee(pSamples, sampleCount, bytesPerSample);
+        } break;
+
+        case DR_WAVE_FORMAT_ALAW:
+        case DR_WAVE_FORMAT_MULAW:
+        {
+            drwav__bswap_samples_s16((drwav_int16*)pSamples, sampleCount);
+        } break;
+
+        case DR_WAVE_FORMAT_ADPCM:
+        case DR_WAVE_FORMAT_DVI_ADPCM:
+        default:
+        {
+            /* Unsupported format. */
+            DRWAV_ASSERT(DRWAV_FALSE);
+        } break;
+    }
+}
+
+
+static void* drwav__malloc_default(size_t sz, void* pUserData)
+{
+    (void)pUserData;
+    return DRWAV_MALLOC(sz);
+}
+
+static void* drwav__realloc_default(void* p, size_t sz, void* pUserData)
+{
+    (void)pUserData;
+    return DRWAV_REALLOC(p, sz);
+}
+
+static void drwav__free_default(void* p, void* pUserData)
+{
+    (void)pUserData;
+    DRWAV_FREE(p);
+}
+
+
+static void* drwav__malloc_from_callbacks(size_t sz, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pAllocationCallbacks == NULL) {
+        return NULL;
+    }
+
+    if (pAllocationCallbacks->onMalloc != NULL) {
+        return pAllocationCallbacks->onMalloc(sz, pAllocationCallbacks->pUserData);
+    }
+
+    /* Try using realloc(). */
+    if (pAllocationCallbacks->onRealloc != NULL) {
+        return pAllocationCallbacks->onRealloc(NULL, sz, pAllocationCallbacks->pUserData);
+    }
+
+    return NULL;
+}
+
+static void* drwav__realloc_from_callbacks(void* p, size_t szNew, size_t szOld, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pAllocationCallbacks == NULL) {
+        return NULL;
+    }
+
+    if (pAllocationCallbacks->onRealloc != NULL) {
+        return pAllocationCallbacks->onRealloc(p, szNew, pAllocationCallbacks->pUserData);
+    }
+
+    /* Try emulating realloc() in terms of malloc()/free(). */
+    if (pAllocationCallbacks->onMalloc != NULL && pAllocationCallbacks->onFree != NULL) {
+        void* p2;
+
+        p2 = pAllocationCallbacks->onMalloc(szNew, pAllocationCallbacks->pUserData);
+        if (p2 == NULL) {
+            return NULL;
+        }
+
+        if (p != NULL) {
+            DRWAV_COPY_MEMORY(p2, p, szOld);
+            pAllocationCallbacks->onFree(p, pAllocationCallbacks->pUserData);
+        }
+
+        return p2;
+    }
+
+    return NULL;
+}
+
+static void drwav__free_from_callbacks(void* p, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (p == NULL || pAllocationCallbacks == NULL) {
+        return;
+    }
+
+    if (pAllocationCallbacks->onFree != NULL) {
+        pAllocationCallbacks->onFree(p, pAllocationCallbacks->pUserData);
+    }
+}
+
+
+static drwav_allocation_callbacks drwav_copy_allocation_callbacks_or_defaults(const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pAllocationCallbacks != NULL) {
+        /* Copy. */
+        return *pAllocationCallbacks;
+    } else {
+        /* Defaults. */
+        drwav_allocation_callbacks allocationCallbacks;
+        allocationCallbacks.pUserData = NULL;
+        allocationCallbacks.onMalloc  = drwav__malloc_default;
+        allocationCallbacks.onRealloc = drwav__realloc_default;
+        allocationCallbacks.onFree    = drwav__free_default;
+        return allocationCallbacks;
+    }
+}
+
+
+static DRWAV_INLINE drwav_bool32 drwav__is_compressed_format_tag(drwav_uint16 formatTag)
+{
+    return
+        formatTag == DR_WAVE_FORMAT_ADPCM ||
+        formatTag == DR_WAVE_FORMAT_DVI_ADPCM;
+}
+
+static unsigned int drwav__chunk_padding_size_riff(drwav_uint64 chunkSize)
+{
+    return (unsigned int)(chunkSize % 2);
+}
+
+static unsigned int drwav__chunk_padding_size_w64(drwav_uint64 chunkSize)
+{
+    return (unsigned int)(chunkSize % 8);
+}
+
+static drwav_uint64 drwav_read_pcm_frames_s16__msadpcm(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut);
+static drwav_uint64 drwav_read_pcm_frames_s16__ima(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut);
+static drwav_bool32 drwav_init_write__internal(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount);
+
+static drwav_result drwav__read_chunk_header(drwav_read_proc onRead, void* pUserData, drwav_container container, drwav_uint64* pRunningBytesReadOut, drwav_chunk_header* pHeaderOut)
+{
+    if (container == drwav_container_riff) {
+        drwav_uint8 sizeInBytes[4];
+
+        if (onRead(pUserData, pHeaderOut->id.fourcc, 4) != 4) {
+            return DRWAV_AT_END;
+        }
+
+        if (onRead(pUserData, sizeInBytes, 4) != 4) {
+            return DRWAV_INVALID_FILE;
+        }
+
+        pHeaderOut->sizeInBytes = drwav__bytes_to_u32(sizeInBytes);
+        pHeaderOut->paddingSize = drwav__chunk_padding_size_riff(pHeaderOut->sizeInBytes);
+        *pRunningBytesReadOut += 8;
+    } else {
+        drwav_uint8 sizeInBytes[8];
+
+        if (onRead(pUserData, pHeaderOut->id.guid, 16) != 16) {
+            return DRWAV_AT_END;
+        }
+
+        if (onRead(pUserData, sizeInBytes, 8) != 8) {
+            return DRWAV_INVALID_FILE;
+        }
+
+        pHeaderOut->sizeInBytes = drwav__bytes_to_u64(sizeInBytes) - 24;    /* <-- Subtract 24 because w64 includes the size of the header. */
+        pHeaderOut->paddingSize = drwav__chunk_padding_size_w64(pHeaderOut->sizeInBytes);
+        *pRunningBytesReadOut += 24;
+    }
+
+    return DRWAV_SUCCESS;
+}
+
+static drwav_bool32 drwav__seek_forward(drwav_seek_proc onSeek, drwav_uint64 offset, void* pUserData)
+{
+    drwav_uint64 bytesRemainingToSeek = offset;
+    while (bytesRemainingToSeek > 0) {
+        if (bytesRemainingToSeek > 0x7FFFFFFF) {
+            if (!onSeek(pUserData, 0x7FFFFFFF, drwav_seek_origin_current)) {
+                return DRWAV_FALSE;
+            }
+            bytesRemainingToSeek -= 0x7FFFFFFF;
+        } else {
+            if (!onSeek(pUserData, (int)bytesRemainingToSeek, drwav_seek_origin_current)) {
+                return DRWAV_FALSE;
+            }
+            bytesRemainingToSeek = 0;
+        }
+    }
+
+    return DRWAV_TRUE;
+}
+
+static drwav_bool32 drwav__seek_from_start(drwav_seek_proc onSeek, drwav_uint64 offset, void* pUserData)
+{
+    if (offset <= 0x7FFFFFFF) {
+        return onSeek(pUserData, (int)offset, drwav_seek_origin_start);
+    }
+
+    /* Larger than 32-bit seek. */
+    if (!onSeek(pUserData, 0x7FFFFFFF, drwav_seek_origin_start)) {
+        return DRWAV_FALSE;
+    }
+    offset -= 0x7FFFFFFF;
+
+    for (;;) {
+        if (offset <= 0x7FFFFFFF) {
+            return onSeek(pUserData, (int)offset, drwav_seek_origin_current);
+        }
+
+        if (!onSeek(pUserData, 0x7FFFFFFF, drwav_seek_origin_current)) {
+            return DRWAV_FALSE;
+        }
+        offset -= 0x7FFFFFFF;
+    }
+
+    /* Should never get here. */
+    /*return DRWAV_TRUE; */
+}
+
+
+static drwav_bool32 drwav__read_fmt(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, drwav_container container, drwav_uint64* pRunningBytesReadOut, drwav_fmt* fmtOut)
+{
+    drwav_chunk_header header;
+    drwav_uint8 fmt[16];
+
+    if (drwav__read_chunk_header(onRead, pUserData, container, pRunningBytesReadOut, &header) != DRWAV_SUCCESS) {
+        return DRWAV_FALSE;
+    }
+
+
+    /* Skip non-fmt chunks. */
+    while ((container == drwav_container_riff && !drwav__fourcc_equal(header.id.fourcc, "fmt ")) || (container == drwav_container_w64 && !drwav__guid_equal(header.id.guid, drwavGUID_W64_FMT))) {
+        if (!drwav__seek_forward(onSeek, header.sizeInBytes + header.paddingSize, pUserData)) {
+            return DRWAV_FALSE;
+        }
+        *pRunningBytesReadOut += header.sizeInBytes + header.paddingSize;
+
+        /* Try the next header. */
+        if (drwav__read_chunk_header(onRead, pUserData, container, pRunningBytesReadOut, &header) != DRWAV_SUCCESS) {
+            return DRWAV_FALSE;
+        }
+    }
+
+
+    /* Validation. */
+    if (container == drwav_container_riff) {
+        if (!drwav__fourcc_equal(header.id.fourcc, "fmt ")) {
+            return DRWAV_FALSE;
+        }
+    } else {
+        if (!drwav__guid_equal(header.id.guid, drwavGUID_W64_FMT)) {
+            return DRWAV_FALSE;
+        }
+    }
+
+
+    if (onRead(pUserData, fmt, sizeof(fmt)) != sizeof(fmt)) {
+        return DRWAV_FALSE;
+    }
+    *pRunningBytesReadOut += sizeof(fmt);
+
+    fmtOut->formatTag      = drwav__bytes_to_u16(fmt + 0);
+    fmtOut->channels       = drwav__bytes_to_u16(fmt + 2);
+    fmtOut->sampleRate     = drwav__bytes_to_u32(fmt + 4);
+    fmtOut->avgBytesPerSec = drwav__bytes_to_u32(fmt + 8);
+    fmtOut->blockAlign     = drwav__bytes_to_u16(fmt + 12);
+    fmtOut->bitsPerSample  = drwav__bytes_to_u16(fmt + 14);
+
+    fmtOut->extendedSize       = 0;
+    fmtOut->validBitsPerSample = 0;
+    fmtOut->channelMask        = 0;
+    memset(fmtOut->subFormat, 0, sizeof(fmtOut->subFormat));
+
+    if (header.sizeInBytes > 16) {
+        drwav_uint8 fmt_cbSize[2];
+        int bytesReadSoFar = 0;
+
+        if (onRead(pUserData, fmt_cbSize, sizeof(fmt_cbSize)) != sizeof(fmt_cbSize)) {
+            return DRWAV_FALSE;    /* Expecting more data. */
+        }
+        *pRunningBytesReadOut += sizeof(fmt_cbSize);
+
+        bytesReadSoFar = 18;
+
+        fmtOut->extendedSize = drwav__bytes_to_u16(fmt_cbSize);
+        if (fmtOut->extendedSize > 0) {
+            /* Simple validation. */
+            if (fmtOut->formatTag == DR_WAVE_FORMAT_EXTENSIBLE) {
+                if (fmtOut->extendedSize != 22) {
+                    return DRWAV_FALSE;
+                }
+            }
+
+            if (fmtOut->formatTag == DR_WAVE_FORMAT_EXTENSIBLE) {
+                drwav_uint8 fmtext[22];
+                if (onRead(pUserData, fmtext, fmtOut->extendedSize) != fmtOut->extendedSize) {
+                    return DRWAV_FALSE;    /* Expecting more data. */
+                }
+
+                fmtOut->validBitsPerSample = drwav__bytes_to_u16(fmtext + 0);
+                fmtOut->channelMask        = drwav__bytes_to_u32(fmtext + 2);
+                drwav__bytes_to_guid(fmtext + 6, fmtOut->subFormat);
+            } else {
+                if (!onSeek(pUserData, fmtOut->extendedSize, drwav_seek_origin_current)) {
+                    return DRWAV_FALSE;
+                }
+            }
+            *pRunningBytesReadOut += fmtOut->extendedSize;
+
+            bytesReadSoFar += fmtOut->extendedSize;
+        }
+
+        /* Seek past any leftover bytes. For w64 the leftover will be defined based on the chunk size. */
+        if (!onSeek(pUserData, (int)(header.sizeInBytes - bytesReadSoFar), drwav_seek_origin_current)) {
+            return DRWAV_FALSE;
+        }
+        *pRunningBytesReadOut += (header.sizeInBytes - bytesReadSoFar);
+    }
+
+    if (header.paddingSize > 0) {
+        if (!onSeek(pUserData, header.paddingSize, drwav_seek_origin_current)) {
+            return DRWAV_FALSE;
+        }
+        *pRunningBytesReadOut += header.paddingSize;
+    }
+
+    return DRWAV_TRUE;
+}
+
+
+static size_t drwav__on_read(drwav_read_proc onRead, void* pUserData, void* pBufferOut, size_t bytesToRead, drwav_uint64* pCursor)
+{
+    size_t bytesRead;
+
+    DRWAV_ASSERT(onRead != NULL);
+    DRWAV_ASSERT(pCursor != NULL);
+
+    bytesRead = onRead(pUserData, pBufferOut, bytesToRead);
+    *pCursor += bytesRead;
+    return bytesRead;
+}
+
+#if 0
+static drwav_bool32 drwav__on_seek(drwav_seek_proc onSeek, void* pUserData, int offset, drwav_seek_origin origin, drwav_uint64* pCursor)
+{
+    DRWAV_ASSERT(onSeek != NULL);
+    DRWAV_ASSERT(pCursor != NULL);
+
+    if (!onSeek(pUserData, offset, origin)) {
+        return DRWAV_FALSE;
+    }
+
+    if (origin == drwav_seek_origin_start) {
+        *pCursor = offset;
+    } else {
+        *pCursor += offset;
+    }
+
+    return DRWAV_TRUE;
+}
+#endif
+
+
+
+static drwav_uint32 drwav_get_bytes_per_pcm_frame(drwav* pWav)
+{
+    /*
+    The bytes per frame is a bit ambiguous. It can be either be based on the bits per sample, or the block align. The way I'm doing it here
+    is that if the bits per sample is a multiple of 8, use floor(bitsPerSample*channels/8), otherwise fall back to the block align.
+    */
+    if ((pWav->bitsPerSample & 0x7) == 0) {
+        /* Bits per sample is a multiple of 8. */
+        return (pWav->bitsPerSample * pWav->fmt.channels) >> 3;
+    } else {
+        return pWav->fmt.blockAlign;
+    }
+}
+
+DRWAV_API drwav_uint16 drwav_fmt_get_format(const drwav_fmt* pFMT)
+{
+    if (pFMT == NULL) {
+        return 0;
+    }
+
+    if (pFMT->formatTag != DR_WAVE_FORMAT_EXTENSIBLE) {
+        return pFMT->formatTag;
+    } else {
+        return drwav__bytes_to_u16(pFMT->subFormat);    /* Only the first two bytes are required. */
+    }
+}
+
+static drwav_bool32 drwav_preinit(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, void* pReadSeekUserData, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pWav == NULL || onRead == NULL || onSeek == NULL) {
+        return DRWAV_FALSE;
+    }
+
+    DRWAV_ZERO_MEMORY(pWav, sizeof(*pWav));
+    pWav->onRead    = onRead;
+    pWav->onSeek    = onSeek;
+    pWav->pUserData = pReadSeekUserData;
+    pWav->allocationCallbacks = drwav_copy_allocation_callbacks_or_defaults(pAllocationCallbacks);
+
+    if (pWav->allocationCallbacks.onFree == NULL || (pWav->allocationCallbacks.onMalloc == NULL && pWav->allocationCallbacks.onRealloc == NULL)) {
+        return DRWAV_FALSE;    /* Invalid allocation callbacks. */
+    }
+
+    return DRWAV_TRUE;
+}
+
+static drwav_bool32 drwav_init__internal(drwav* pWav, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags)
+{
+    /* This function assumes drwav_preinit() has been called beforehand. */
+
+    drwav_uint64 cursor;    /* <-- Keeps track of the byte position so we can seek to specific locations. */
+    drwav_bool32 sequential;
+    drwav_uint8 riff[4];
+    drwav_fmt fmt;
+    unsigned short translatedFormatTag;
+    drwav_uint64 sampleCountFromFactChunk;
+    drwav_bool32 foundDataChunk;
+    drwav_uint64 dataChunkSize;
+    drwav_uint64 chunkSize;
+
+    cursor = 0;
+    sequential = (flags & DRWAV_SEQUENTIAL) != 0;
+
+    /* The first 4 bytes should be the RIFF identifier. */
+    if (drwav__on_read(pWav->onRead, pWav->pUserData, riff, sizeof(riff), &cursor) != sizeof(riff)) {
+        return DRWAV_FALSE;
+    }
+
+    /*
+    The first 4 bytes can be used to identify the container. For RIFF files it will start with "RIFF" and for
+    w64 it will start with "riff".
+    */
+    if (drwav__fourcc_equal(riff, "RIFF")) {
+        pWav->container = drwav_container_riff;
+    } else if (drwav__fourcc_equal(riff, "riff")) {
+        int i;
+        drwav_uint8 riff2[12];
+
+        pWav->container = drwav_container_w64;
+
+        /* Check the rest of the GUID for validity. */
+        if (drwav__on_read(pWav->onRead, pWav->pUserData, riff2, sizeof(riff2), &cursor) != sizeof(riff2)) {
+            return DRWAV_FALSE;
+        }
+
+        for (i = 0; i < 12; ++i) {
+            if (riff2[i] != drwavGUID_W64_RIFF[i+4]) {
+                return DRWAV_FALSE;
+            }
+        }
+    } else {
+        return DRWAV_FALSE;   /* Unknown or unsupported container. */
+    }
+
+
+    if (pWav->container == drwav_container_riff) {
+        drwav_uint8 chunkSizeBytes[4];
+        drwav_uint8 wave[4];
+
+        /* RIFF/WAVE */
+        if (drwav__on_read(pWav->onRead, pWav->pUserData, chunkSizeBytes, sizeof(chunkSizeBytes), &cursor) != sizeof(chunkSizeBytes)) {
+            return DRWAV_FALSE;
+        }
+
+        if (drwav__bytes_to_u32(chunkSizeBytes) < 36) {
+            return DRWAV_FALSE;    /* Chunk size should always be at least 36 bytes. */
+        }
+
+        if (drwav__on_read(pWav->onRead, pWav->pUserData, wave, sizeof(wave), &cursor) != sizeof(wave)) {
+            return DRWAV_FALSE;
+        }
+
+        if (!drwav__fourcc_equal(wave, "WAVE")) {
+            return DRWAV_FALSE;    /* Expecting "WAVE". */
+        }
+    } else {
+        drwav_uint8 chunkSizeBytes[8];
+        drwav_uint8 wave[16];
+
+        /* W64 */
+        if (drwav__on_read(pWav->onRead, pWav->pUserData, chunkSizeBytes, sizeof(chunkSizeBytes), &cursor) != sizeof(chunkSizeBytes)) {
+            return DRWAV_FALSE;
+        }
+
+        if (drwav__bytes_to_u64(chunkSizeBytes) < 80) {
+            return DRWAV_FALSE;
+        }
+
+        if (drwav__on_read(pWav->onRead, pWav->pUserData, wave, sizeof(wave), &cursor) != sizeof(wave)) {
+            return DRWAV_FALSE;
+        }
+
+        if (!drwav__guid_equal(wave, drwavGUID_W64_WAVE)) {
+            return DRWAV_FALSE;
+        }
+    }
+
+
+    /* The next bytes should be the "fmt " chunk. */
+    if (!drwav__read_fmt(pWav->onRead, pWav->onSeek, pWav->pUserData, pWav->container, &cursor, &fmt)) {
+        return DRWAV_FALSE;    /* Failed to read the "fmt " chunk. */
+    }
+
+    /* Basic validation. */
+    if ((fmt.sampleRate    == 0 || fmt.sampleRate    > DRWAV_MAX_SAMPLE_RATE)     ||
+        (fmt.channels      == 0 || fmt.channels      > DRWAV_MAX_CHANNELS)        ||
+        (fmt.bitsPerSample == 0 || fmt.bitsPerSample > DRWAV_MAX_BITS_PER_SAMPLE) ||
+        fmt.blockAlign == 0) {
+        return DRWAV_FALSE; /* Probably an invalid WAV file. */
+    }
+
+
+    /* Translate the internal format. */
+    translatedFormatTag = fmt.formatTag;
+    if (translatedFormatTag == DR_WAVE_FORMAT_EXTENSIBLE) {
+        translatedFormatTag = drwav__bytes_to_u16(fmt.subFormat + 0);
+    }
+
+
+
+    sampleCountFromFactChunk = 0;
+
+    /*
+    We need to enumerate over each chunk for two reasons:
+      1) The "data" chunk may not be the next one
+      2) We may want to report each chunk back to the client
+    
+    In order to correctly report each chunk back to the client we will need to keep looping until the end of the file.
+    */
+    foundDataChunk = DRWAV_FALSE;
+    dataChunkSize = 0;
+
+    /* The next chunk we care about is the "data" chunk. This is not necessarily the next chunk so we'll need to loop. */
+    for (;;)
+    {
+        drwav_chunk_header header;
+        drwav_result result = drwav__read_chunk_header(pWav->onRead, pWav->pUserData, pWav->container, &cursor, &header);
+        if (result != DRWAV_SUCCESS) {
+            if (!foundDataChunk) {
+                return DRWAV_FALSE;
+            } else {
+                break;  /* Probably at the end of the file. Get out of the loop. */
+            }
+        }
+
+        /* Tell the client about this chunk. */
+        if (!sequential && onChunk != NULL) {
+            drwav_uint64 callbackBytesRead = onChunk(pChunkUserData, pWav->onRead, pWav->onSeek, pWav->pUserData, &header, pWav->container, &fmt);
+
+            /*
+            dr_wav may need to read the contents of the chunk, so we now need to seek back to the position before
+            we called the callback.
+            */
+            if (callbackBytesRead > 0) {
+                if (!drwav__seek_from_start(pWav->onSeek, cursor, pWav->pUserData)) {
+                    return DRWAV_FALSE;
+                }
+            }
+        }
+        
+
+        if (!foundDataChunk) {
+            pWav->dataChunkDataPos = cursor;
+        }
+
+        chunkSize = header.sizeInBytes;
+        if (pWav->container == drwav_container_riff) {
+            if (drwav__fourcc_equal(header.id.fourcc, "data")) {
+                foundDataChunk = DRWAV_TRUE;
+                dataChunkSize = chunkSize;
+            }
+        } else {
+            if (drwav__guid_equal(header.id.guid, drwavGUID_W64_DATA)) {
+                foundDataChunk = DRWAV_TRUE;
+                dataChunkSize = chunkSize;
+            }
+        }
+
+        /*
+        If at this point we have found the data chunk and we're running in sequential mode, we need to break out of this loop. The reason for
+        this is that we would otherwise require a backwards seek which sequential mode forbids.
+        */
+        if (foundDataChunk && sequential) {
+            break;
+        }
+
+        /* Optional. Get the total sample count from the FACT chunk. This is useful for compressed formats. */
+        if (pWav->container == drwav_container_riff) {
+            if (drwav__fourcc_equal(header.id.fourcc, "fact")) {
+                drwav_uint32 sampleCount;
+                if (drwav__on_read(pWav->onRead, pWav->pUserData, &sampleCount, 4, &cursor) != 4) {
+                    return DRWAV_FALSE;
+                }
+                chunkSize -= 4;
+
+                if (!foundDataChunk) {
+                    pWav->dataChunkDataPos = cursor;
+                }
+
+                /*
+                The sample count in the "fact" chunk is either unreliable, or I'm not understanding it properly. For now I am only enabling this
+                for Microsoft ADPCM formats.
+                */
+                if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) {
+                    sampleCountFromFactChunk = sampleCount;
+                } else {
+                    sampleCountFromFactChunk = 0;
+                }
+            }
+        } else {
+            if (drwav__guid_equal(header.id.guid, drwavGUID_W64_FACT)) {
+                if (drwav__on_read(pWav->onRead, pWav->pUserData, &sampleCountFromFactChunk, 8, &cursor) != 8) {
+                    return DRWAV_FALSE;
+                }
+                chunkSize -= 8;
+
+                if (!foundDataChunk) {
+                    pWav->dataChunkDataPos = cursor;
+                }
+            }
+        }
+
+        /* "smpl" chunk. */
+        if (pWav->container == drwav_container_riff) {
+            if (drwav__fourcc_equal(header.id.fourcc, "smpl")) {
+                drwav_uint8 smplHeaderData[36];    /* 36 = size of the smpl header section, not including the loop data. */
+                if (chunkSize >= sizeof(smplHeaderData)) {
+                    drwav_uint64 bytesJustRead = drwav__on_read(pWav->onRead, pWav->pUserData, smplHeaderData, sizeof(smplHeaderData), &cursor);
+                    chunkSize -= bytesJustRead;
+
+                    if (bytesJustRead == sizeof(smplHeaderData)) {
+                        drwav_uint32 iLoop;
+
+                        pWav->smpl.manufacturer      = drwav__bytes_to_u32(smplHeaderData+0);
+                        pWav->smpl.product           = drwav__bytes_to_u32(smplHeaderData+4);
+                        pWav->smpl.samplePeriod      = drwav__bytes_to_u32(smplHeaderData+8);
+                        pWav->smpl.midiUnityNotes    = drwav__bytes_to_u32(smplHeaderData+12);
+                        pWav->smpl.midiPitchFraction = drwav__bytes_to_u32(smplHeaderData+16);
+                        pWav->smpl.smpteFormat       = drwav__bytes_to_u32(smplHeaderData+20);
+                        pWav->smpl.smpteOffset       = drwav__bytes_to_u32(smplHeaderData+24);
+                        pWav->smpl.numSampleLoops    = drwav__bytes_to_u32(smplHeaderData+28);
+                        pWav->smpl.samplerData       = drwav__bytes_to_u32(smplHeaderData+32);
+
+                        for (iLoop = 0; iLoop < pWav->smpl.numSampleLoops && iLoop < drwav_countof(pWav->smpl.loops); ++iLoop) {
+                            drwav_uint8 smplLoopData[24];  /* 24 = size of a loop section in the smpl chunk. */
+                            bytesJustRead = drwav__on_read(pWav->onRead, pWav->pUserData, smplLoopData, sizeof(smplLoopData), &cursor);
+                            chunkSize -= bytesJustRead;
+
+                            if (bytesJustRead == sizeof(smplLoopData)) {
+                                pWav->smpl.loops[iLoop].cuePointId = drwav__bytes_to_u32(smplLoopData+0);
+                                pWav->smpl.loops[iLoop].type       = drwav__bytes_to_u32(smplLoopData+4);
+                                pWav->smpl.loops[iLoop].start      = drwav__bytes_to_u32(smplLoopData+8);
+                                pWav->smpl.loops[iLoop].end        = drwav__bytes_to_u32(smplLoopData+12);
+                                pWav->smpl.loops[iLoop].fraction   = drwav__bytes_to_u32(smplLoopData+16);
+                                pWav->smpl.loops[iLoop].playCount  = drwav__bytes_to_u32(smplLoopData+20);
+                            } else {
+                                break;  /* Break from the smpl loop for loop. */
+                            }
+                        }
+                    }
+                } else {
+                    /* Looks like invalid data. Ignore the chunk. */
+                }
+            }
+        } else {
+            if (drwav__guid_equal(header.id.guid, drwavGUID_W64_SMPL)) {
+                /*
+                This path will be hit when a W64 WAV file contains a smpl chunk. I don't have a sample file to test this path, so a contribution
+                is welcome to add support for this.
+                */
+            }
+        }
+
+        /* Make sure we seek past the padding. */
+        chunkSize += header.paddingSize;
+        if (!drwav__seek_forward(pWav->onSeek, chunkSize, pWav->pUserData)) {
+            break;
+        }
+        cursor += chunkSize;
+
+        if (!foundDataChunk) {
+            pWav->dataChunkDataPos = cursor;
+        }
+    }
+
+    /* If we haven't found a data chunk, return an error. */
+    if (!foundDataChunk) {
+        return DRWAV_FALSE;
+    }
+
+    /* We may have moved passed the data chunk. If so we need to move back. If running in sequential mode we can assume we are already sitting on the data chunk. */
+    if (!sequential) {
+        if (!drwav__seek_from_start(pWav->onSeek, pWav->dataChunkDataPos, pWav->pUserData)) {
+            return DRWAV_FALSE;
+        }
+        cursor = pWav->dataChunkDataPos;
+    }
+    
+
+    /* At this point we should be sitting on the first byte of the raw audio data. */
+
+    pWav->fmt                 = fmt;
+    pWav->sampleRate          = fmt.sampleRate;
+    pWav->channels            = fmt.channels;
+    pWav->bitsPerSample       = fmt.bitsPerSample;
+    pWav->bytesRemaining      = dataChunkSize;
+    pWav->translatedFormatTag = translatedFormatTag;
+    pWav->dataChunkDataSize   = dataChunkSize;
+
+    if (sampleCountFromFactChunk != 0) {
+        pWav->totalPCMFrameCount = sampleCountFromFactChunk;
+    } else {
+        pWav->totalPCMFrameCount = dataChunkSize / drwav_get_bytes_per_pcm_frame(pWav);
+
+        if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) {
+            drwav_uint64 totalBlockHeaderSizeInBytes;
+            drwav_uint64 blockCount = dataChunkSize / fmt.blockAlign;
+
+            /* Make sure any trailing partial block is accounted for. */
+            if ((blockCount * fmt.blockAlign) < dataChunkSize) {
+                blockCount += 1;
+            }
+
+            /* We decode two samples per byte. There will be blockCount headers in the data chunk. This is enough to know how to calculate the total PCM frame count. */
+            totalBlockHeaderSizeInBytes = blockCount * (6*fmt.channels);
+            pWav->totalPCMFrameCount = ((dataChunkSize - totalBlockHeaderSizeInBytes) * 2) / fmt.channels;
+        }
+        if (pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) {
+            drwav_uint64 totalBlockHeaderSizeInBytes;
+            drwav_uint64 blockCount = dataChunkSize / fmt.blockAlign;
+
+            /* Make sure any trailing partial block is accounted for. */
+            if ((blockCount * fmt.blockAlign) < dataChunkSize) {
+                blockCount += 1;
+            }
+
+            /* We decode two samples per byte. There will be blockCount headers in the data chunk. This is enough to know how to calculate the total PCM frame count. */
+            totalBlockHeaderSizeInBytes = blockCount * (4*fmt.channels);
+            pWav->totalPCMFrameCount = ((dataChunkSize - totalBlockHeaderSizeInBytes) * 2) / fmt.channels;
+
+            /* The header includes a decoded sample for each channel which acts as the initial predictor sample. */
+            pWav->totalPCMFrameCount += blockCount;
+        }
+    }
+
+    /* Some formats only support a certain number of channels. */
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM || pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) {
+        if (pWav->channels > 2) {
+            return DRWAV_FALSE;
+        }
+    }
+
+#ifdef DR_WAV_LIBSNDFILE_COMPAT
+    /*
+    I use libsndfile as a benchmark for testing, however in the version I'm using (from the Windows installer on the libsndfile website),
+    it appears the total sample count libsndfile uses for MS-ADPCM is incorrect. It would seem they are computing the total sample count
+    from the number of blocks, however this results in the inclusion of extra silent samples at the end of the last block. The correct
+    way to know the total sample count is to inspect the "fact" chunk, which should always be present for compressed formats, and should
+    always include the sample count. This little block of code below is only used to emulate the libsndfile logic so I can properly run my
+    correctness tests against libsndfile, and is disabled by default.
+    */
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) {
+        drwav_uint64 blockCount = dataChunkSize / fmt.blockAlign;
+        pWav->totalPCMFrameCount = (((blockCount * (fmt.blockAlign - (6*pWav->channels))) * 2)) / fmt.channels;  /* x2 because two samples per byte. */
+    }
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) {
+        drwav_uint64 blockCount = dataChunkSize / fmt.blockAlign;
+        pWav->totalPCMFrameCount = (((blockCount * (fmt.blockAlign - (4*pWav->channels))) * 2) + (blockCount * pWav->channels)) / fmt.channels;
+    }
+#endif
+
+    return DRWAV_TRUE;
+}
+
+DRWAV_API drwav_bool32 drwav_init(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    return drwav_init_ex(pWav, onRead, onSeek, NULL, pUserData, NULL, 0, pAllocationCallbacks);
+}
+
+DRWAV_API drwav_bool32 drwav_init_ex(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, drwav_chunk_proc onChunk, void* pReadSeekUserData, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (!drwav_preinit(pWav, onRead, onSeek, pReadSeekUserData, pAllocationCallbacks)) {
+        return DRWAV_FALSE;
+    }
+
+    return drwav_init__internal(pWav, onChunk, pChunkUserData, flags);
+}
+
+
+static drwav_uint32 drwav__riff_chunk_size_riff(drwav_uint64 dataChunkSize)
+{
+    drwav_uint32 dataSubchunkPaddingSize = drwav__chunk_padding_size_riff(dataChunkSize);
+
+    if (dataChunkSize <= (0xFFFFFFFFUL - 36 - dataSubchunkPaddingSize)) {
+        return 36 + (drwav_uint32)(dataChunkSize + dataSubchunkPaddingSize);
+    } else {
+        return 0xFFFFFFFF;
+    }
+}
+
+static drwav_uint32 drwav__data_chunk_size_riff(drwav_uint64 dataChunkSize)
+{
+    if (dataChunkSize <= 0xFFFFFFFFUL) {
+        return (drwav_uint32)dataChunkSize;
+    } else {
+        return 0xFFFFFFFFUL;
+    }
+}
+
+static drwav_uint64 drwav__riff_chunk_size_w64(drwav_uint64 dataChunkSize)
+{
+    drwav_uint64 dataSubchunkPaddingSize = drwav__chunk_padding_size_w64(dataChunkSize);
+
+    return 80 + 24 + dataChunkSize + dataSubchunkPaddingSize;   /* +24 because W64 includes the size of the GUID and size fields. */
+}
+
+static drwav_uint64 drwav__data_chunk_size_w64(drwav_uint64 dataChunkSize)
+{
+    return 24 + dataChunkSize;        /* +24 because W64 includes the size of the GUID and size fields. */
+}
+
+
+static drwav_bool32 drwav_preinit_write(drwav* pWav, const drwav_data_format* pFormat, drwav_bool32 isSequential, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pWav == NULL || onWrite == NULL) {
+        return DRWAV_FALSE;
+    }
+
+    if (!isSequential && onSeek == NULL) {
+        return DRWAV_FALSE; /* <-- onSeek is required when in non-sequential mode. */
+    }
+
+    /* Not currently supporting compressed formats. Will need to add support for the "fact" chunk before we enable this. */
+    if (pFormat->format == DR_WAVE_FORMAT_EXTENSIBLE) {
+        return DRWAV_FALSE;
+    }
+    if (pFormat->format == DR_WAVE_FORMAT_ADPCM || pFormat->format == DR_WAVE_FORMAT_DVI_ADPCM) {
+        return DRWAV_FALSE;
+    }
+
+    DRWAV_ZERO_MEMORY(pWav, sizeof(*pWav));
+    pWav->onWrite   = onWrite;
+    pWav->onSeek    = onSeek;
+    pWav->pUserData = pUserData;
+    pWav->allocationCallbacks = drwav_copy_allocation_callbacks_or_defaults(pAllocationCallbacks);
+
+    if (pWav->allocationCallbacks.onFree == NULL || (pWav->allocationCallbacks.onMalloc == NULL && pWav->allocationCallbacks.onRealloc == NULL)) {
+        return DRWAV_FALSE;    /* Invalid allocation callbacks. */
+    }
+
+    pWav->fmt.formatTag = (drwav_uint16)pFormat->format;
+    pWav->fmt.channels = (drwav_uint16)pFormat->channels;
+    pWav->fmt.sampleRate = pFormat->sampleRate;
+    pWav->fmt.avgBytesPerSec = (drwav_uint32)((pFormat->bitsPerSample * pFormat->sampleRate * pFormat->channels) / 8);
+    pWav->fmt.blockAlign = (drwav_uint16)((pFormat->channels * pFormat->bitsPerSample) / 8);
+    pWav->fmt.bitsPerSample = (drwav_uint16)pFormat->bitsPerSample;
+    pWav->fmt.extendedSize = 0;
+    pWav->isSequentialWrite = isSequential;
+
+    return DRWAV_TRUE;
+}
+
+static drwav_bool32 drwav_init_write__internal(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount)
+{
+    /* The function assumes drwav_preinit_write() was called beforehand. */
+
+    size_t runningPos = 0;
+    drwav_uint64 initialDataChunkSize = 0;
+    drwav_uint64 chunkSizeFMT;
+
+    /*
+    The initial values for the "RIFF" and "data" chunks depends on whether or not we are initializing in sequential mode or not. In
+    sequential mode we set this to its final values straight away since they can be calculated from the total sample count. In non-
+    sequential mode we initialize it all to zero and fill it out in drwav_uninit() using a backwards seek.
+    */
+    if (pWav->isSequentialWrite) {
+        initialDataChunkSize = (totalSampleCount * pWav->fmt.bitsPerSample) / 8;
+
+        /*
+        The RIFF container has a limit on the number of samples. drwav is not allowing this. There's no practical limits for Wave64
+        so for the sake of simplicity I'm not doing any validation for that.
+        */
+        if (pFormat->container == drwav_container_riff) {
+            if (initialDataChunkSize > (0xFFFFFFFFUL - 36)) {
+                return DRWAV_FALSE; /* Not enough room to store every sample. */
+            }
+        }
+    }
+
+    pWav->dataChunkDataSizeTargetWrite = initialDataChunkSize;
+
+
+    /* "RIFF" chunk. */
+    if (pFormat->container == drwav_container_riff) {
+        drwav_uint32 chunkSizeRIFF = 36 + (drwav_uint32)initialDataChunkSize;   /* +36 = "RIFF"+[RIFF Chunk Size]+"WAVE" + [sizeof "fmt " chunk] */
+        runningPos += pWav->onWrite(pWav->pUserData, "RIFF", 4);
+        runningPos += pWav->onWrite(pWav->pUserData, &chunkSizeRIFF, 4);
+        runningPos += pWav->onWrite(pWav->pUserData, "WAVE", 4);
+    } else {
+        drwav_uint64 chunkSizeRIFF = 80 + 24 + initialDataChunkSize;   /* +24 because W64 includes the size of the GUID and size fields. */
+        runningPos += pWav->onWrite(pWav->pUserData, drwavGUID_W64_RIFF, 16);
+        runningPos += pWav->onWrite(pWav->pUserData, &chunkSizeRIFF, 8);
+        runningPos += pWav->onWrite(pWav->pUserData, drwavGUID_W64_WAVE, 16);
+    }
+
+    /* "fmt " chunk. */
+    if (pFormat->container == drwav_container_riff) {
+        chunkSizeFMT = 16;
+        runningPos += pWav->onWrite(pWav->pUserData, "fmt ", 4);
+        runningPos += pWav->onWrite(pWav->pUserData, &chunkSizeFMT, 4);
+    } else {
+        chunkSizeFMT = 40;
+        runningPos += pWav->onWrite(pWav->pUserData, drwavGUID_W64_FMT, 16);
+        runningPos += pWav->onWrite(pWav->pUserData, &chunkSizeFMT, 8);
+    }
+
+    runningPos += pWav->onWrite(pWav->pUserData, &pWav->fmt.formatTag,      2);
+    runningPos += pWav->onWrite(pWav->pUserData, &pWav->fmt.channels,       2);
+    runningPos += pWav->onWrite(pWav->pUserData, &pWav->fmt.sampleRate,     4);
+    runningPos += pWav->onWrite(pWav->pUserData, &pWav->fmt.avgBytesPerSec, 4);
+    runningPos += pWav->onWrite(pWav->pUserData, &pWav->fmt.blockAlign,     2);
+    runningPos += pWav->onWrite(pWav->pUserData, &pWav->fmt.bitsPerSample,  2);
+
+    pWav->dataChunkDataPos = runningPos;
+
+    /* "data" chunk. */
+    if (pFormat->container == drwav_container_riff) {
+        drwav_uint32 chunkSizeDATA = (drwav_uint32)initialDataChunkSize;
+        runningPos += pWav->onWrite(pWav->pUserData, "data", 4);
+        runningPos += pWav->onWrite(pWav->pUserData, &chunkSizeDATA, 4);
+    } else {
+        drwav_uint64 chunkSizeDATA = 24 + initialDataChunkSize; /* +24 because W64 includes the size of the GUID and size fields. */
+        runningPos += pWav->onWrite(pWav->pUserData, drwavGUID_W64_DATA, 16);
+        runningPos += pWav->onWrite(pWav->pUserData, &chunkSizeDATA, 8);
+    }
+
+
+    /* Simple validation. */
+    if (pFormat->container == drwav_container_riff) {
+        if (runningPos != 20 + chunkSizeFMT + 8) {
+            return DRWAV_FALSE;
+        }
+    } else {
+        if (runningPos != 40 + chunkSizeFMT + 24) {
+            return DRWAV_FALSE;
+        }
+    }
+    
+
+    /* Set some properties for the client's convenience. */
+    pWav->container = pFormat->container;
+    pWav->channels = (drwav_uint16)pFormat->channels;
+    pWav->sampleRate = pFormat->sampleRate;
+    pWav->bitsPerSample = (drwav_uint16)pFormat->bitsPerSample;
+    pWav->translatedFormatTag = (drwav_uint16)pFormat->format;
+
+    return DRWAV_TRUE;
+}
+
+
+DRWAV_API drwav_bool32 drwav_init_write(drwav* pWav, const drwav_data_format* pFormat, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (!drwav_preinit_write(pWav, pFormat, DRWAV_FALSE, onWrite, onSeek, pUserData, pAllocationCallbacks)) {
+        return DRWAV_FALSE;
+    }
+
+    return drwav_init_write__internal(pWav, pFormat, 0);               /* DRWAV_FALSE = Not Sequential */
+}
+
+DRWAV_API drwav_bool32 drwav_init_write_sequential(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_write_proc onWrite, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (!drwav_preinit_write(pWav, pFormat, DRWAV_TRUE, onWrite, NULL, pUserData, pAllocationCallbacks)) {
+        return DRWAV_FALSE;
+    }
+
+    return drwav_init_write__internal(pWav, pFormat, totalSampleCount); /* DRWAV_TRUE = Sequential */
+}
+
+DRWAV_API drwav_bool32 drwav_init_write_sequential_pcm_frames(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalPCMFrameCount, drwav_write_proc onWrite, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pFormat == NULL) {
+        return DRWAV_FALSE;
+    }
+
+    return drwav_init_write_sequential(pWav, pFormat, totalPCMFrameCount*pFormat->channels, onWrite, pUserData, pAllocationCallbacks);
+}
+
+DRWAV_API drwav_uint64 drwav_target_write_size_bytes(const drwav_data_format* pFormat, drwav_uint64 totalSampleCount)
+{
+    /* Casting totalSampleCount to drwav_int64 for VC6 compatibility. No issues in practice because nobody is going to exhaust the whole 63 bits. */
+    drwav_uint64 targetDataSizeBytes = (drwav_uint64)((drwav_int64)totalSampleCount * pFormat->channels * pFormat->bitsPerSample/8.0);
+    drwav_uint64 riffChunkSizeBytes;
+    drwav_uint64 fileSizeBytes;
+
+    if (pFormat->container == drwav_container_riff) {
+        riffChunkSizeBytes = drwav__riff_chunk_size_riff(targetDataSizeBytes);
+        fileSizeBytes = (8 + riffChunkSizeBytes); /* +8 because WAV doesn't include the size of the ChunkID and ChunkSize fields. */
+    } else {
+        riffChunkSizeBytes = drwav__riff_chunk_size_w64(targetDataSizeBytes);
+        fileSizeBytes = riffChunkSizeBytes;
+    }
+
+    return fileSizeBytes;
+}
+
+
+#ifndef DR_WAV_NO_STDIO
+
+/* drwav_result_from_errno() is only used for fopen() and wfopen() so putting it inside DR_WAV_NO_STDIO for now. If something else needs this later we can move it out. */
+#include <errno.h>
+static drwav_result drwav_result_from_errno(int e)
+{
+    switch (e)
+    {
+        case 0: return DRWAV_SUCCESS;
+    #ifdef EPERM
+        case EPERM: return DRWAV_INVALID_OPERATION;
+    #endif
+    #ifdef ENOENT
+        case ENOENT: return DRWAV_DOES_NOT_EXIST;
+    #endif
+    #ifdef ESRCH
+        case ESRCH: return DRWAV_DOES_NOT_EXIST;
+    #endif
+    #ifdef EINTR
+        case EINTR: return DRWAV_INTERRUPT;
+    #endif
+    #ifdef EIO
+        case EIO: return DRWAV_IO_ERROR;
+    #endif
+    #ifdef ENXIO
+        case ENXIO: return DRWAV_DOES_NOT_EXIST;
+    #endif
+    #ifdef E2BIG
+        case E2BIG: return DRWAV_INVALID_ARGS;
+    #endif
+    #ifdef ENOEXEC
+        case ENOEXEC: return DRWAV_INVALID_FILE;
+    #endif
+    #ifdef EBADF
+        case EBADF: return DRWAV_INVALID_FILE;
+    #endif
+    #ifdef ECHILD
+        case ECHILD: return DRWAV_ERROR;
+    #endif
+    #ifdef EAGAIN
+        case EAGAIN: return DRWAV_UNAVAILABLE;
+    #endif
+    #ifdef ENOMEM
+        case ENOMEM: return DRWAV_OUT_OF_MEMORY;
+    #endif
+    #ifdef EACCES
+        case EACCES: return DRWAV_ACCESS_DENIED;
+    #endif
+    #ifdef EFAULT
+        case EFAULT: return DRWAV_BAD_ADDRESS;
+    #endif
+    #ifdef ENOTBLK
+        case ENOTBLK: return DRWAV_ERROR;
+    #endif
+    #ifdef EBUSY
+        case EBUSY: return DRWAV_BUSY;
+    #endif
+    #ifdef EEXIST
+        case EEXIST: return DRWAV_ALREADY_EXISTS;
+    #endif
+    #ifdef EXDEV
+        case EXDEV: return DRWAV_ERROR;
+    #endif
+    #ifdef ENODEV
+        case ENODEV: return DRWAV_DOES_NOT_EXIST;
+    #endif
+    #ifdef ENOTDIR
+        case ENOTDIR: return DRWAV_NOT_DIRECTORY;
+    #endif
+    #ifdef EISDIR
+        case EISDIR: return DRWAV_IS_DIRECTORY;
+    #endif
+    #ifdef EINVAL
+        case EINVAL: return DRWAV_INVALID_ARGS;
+    #endif
+    #ifdef ENFILE
+        case ENFILE: return DRWAV_TOO_MANY_OPEN_FILES;
+    #endif
+    #ifdef EMFILE
+        case EMFILE: return DRWAV_TOO_MANY_OPEN_FILES;
+    #endif
+    #ifdef ENOTTY
+        case ENOTTY: return DRWAV_INVALID_OPERATION;
+    #endif
+    #ifdef ETXTBSY
+        case ETXTBSY: return DRWAV_BUSY;
+    #endif
+    #ifdef EFBIG
+        case EFBIG: return DRWAV_TOO_BIG;
+    #endif
+    #ifdef ENOSPC
+        case ENOSPC: return DRWAV_NO_SPACE;
+    #endif
+    #ifdef ESPIPE
+        case ESPIPE: return DRWAV_BAD_SEEK;
+    #endif
+    #ifdef EROFS
+        case EROFS: return DRWAV_ACCESS_DENIED;
+    #endif
+    #ifdef EMLINK
+        case EMLINK: return DRWAV_TOO_MANY_LINKS;
+    #endif
+    #ifdef EPIPE
+        case EPIPE: return DRWAV_BAD_PIPE;
+    #endif
+    #ifdef EDOM
+        case EDOM: return DRWAV_OUT_OF_RANGE;
+    #endif
+    #ifdef ERANGE
+        case ERANGE: return DRWAV_OUT_OF_RANGE;
+    #endif
+    #ifdef EDEADLK
+        case EDEADLK: return DRWAV_DEADLOCK;
+    #endif
+    #ifdef ENAMETOOLONG
+        case ENAMETOOLONG: return DRWAV_PATH_TOO_LONG;
+    #endif
+    #ifdef ENOLCK
+        case ENOLCK: return DRWAV_ERROR;
+    #endif
+    #ifdef ENOSYS
+        case ENOSYS: return DRWAV_NOT_IMPLEMENTED;
+    #endif
+    #ifdef ENOTEMPTY
+        case ENOTEMPTY: return DRWAV_DIRECTORY_NOT_EMPTY;
+    #endif
+    #ifdef ELOOP
+        case ELOOP: return DRWAV_TOO_MANY_LINKS;
+    #endif
+    #ifdef ENOMSG
+        case ENOMSG: return DRWAV_NO_MESSAGE;
+    #endif
+    #ifdef EIDRM
+        case EIDRM: return DRWAV_ERROR;
+    #endif
+    #ifdef ECHRNG
+        case ECHRNG: return DRWAV_ERROR;
+    #endif
+    #ifdef EL2NSYNC
+        case EL2NSYNC: return DRWAV_ERROR;
+    #endif
+    #ifdef EL3HLT
+        case EL3HLT: return DRWAV_ERROR;
+    #endif
+    #ifdef EL3RST
+        case EL3RST: return DRWAV_ERROR;
+    #endif
+    #ifdef ELNRNG
+        case ELNRNG: return DRWAV_OUT_OF_RANGE;
+    #endif
+    #ifdef EUNATCH
+        case EUNATCH: return DRWAV_ERROR;
+    #endif
+    #ifdef ENOCSI
+        case ENOCSI: return DRWAV_ERROR;
+    #endif
+    #ifdef EL2HLT
+        case EL2HLT: return DRWAV_ERROR;
+    #endif
+    #ifdef EBADE
+        case EBADE: return DRWAV_ERROR;
+    #endif
+    #ifdef EBADR
+        case EBADR: return DRWAV_ERROR;
+    #endif
+    #ifdef EXFULL
+        case EXFULL: return DRWAV_ERROR;
+    #endif
+    #ifdef ENOANO
+        case ENOANO: return DRWAV_ERROR;
+    #endif
+    #ifdef EBADRQC
+        case EBADRQC: return DRWAV_ERROR;
+    #endif
+    #ifdef EBADSLT
+        case EBADSLT: return DRWAV_ERROR;
+    #endif
+    #ifdef EBFONT
+        case EBFONT: return DRWAV_INVALID_FILE;
+    #endif
+    #ifdef ENOSTR
+        case ENOSTR: return DRWAV_ERROR;
+    #endif
+    #ifdef ENODATA
+        case ENODATA: return DRWAV_NO_DATA_AVAILABLE;
+    #endif
+    #ifdef ETIME
+        case ETIME: return DRWAV_TIMEOUT;
+    #endif
+    #ifdef ENOSR
+        case ENOSR: return DRWAV_NO_DATA_AVAILABLE;
+    #endif
+    #ifdef ENONET
+        case ENONET: return DRWAV_NO_NETWORK;
+    #endif
+    #ifdef ENOPKG
+        case ENOPKG: return DRWAV_ERROR;
+    #endif
+    #ifdef EREMOTE
+        case EREMOTE: return DRWAV_ERROR;
+    #endif
+    #ifdef ENOLINK
+        case ENOLINK: return DRWAV_ERROR;
+    #endif
+    #ifdef EADV
+        case EADV: return DRWAV_ERROR;
+    #endif
+    #ifdef ESRMNT
+        case ESRMNT: return DRWAV_ERROR;
+    #endif
+    #ifdef ECOMM
+        case ECOMM: return DRWAV_ERROR;
+    #endif
+    #ifdef EPROTO
+        case EPROTO: return DRWAV_ERROR;
+    #endif
+    #ifdef EMULTIHOP
+        case EMULTIHOP: return DRWAV_ERROR;
+    #endif
+    #ifdef EDOTDOT
+        case EDOTDOT: return DRWAV_ERROR;
+    #endif
+    #ifdef EBADMSG
+        case EBADMSG: return DRWAV_BAD_MESSAGE;
+    #endif
+    #ifdef EOVERFLOW
+        case EOVERFLOW: return DRWAV_TOO_BIG;
+    #endif
+    #ifdef ENOTUNIQ
+        case ENOTUNIQ: return DRWAV_NOT_UNIQUE;
+    #endif
+    #ifdef EBADFD
+        case EBADFD: return DRWAV_ERROR;
+    #endif
+    #ifdef EREMCHG
+        case EREMCHG: return DRWAV_ERROR;
+    #endif
+    #ifdef ELIBACC
+        case ELIBACC: return DRWAV_ACCESS_DENIED;
+    #endif
+    #ifdef ELIBBAD
+        case ELIBBAD: return DRWAV_INVALID_FILE;
+    #endif
+    #ifdef ELIBSCN
+        case ELIBSCN: return DRWAV_INVALID_FILE;
+    #endif
+    #ifdef ELIBMAX
+        case ELIBMAX: return DRWAV_ERROR;
+    #endif
+    #ifdef ELIBEXEC
+        case ELIBEXEC: return DRWAV_ERROR;
+    #endif
+    #ifdef EILSEQ
+        case EILSEQ: return DRWAV_INVALID_DATA;
+    #endif
+    #ifdef ERESTART
+        case ERESTART: return DRWAV_ERROR;
+    #endif
+    #ifdef ESTRPIPE
+        case ESTRPIPE: return DRWAV_ERROR;
+    #endif
+    #ifdef EUSERS
+        case EUSERS: return DRWAV_ERROR;
+    #endif
+    #ifdef ENOTSOCK
+        case ENOTSOCK: return DRWAV_NOT_SOCKET;
+    #endif
+    #ifdef EDESTADDRREQ
+        case EDESTADDRREQ: return DRWAV_NO_ADDRESS;
+    #endif
+    #ifdef EMSGSIZE
+        case EMSGSIZE: return DRWAV_TOO_BIG;
+    #endif
+    #ifdef EPROTOTYPE
+        case EPROTOTYPE: return DRWAV_BAD_PROTOCOL;
+    #endif
+    #ifdef ENOPROTOOPT
+        case ENOPROTOOPT: return DRWAV_PROTOCOL_UNAVAILABLE;
+    #endif
+    #ifdef EPROTONOSUPPORT
+        case EPROTONOSUPPORT: return DRWAV_PROTOCOL_NOT_SUPPORTED;
+    #endif
+    #ifdef ESOCKTNOSUPPORT
+        case ESOCKTNOSUPPORT: return DRWAV_SOCKET_NOT_SUPPORTED;
+    #endif
+    #ifdef EOPNOTSUPP
+        case EOPNOTSUPP: return DRWAV_INVALID_OPERATION;
+    #endif
+    #ifdef EPFNOSUPPORT
+        case EPFNOSUPPORT: return DRWAV_PROTOCOL_FAMILY_NOT_SUPPORTED;
+    #endif
+    #ifdef EAFNOSUPPORT
+        case EAFNOSUPPORT: return DRWAV_ADDRESS_FAMILY_NOT_SUPPORTED;
+    #endif
+    #ifdef EADDRINUSE
+        case EADDRINUSE: return DRWAV_ALREADY_IN_USE;
+    #endif
+    #ifdef EADDRNOTAVAIL
+        case EADDRNOTAVAIL: return DRWAV_ERROR;
+    #endif
+    #ifdef ENETDOWN
+        case ENETDOWN: return DRWAV_NO_NETWORK;
+    #endif
+    #ifdef ENETUNREACH
+        case ENETUNREACH: return DRWAV_NO_NETWORK;
+    #endif
+    #ifdef ENETRESET
+        case ENETRESET: return DRWAV_NO_NETWORK;
+    #endif
+    #ifdef ECONNABORTED
+        case ECONNABORTED: return DRWAV_NO_NETWORK;
+    #endif
+    #ifdef ECONNRESET
+        case ECONNRESET: return DRWAV_CONNECTION_RESET;
+    #endif
+    #ifdef ENOBUFS
+        case ENOBUFS: return DRWAV_NO_SPACE;
+    #endif
+    #ifdef EISCONN
+        case EISCONN: return DRWAV_ALREADY_CONNECTED;
+    #endif
+    #ifdef ENOTCONN
+        case ENOTCONN: return DRWAV_NOT_CONNECTED;
+    #endif
+    #ifdef ESHUTDOWN
+        case ESHUTDOWN: return DRWAV_ERROR;
+    #endif
+    #ifdef ETOOMANYREFS
+        case ETOOMANYREFS: return DRWAV_ERROR;
+    #endif
+    #ifdef ETIMEDOUT
+        case ETIMEDOUT: return DRWAV_TIMEOUT;
+    #endif
+    #ifdef ECONNREFUSED
+        case ECONNREFUSED: return DRWAV_CONNECTION_REFUSED;
+    #endif
+    #ifdef EHOSTDOWN
+        case EHOSTDOWN: return DRWAV_NO_HOST;
+    #endif
+    #ifdef EHOSTUNREACH
+        case EHOSTUNREACH: return DRWAV_NO_HOST;
+    #endif
+    #ifdef EALREADY
+        case EALREADY: return DRWAV_IN_PROGRESS;
+    #endif
+    #ifdef EINPROGRESS
+        case EINPROGRESS: return DRWAV_IN_PROGRESS;
+    #endif
+    #ifdef ESTALE
+        case ESTALE: return DRWAV_INVALID_FILE;
+    #endif
+    #ifdef EUCLEAN
+        case EUCLEAN: return DRWAV_ERROR;
+    #endif
+    #ifdef ENOTNAM
+        case ENOTNAM: return DRWAV_ERROR;
+    #endif
+    #ifdef ENAVAIL
+        case ENAVAIL: return DRWAV_ERROR;
+    #endif
+    #ifdef EISNAM
+        case EISNAM: return DRWAV_ERROR;
+    #endif
+    #ifdef EREMOTEIO
+        case EREMOTEIO: return DRWAV_IO_ERROR;
+    #endif
+    #ifdef EDQUOT
+        case EDQUOT: return DRWAV_NO_SPACE;
+    #endif
+    #ifdef ENOMEDIUM
+        case ENOMEDIUM: return DRWAV_DOES_NOT_EXIST;
+    #endif
+    #ifdef EMEDIUMTYPE
+        case EMEDIUMTYPE: return DRWAV_ERROR;
+    #endif
+    #ifdef ECANCELED
+        case ECANCELED: return DRWAV_CANCELLED;
+    #endif
+    #ifdef ENOKEY
+        case ENOKEY: return DRWAV_ERROR;
+    #endif
+    #ifdef EKEYEXPIRED
+        case EKEYEXPIRED: return DRWAV_ERROR;
+    #endif
+    #ifdef EKEYREVOKED
+        case EKEYREVOKED: return DRWAV_ERROR;
+    #endif
+    #ifdef EKEYREJECTED
+        case EKEYREJECTED: return DRWAV_ERROR;
+    #endif
+    #ifdef EOWNERDEAD
+        case EOWNERDEAD: return DRWAV_ERROR;
+    #endif
+    #ifdef ENOTRECOVERABLE
+        case ENOTRECOVERABLE: return DRWAV_ERROR;
+    #endif
+    #ifdef ERFKILL
+        case ERFKILL: return DRWAV_ERROR;
+    #endif
+    #ifdef EHWPOISON
+        case EHWPOISON: return DRWAV_ERROR;
+    #endif
+        default: return DRWAV_ERROR;
+    }
+}
+
+static drwav_result drwav_fopen(FILE** ppFile, const char* pFilePath, const char* pOpenMode)
+{
+#if _MSC_VER && _MSC_VER >= 1400
+    errno_t err;
+#endif
+
+    if (ppFile != NULL) {
+        *ppFile = NULL;  /* Safety. */
+    }
+
+    if (pFilePath == NULL || pOpenMode == NULL || ppFile == NULL) {
+        return DRWAV_INVALID_ARGS;
+    }
+
+#if _MSC_VER && _MSC_VER >= 1400
+    err = fopen_s(ppFile, pFilePath, pOpenMode);
+    if (err != 0) {
+        return drwav_result_from_errno(err);
+    }
+#else
+#if defined(_WIN32) || defined(__APPLE__)
+    *ppFile = fopen(pFilePath, pOpenMode);
+#else
+    #if defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS == 64 && defined(_LARGEFILE64_SOURCE)
+        *ppFile = fopen64(pFilePath, pOpenMode);
+    #else
+        *ppFile = fopen(pFilePath, pOpenMode);
+    #endif
+#endif
+    if (*ppFile == NULL) {
+        drwav_result result = drwav_result_from_errno(errno);
+        if (result == DRWAV_SUCCESS) {
+            result = DRWAV_ERROR;   /* Just a safety check to make sure we never ever return success when pFile == NULL. */
+        }
+
+        return result;
+    }
+#endif
+
+    return DRWAV_SUCCESS;
+}
+
+/*
+_wfopen() isn't always available in all compilation environments.
+
+    * Windows only.
+    * MSVC seems to support it universally as far back as VC6 from what I can tell (haven't checked further back).
+    * MinGW-64 (both 32- and 64-bit) seems to support it.
+    * MinGW wraps it in !defined(__STRICT_ANSI__).
+
+This can be reviewed as compatibility issues arise. The preference is to use _wfopen_s() and _wfopen() as opposed to the wcsrtombs()
+fallback, so if you notice your compiler not detecting this properly I'm happy to look at adding support.
+*/
+#if defined(_WIN32)
+    #if defined(_MSC_VER) || defined(__MINGW64__) || !defined(__STRICT_ANSI__)
+        #define DRWAV_HAS_WFOPEN
+    #endif
+#endif
+
+static drwav_result drwav_wfopen(FILE** ppFile, const wchar_t* pFilePath, const wchar_t* pOpenMode, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (ppFile != NULL) {
+        *ppFile = NULL;  /* Safety. */
+    }
+
+    if (pFilePath == NULL || pOpenMode == NULL || ppFile == NULL) {
+        return DRWAV_INVALID_ARGS;
+    }
+
+#if defined(DRWAV_HAS_WFOPEN)
+    {
+        /* Use _wfopen() on Windows. */
+    #if defined(_MSC_VER) && _MSC_VER >= 1400
+        errno_t err = _wfopen_s(ppFile, pFilePath, pOpenMode);
+        if (err != 0) {
+            return drwav_result_from_errno(err);
+        }
+    #else
+        *ppFile = _wfopen(pFilePath, pOpenMode);
+        if (*ppFile == NULL) {
+            return drwav_result_from_errno(errno);
+        }
+    #endif
+        (void)pAllocationCallbacks;
+    }
+#else
+    /*
+    Use fopen() on anything other than Windows. Requires a conversion. This is annoying because fopen() is locale specific. The only real way I can
+    think of to do this is with wcsrtombs(). Note that wcstombs() is apparently not thread-safe because it uses a static global mbstate_t object for
+    maintaining state. I've checked this with -std=c89 and it works, but if somebody get's a compiler error I'll look into improving compatibility.
+    */
+    {
+        mbstate_t mbs;
+        size_t lenMB;
+        const wchar_t* pFilePathTemp = pFilePath;
+        char* pFilePathMB = NULL;
+        char pOpenModeMB[32] = {0};
+
+        /* Get the length first. */
+        DRWAV_ZERO_OBJECT(&mbs);
+        lenMB = wcsrtombs(NULL, &pFilePathTemp, 0, &mbs);
+        if (lenMB == (size_t)-1) {
+            return drwav_result_from_errno(errno);
+        }
+
+        pFilePathMB = (char*)drwav__malloc_from_callbacks(lenMB + 1, pAllocationCallbacks);
+        if (pFilePathMB == NULL) {
+            return DRWAV_OUT_OF_MEMORY;
+        }
+
+        pFilePathTemp = pFilePath;
+        DRWAV_ZERO_OBJECT(&mbs);
+        wcsrtombs(pFilePathMB, &pFilePathTemp, lenMB + 1, &mbs);
+
+        /* The open mode should always consist of ASCII characters so we should be able to do a trivial conversion. */
+        {
+            size_t i = 0;
+            for (;;) {
+                if (pOpenMode[i] == 0) {
+                    pOpenModeMB[i] = '\0';
+                    break;
+                }
+
+                pOpenModeMB[i] = (char)pOpenMode[i];
+                i += 1;
+            }
+        }
+
+        *ppFile = fopen(pFilePathMB, pOpenModeMB);
+
+        drwav__free_from_callbacks(pFilePathMB, pAllocationCallbacks);
+    }
+
+    if (*ppFile == NULL) {
+        return DRWAV_ERROR;
+    }
+#endif
+
+    return DRWAV_SUCCESS;
+}
+
+
+static size_t drwav__on_read_stdio(void* pUserData, void* pBufferOut, size_t bytesToRead)
+{
+    return fread(pBufferOut, 1, bytesToRead, (FILE*)pUserData);
+}
+
+static size_t drwav__on_write_stdio(void* pUserData, const void* pData, size_t bytesToWrite)
+{
+    return fwrite(pData, 1, bytesToWrite, (FILE*)pUserData);
+}
+
+static drwav_bool32 drwav__on_seek_stdio(void* pUserData, int offset, drwav_seek_origin origin)
+{
+    return fseek((FILE*)pUserData, offset, (origin == drwav_seek_origin_current) ? SEEK_CUR : SEEK_SET) == 0;
+}
+
+DRWAV_API drwav_bool32 drwav_init_file(drwav* pWav, const char* filename, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    return drwav_init_file_ex(pWav, filename, NULL, NULL, 0, pAllocationCallbacks);
+}
+
+
+static drwav_bool32 drwav_init_file__internal_FILE(drwav* pWav, FILE* pFile, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    drwav_bool32 result;
+
+    result = drwav_preinit(pWav, drwav__on_read_stdio, drwav__on_seek_stdio, (void*)pFile, pAllocationCallbacks);
+    if (result != DRWAV_TRUE) {
+        fclose(pFile);
+        return result;
+    }
+
+    result = drwav_init__internal(pWav, onChunk, pChunkUserData, flags);
+    if (result != DRWAV_TRUE) {
+        fclose(pFile);
+        return result;
+    }
+
+    return DRWAV_TRUE;
+}
+
+DRWAV_API drwav_bool32 drwav_init_file_ex(drwav* pWav, const char* filename, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    FILE* pFile;
+    if (drwav_fopen(&pFile, filename, "rb") != DRWAV_SUCCESS) {
+        return DRWAV_FALSE;
+    }
+
+    /* This takes ownership of the FILE* object. */
+    return drwav_init_file__internal_FILE(pWav, pFile, onChunk, pChunkUserData, flags, pAllocationCallbacks);
+}
+
+DRWAV_API drwav_bool32 drwav_init_file_w(drwav* pWav, const wchar_t* filename, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    return drwav_init_file_ex_w(pWav, filename, NULL, NULL, 0, pAllocationCallbacks);
+}
+
+DRWAV_API drwav_bool32 drwav_init_file_ex_w(drwav* pWav, const wchar_t* filename, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    FILE* pFile;
+    if (drwav_wfopen(&pFile, filename, L"rb", pAllocationCallbacks) != DRWAV_SUCCESS) {
+        return DRWAV_FALSE;
+    }
+
+    /* This takes ownership of the FILE* object. */
+    return drwav_init_file__internal_FILE(pWav, pFile, onChunk, pChunkUserData, flags, pAllocationCallbacks);
+}
+
+
+static drwav_bool32 drwav_init_file_write__internal_FILE(drwav* pWav, FILE* pFile, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    drwav_bool32 result;
+
+    result = drwav_preinit_write(pWav, pFormat, isSequential, drwav__on_write_stdio, drwav__on_seek_stdio, (void*)pFile, pAllocationCallbacks);
+    if (result != DRWAV_TRUE) {
+        fclose(pFile);
+        return result;
+    }
+
+    result = drwav_init_write__internal(pWav, pFormat, totalSampleCount);
+    if (result != DRWAV_TRUE) {
+        fclose(pFile);
+        return result;
+    }
+
+    return DRWAV_TRUE;
+}
+
+static drwav_bool32 drwav_init_file_write__internal(drwav* pWav, const char* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    FILE* pFile;
+    if (drwav_fopen(&pFile, filename, "wb") != DRWAV_SUCCESS) {
+        return DRWAV_FALSE;
+    }
+
+    /* This takes ownership of the FILE* object. */
+    return drwav_init_file_write__internal_FILE(pWav, pFile, pFormat, totalSampleCount, isSequential, pAllocationCallbacks);
+}
+
+static drwav_bool32 drwav_init_file_write_w__internal(drwav* pWav, const wchar_t* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    FILE* pFile;
+    if (drwav_wfopen(&pFile, filename, L"wb", pAllocationCallbacks) != DRWAV_SUCCESS) {
+        return DRWAV_FALSE;
+    }
+
+    /* This takes ownership of the FILE* object. */
+    return drwav_init_file_write__internal_FILE(pWav, pFile, pFormat, totalSampleCount, isSequential, pAllocationCallbacks);
+}
+
+DRWAV_API drwav_bool32 drwav_init_file_write(drwav* pWav, const char* filename, const drwav_data_format* pFormat, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    return drwav_init_file_write__internal(pWav, filename, pFormat, 0, DRWAV_FALSE, pAllocationCallbacks);
+}
+
+DRWAV_API drwav_bool32 drwav_init_file_write_sequential(drwav* pWav, const char* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    return drwav_init_file_write__internal(pWav, filename, pFormat, totalSampleCount, DRWAV_TRUE, pAllocationCallbacks);
+}
+
+DRWAV_API drwav_bool32 drwav_init_file_write_sequential_pcm_frames(drwav* pWav, const char* filename, const drwav_data_format* pFormat, drwav_uint64 totalPCMFrameCount, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pFormat == NULL) {
+        return DRWAV_FALSE;
+    }
+
+    return drwav_init_file_write_sequential(pWav, filename, pFormat, totalPCMFrameCount*pFormat->channels, pAllocationCallbacks);
+}
+
+DRWAV_API drwav_bool32 drwav_init_file_write_w(drwav* pWav, const wchar_t* filename, const drwav_data_format* pFormat, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    return drwav_init_file_write_w__internal(pWav, filename, pFormat, 0, DRWAV_FALSE, pAllocationCallbacks);
+}
+
+DRWAV_API drwav_bool32 drwav_init_file_write_sequential_w(drwav* pWav, const wchar_t* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    return drwav_init_file_write_w__internal(pWav, filename, pFormat, totalSampleCount, DRWAV_TRUE, pAllocationCallbacks);
+}
+
+DRWAV_API drwav_bool32 drwav_init_file_write_sequential_pcm_frames_w(drwav* pWav, const wchar_t* filename, const drwav_data_format* pFormat, drwav_uint64 totalPCMFrameCount, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pFormat == NULL) {
+        return DRWAV_FALSE;
+    }
+
+    return drwav_init_file_write_sequential_w(pWav, filename, pFormat, totalPCMFrameCount*pFormat->channels, pAllocationCallbacks);
+}
+#endif  /* DR_WAV_NO_STDIO */
+
+
+static size_t drwav__on_read_memory(void* pUserData, void* pBufferOut, size_t bytesToRead)
+{
+    drwav* pWav = (drwav*)pUserData;
+    size_t bytesRemaining;
+
+    DRWAV_ASSERT(pWav != NULL);
+    DRWAV_ASSERT(pWav->memoryStream.dataSize >= pWav->memoryStream.currentReadPos);
+
+    bytesRemaining = pWav->memoryStream.dataSize - pWav->memoryStream.currentReadPos;
+    if (bytesToRead > bytesRemaining) {
+        bytesToRead = bytesRemaining;
+    }
+
+    if (bytesToRead > 0) {
+        DRWAV_COPY_MEMORY(pBufferOut, pWav->memoryStream.data + pWav->memoryStream.currentReadPos, bytesToRead);
+        pWav->memoryStream.currentReadPos += bytesToRead;
+    }
+
+    return bytesToRead;
+}
+
+static drwav_bool32 drwav__on_seek_memory(void* pUserData, int offset, drwav_seek_origin origin)
+{
+    drwav* pWav = (drwav*)pUserData;
+    DRWAV_ASSERT(pWav != NULL);
+
+    if (origin == drwav_seek_origin_current) {
+        if (offset > 0) {
+            if (pWav->memoryStream.currentReadPos + offset > pWav->memoryStream.dataSize) {
+                return DRWAV_FALSE; /* Trying to seek too far forward. */
+            }
+        } else {
+            if (pWav->memoryStream.currentReadPos < (size_t)-offset) {
+                return DRWAV_FALSE; /* Trying to seek too far backwards. */
+            }
+        }
+
+        /* This will never underflow thanks to the clamps above. */
+        pWav->memoryStream.currentReadPos += offset;
+    } else {
+        if ((drwav_uint32)offset <= pWav->memoryStream.dataSize) {
+            pWav->memoryStream.currentReadPos = offset;
+        } else {
+            return DRWAV_FALSE; /* Trying to seek too far forward. */
+        }
+    }
+    
+    return DRWAV_TRUE;
+}
+
+static size_t drwav__on_write_memory(void* pUserData, const void* pDataIn, size_t bytesToWrite)
+{
+    drwav* pWav = (drwav*)pUserData;
+    size_t bytesRemaining;
+
+    DRWAV_ASSERT(pWav != NULL);
+    DRWAV_ASSERT(pWav->memoryStreamWrite.dataCapacity >= pWav->memoryStreamWrite.currentWritePos);
+
+    bytesRemaining = pWav->memoryStreamWrite.dataCapacity - pWav->memoryStreamWrite.currentWritePos;
+    if (bytesRemaining < bytesToWrite) {
+        /* Need to reallocate. */
+        void* pNewData;
+        size_t newDataCapacity = (pWav->memoryStreamWrite.dataCapacity == 0) ? 256 : pWav->memoryStreamWrite.dataCapacity * 2;
+
+        /* If doubling wasn't enough, just make it the minimum required size to write the data. */
+        if ((newDataCapacity - pWav->memoryStreamWrite.currentWritePos) < bytesToWrite) {
+            newDataCapacity = pWav->memoryStreamWrite.currentWritePos + bytesToWrite;
+        }
+
+        pNewData = drwav__realloc_from_callbacks(*pWav->memoryStreamWrite.ppData, newDataCapacity, pWav->memoryStreamWrite.dataCapacity, &pWav->allocationCallbacks);
+        if (pNewData == NULL) {
+            return 0;
+        }
+
+        *pWav->memoryStreamWrite.ppData = pNewData;
+        pWav->memoryStreamWrite.dataCapacity = newDataCapacity;
+    }
+
+    DRWAV_COPY_MEMORY(((drwav_uint8*)(*pWav->memoryStreamWrite.ppData)) + pWav->memoryStreamWrite.currentWritePos, pDataIn, bytesToWrite);
+
+    pWav->memoryStreamWrite.currentWritePos += bytesToWrite;
+    if (pWav->memoryStreamWrite.dataSize < pWav->memoryStreamWrite.currentWritePos) {
+        pWav->memoryStreamWrite.dataSize = pWav->memoryStreamWrite.currentWritePos;
+    }
+
+    *pWav->memoryStreamWrite.pDataSize = pWav->memoryStreamWrite.dataSize;
+
+    return bytesToWrite;
+}
+
+static drwav_bool32 drwav__on_seek_memory_write(void* pUserData, int offset, drwav_seek_origin origin)
+{
+    drwav* pWav = (drwav*)pUserData;
+    DRWAV_ASSERT(pWav != NULL);
+
+    if (origin == drwav_seek_origin_current) {
+        if (offset > 0) {
+            if (pWav->memoryStreamWrite.currentWritePos + offset > pWav->memoryStreamWrite.dataSize) {
+                offset = (int)(pWav->memoryStreamWrite.dataSize - pWav->memoryStreamWrite.currentWritePos);  /* Trying to seek too far forward. */
+            }
+        } else {
+            if (pWav->memoryStreamWrite.currentWritePos < (size_t)-offset) {
+                offset = -(int)pWav->memoryStreamWrite.currentWritePos;  /* Trying to seek too far backwards. */
+            }
+        }
+
+        /* This will never underflow thanks to the clamps above. */
+        pWav->memoryStreamWrite.currentWritePos += offset;
+    } else {
+        if ((drwav_uint32)offset <= pWav->memoryStreamWrite.dataSize) {
+            pWav->memoryStreamWrite.currentWritePos = offset;
+        } else {
+            pWav->memoryStreamWrite.currentWritePos = pWav->memoryStreamWrite.dataSize;  /* Trying to seek too far forward. */
+        }
+    }
+    
+    return DRWAV_TRUE;
+}
+
+DRWAV_API drwav_bool32 drwav_init_memory(drwav* pWav, const void* data, size_t dataSize, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    return drwav_init_memory_ex(pWav, data, dataSize, NULL, NULL, 0, pAllocationCallbacks);
+}
+
+DRWAV_API drwav_bool32 drwav_init_memory_ex(drwav* pWav, const void* data, size_t dataSize, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (data == NULL || dataSize == 0) {
+        return DRWAV_FALSE;
+    }
+
+    if (!drwav_preinit(pWav, drwav__on_read_memory, drwav__on_seek_memory, pWav, pAllocationCallbacks)) {
+        return DRWAV_FALSE;
+    }
+
+    pWav->memoryStream.data = (const drwav_uint8*)data;
+    pWav->memoryStream.dataSize = dataSize;
+    pWav->memoryStream.currentReadPos = 0;
+
+    return drwav_init__internal(pWav, onChunk, pChunkUserData, flags);
+}
+
+
+static drwav_bool32 drwav_init_memory_write__internal(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (ppData == NULL || pDataSize == NULL) {
+        return DRWAV_FALSE;
+    }
+
+    *ppData = NULL; /* Important because we're using realloc()! */
+    *pDataSize = 0;
+
+    if (!drwav_preinit_write(pWav, pFormat, isSequential, drwav__on_write_memory, drwav__on_seek_memory_write, pWav, pAllocationCallbacks)) {
+        return DRWAV_FALSE;
+    }
+
+    pWav->memoryStreamWrite.ppData = ppData;
+    pWav->memoryStreamWrite.pDataSize = pDataSize;
+    pWav->memoryStreamWrite.dataSize = 0;
+    pWav->memoryStreamWrite.dataCapacity = 0;
+    pWav->memoryStreamWrite.currentWritePos = 0;
+
+    return drwav_init_write__internal(pWav, pFormat, totalSampleCount);
+}
+
+DRWAV_API drwav_bool32 drwav_init_memory_write(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    return drwav_init_memory_write__internal(pWav, ppData, pDataSize, pFormat, 0, DRWAV_FALSE, pAllocationCallbacks);
+}
+
+DRWAV_API drwav_bool32 drwav_init_memory_write_sequential(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    return drwav_init_memory_write__internal(pWav, ppData, pDataSize, pFormat, totalSampleCount, DRWAV_TRUE, pAllocationCallbacks);
+}
+
+DRWAV_API drwav_bool32 drwav_init_memory_write_sequential_pcm_frames(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, drwav_uint64 totalPCMFrameCount, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pFormat == NULL) {
+        return DRWAV_FALSE;
+    }
+
+    return drwav_init_memory_write_sequential(pWav, ppData, pDataSize, pFormat, totalPCMFrameCount*pFormat->channels, pAllocationCallbacks);
+}
+
+
+
+DRWAV_API drwav_result drwav_uninit(drwav* pWav)
+{
+    drwav_result result = DRWAV_SUCCESS;
+
+    if (pWav == NULL) {
+        return DRWAV_INVALID_ARGS;
+    }
+
+    /*
+    If the drwav object was opened in write mode we'll need to finalize a few things:
+      - Make sure the "data" chunk is aligned to 16-bits for RIFF containers, or 64 bits for W64 containers.
+      - Set the size of the "data" chunk.
+    */
+    if (pWav->onWrite != NULL) {
+        drwav_uint32 paddingSize = 0;
+
+        /* Padding. Do not adjust pWav->dataChunkDataSize - this should not include the padding. */
+        if (pWav->container == drwav_container_riff) {
+            paddingSize = drwav__chunk_padding_size_riff(pWav->dataChunkDataSize);
+        } else {
+            paddingSize = drwav__chunk_padding_size_w64(pWav->dataChunkDataSize);
+        }
+        
+        if (paddingSize > 0) {
+            drwav_uint64 paddingData = 0;
+            pWav->onWrite(pWav->pUserData, &paddingData, paddingSize);
+        }
+
+        /*
+        Chunk sizes. When using sequential mode, these will have been filled in at initialization time. We only need
+        to do this when using non-sequential mode.
+        */
+        if (pWav->onSeek && !pWav->isSequentialWrite) {
+            if (pWav->container == drwav_container_riff) {
+                /* The "RIFF" chunk size. */
+                if (pWav->onSeek(pWav->pUserData, 4, drwav_seek_origin_start)) {
+                    drwav_uint32 riffChunkSize = drwav__riff_chunk_size_riff(pWav->dataChunkDataSize);
+                    pWav->onWrite(pWav->pUserData, &riffChunkSize, 4);
+                }
+
+                /* the "data" chunk size. */
+                if (pWav->onSeek(pWav->pUserData, (int)pWav->dataChunkDataPos + 4, drwav_seek_origin_start)) {
+                    drwav_uint32 dataChunkSize = drwav__data_chunk_size_riff(pWav->dataChunkDataSize);
+                    pWav->onWrite(pWav->pUserData, &dataChunkSize, 4);
+                }
+            } else {
+                /* The "RIFF" chunk size. */
+                if (pWav->onSeek(pWav->pUserData, 16, drwav_seek_origin_start)) {
+                    drwav_uint64 riffChunkSize = drwav__riff_chunk_size_w64(pWav->dataChunkDataSize);
+                    pWav->onWrite(pWav->pUserData, &riffChunkSize, 8);
+                }
+
+                /* The "data" chunk size. */
+                if (pWav->onSeek(pWav->pUserData, (int)pWav->dataChunkDataPos + 16, drwav_seek_origin_start)) {
+                    drwav_uint64 dataChunkSize = drwav__data_chunk_size_w64(pWav->dataChunkDataSize);
+                    pWav->onWrite(pWav->pUserData, &dataChunkSize, 8);
+                }
+            }
+        }
+
+        /* Validation for sequential mode. */
+        if (pWav->isSequentialWrite) {
+            if (pWav->dataChunkDataSize != pWav->dataChunkDataSizeTargetWrite) {
+                result = DRWAV_INVALID_FILE;
+            }
+        }
+    }
+
+#ifndef DR_WAV_NO_STDIO
+    /*
+    If we opened the file with drwav_open_file() we will want to close the file handle. We can know whether or not drwav_open_file()
+    was used by looking at the onRead and onSeek callbacks.
+    */
+    if (pWav->onRead == drwav__on_read_stdio || pWav->onWrite == drwav__on_write_stdio) {
+        fclose((FILE*)pWav->pUserData);
+    }
+#endif
+
+    return result;
+}
+
+
+
+DRWAV_API size_t drwav_read_raw(drwav* pWav, size_t bytesToRead, void* pBufferOut)
+{
+    size_t bytesRead;
+
+    if (pWav == NULL || bytesToRead == 0 || pBufferOut == NULL) {
+        return 0;
+    }
+
+    if (bytesToRead > pWav->bytesRemaining) {
+        bytesToRead = (size_t)pWav->bytesRemaining;
+    }
+
+    bytesRead = pWav->onRead(pWav->pUserData, pBufferOut, bytesToRead);
+
+    pWav->bytesRemaining -= bytesRead;
+    return bytesRead;
+}
+
+
+
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_le(drwav* pWav, drwav_uint64 framesToRead, void* pBufferOut)
+{
+    drwav_uint32 bytesPerFrame;
+
+    if (pWav == NULL || framesToRead == 0 || pBufferOut == NULL) {
+        return 0;
+    }
+
+    /* Cannot use this function for compressed formats. */
+    if (drwav__is_compressed_format_tag(pWav->translatedFormatTag)) {
+        return 0;
+    }
+
+    bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+    if (bytesPerFrame == 0) {
+        return 0;
+    }
+
+    /* Don't try to read more samples than can potentially fit in the output buffer. */
+    if (framesToRead * bytesPerFrame > DRWAV_SIZE_MAX) {
+        framesToRead = DRWAV_SIZE_MAX / bytesPerFrame;
+    }
+
+    return drwav_read_raw(pWav, (size_t)(framesToRead * bytesPerFrame), pBufferOut) / bytesPerFrame;
+}
+
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_be(drwav* pWav, drwav_uint64 framesToRead, void* pBufferOut)
+{
+    drwav_uint64 framesRead = drwav_read_pcm_frames_le(pWav, framesToRead, pBufferOut);
+    drwav__bswap_samples(pBufferOut, framesRead*pWav->channels, drwav_get_bytes_per_pcm_frame(pWav)/pWav->channels, pWav->translatedFormatTag);
+
+    return framesRead;
+}
+
+DRWAV_API drwav_uint64 drwav_read_pcm_frames(drwav* pWav, drwav_uint64 framesToRead, void* pBufferOut)
+{
+    if (drwav__is_little_endian()) {
+        return drwav_read_pcm_frames_le(pWav, framesToRead, pBufferOut);
+    } else {
+        return drwav_read_pcm_frames_be(pWav, framesToRead, pBufferOut);
+    }
+}
+
+
+
+DRWAV_API drwav_bool32 drwav_seek_to_first_pcm_frame(drwav* pWav)
+{
+    if (pWav->onWrite != NULL) {
+        return DRWAV_FALSE; /* No seeking in write mode. */
+    }
+
+    if (!pWav->onSeek(pWav->pUserData, (int)pWav->dataChunkDataPos, drwav_seek_origin_start)) {
+        return DRWAV_FALSE;
+    }
+
+    if (drwav__is_compressed_format_tag(pWav->translatedFormatTag)) {
+        pWav->compressed.iCurrentPCMFrame = 0;
+    }
+    
+    pWav->bytesRemaining = pWav->dataChunkDataSize;
+    return DRWAV_TRUE;
+}
+
+DRWAV_API drwav_bool32 drwav_seek_to_pcm_frame(drwav* pWav, drwav_uint64 targetFrameIndex)
+{
+    /* Seeking should be compatible with wave files > 2GB. */
+
+    if (pWav == NULL || pWav->onSeek == NULL) {
+        return DRWAV_FALSE;
+    }
+
+    /* No seeking in write mode. */
+    if (pWav->onWrite != NULL) {
+        return DRWAV_FALSE;
+    }
+
+    /* If there are no samples, just return DRWAV_TRUE without doing anything. */
+    if (pWav->totalPCMFrameCount == 0) {
+        return DRWAV_TRUE;
+    }
+
+    /* Make sure the sample is clamped. */
+    if (targetFrameIndex >= pWav->totalPCMFrameCount) {
+        targetFrameIndex  = pWav->totalPCMFrameCount - 1;
+    }
+
+    /*
+    For compressed formats we just use a slow generic seek. If we are seeking forward we just seek forward. If we are going backwards we need
+    to seek back to the start.
+    */
+    if (drwav__is_compressed_format_tag(pWav->translatedFormatTag)) {
+        /* TODO: This can be optimized. */
+        
+        /*
+        If we're seeking forward it's simple - just keep reading samples until we hit the sample we're requesting. If we're seeking backwards,
+        we first need to seek back to the start and then just do the same thing as a forward seek.
+        */
+        if (targetFrameIndex < pWav->compressed.iCurrentPCMFrame) {
+            if (!drwav_seek_to_first_pcm_frame(pWav)) {
+                return DRWAV_FALSE;
+            }
+        }
+
+        if (targetFrameIndex > pWav->compressed.iCurrentPCMFrame) {
+            drwav_uint64 offsetInFrames = targetFrameIndex - pWav->compressed.iCurrentPCMFrame;
+
+            drwav_int16 devnull[2048];
+            while (offsetInFrames > 0) {
+                drwav_uint64 framesRead = 0;
+                drwav_uint64 framesToRead = offsetInFrames;
+                if (framesToRead > drwav_countof(devnull)/pWav->channels) {
+                    framesToRead = drwav_countof(devnull)/pWav->channels;
+                }
+
+                if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) {
+                    framesRead = drwav_read_pcm_frames_s16__msadpcm(pWav, framesToRead, devnull);
+                } else if (pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) {
+                    framesRead = drwav_read_pcm_frames_s16__ima(pWav, framesToRead, devnull);
+                } else {
+                    DRWAV_ASSERT(DRWAV_FALSE);  /* If this assertion is triggered it means I've implemented a new compressed format but forgot to add a branch for it here. */
+                }
+
+                if (framesRead != framesToRead) {
+                    return DRWAV_FALSE;
+                }
+
+                offsetInFrames -= framesRead;
+            }
+        }
+    } else {
+        drwav_uint64 totalSizeInBytes;
+        drwav_uint64 currentBytePos;
+        drwav_uint64 targetBytePos;
+        drwav_uint64 offset;
+
+        totalSizeInBytes = pWav->totalPCMFrameCount * drwav_get_bytes_per_pcm_frame(pWav);
+        DRWAV_ASSERT(totalSizeInBytes >= pWav->bytesRemaining);
+
+        currentBytePos = totalSizeInBytes - pWav->bytesRemaining;
+        targetBytePos  = targetFrameIndex * drwav_get_bytes_per_pcm_frame(pWav);
+
+        if (currentBytePos < targetBytePos) {
+            /* Offset forwards. */
+            offset = (targetBytePos - currentBytePos);
+        } else {
+            /* Offset backwards. */
+            if (!drwav_seek_to_first_pcm_frame(pWav)) {
+                return DRWAV_FALSE;
+            }
+            offset = targetBytePos;
+        }
+
+        while (offset > 0) {
+            int offset32 = ((offset > INT_MAX) ? INT_MAX : (int)offset);
+            if (!pWav->onSeek(pWav->pUserData, offset32, drwav_seek_origin_current)) {
+                return DRWAV_FALSE;
+            }
+
+            pWav->bytesRemaining -= offset32;
+            offset -= offset32;
+        }
+    }
+
+    return DRWAV_TRUE;
+}
+
+
+DRWAV_API size_t drwav_write_raw(drwav* pWav, size_t bytesToWrite, const void* pData)
+{
+    size_t bytesWritten;
+
+    if (pWav == NULL || bytesToWrite == 0 || pData == NULL) {
+        return 0;
+    }
+
+    bytesWritten = pWav->onWrite(pWav->pUserData, pData, bytesToWrite);
+    pWav->dataChunkDataSize += bytesWritten;
+
+    return bytesWritten;
+}
+
+
+DRWAV_API drwav_uint64 drwav_write_pcm_frames_le(drwav* pWav, drwav_uint64 framesToWrite, const void* pData)
+{
+    drwav_uint64 bytesToWrite;
+    drwav_uint64 bytesWritten;
+    const drwav_uint8* pRunningData;
+
+    if (pWav == NULL || framesToWrite == 0 || pData == NULL) {
+        return 0;
+    }
+
+    bytesToWrite = ((framesToWrite * pWav->channels * pWav->bitsPerSample) / 8);
+    if (bytesToWrite > DRWAV_SIZE_MAX) {
+        return 0;
+    }
+
+    bytesWritten = 0;
+    pRunningData = (const drwav_uint8*)pData;
+
+    while (bytesToWrite > 0) {
+        size_t bytesJustWritten;
+        drwav_uint64 bytesToWriteThisIteration;
+
+        bytesToWriteThisIteration = bytesToWrite;
+        DRWAV_ASSERT(bytesToWriteThisIteration <= DRWAV_SIZE_MAX);  /* <-- This is checked above. */
+
+        bytesJustWritten = drwav_write_raw(pWav, (size_t)bytesToWriteThisIteration, pRunningData);
+        if (bytesJustWritten == 0) {
+            break;
+        }
+
+        bytesToWrite -= bytesJustWritten;
+        bytesWritten += bytesJustWritten;
+        pRunningData += bytesJustWritten;
+    }
+
+    return (bytesWritten * 8) / pWav->bitsPerSample / pWav->channels;
+}
+
+DRWAV_API drwav_uint64 drwav_write_pcm_frames_be(drwav* pWav, drwav_uint64 framesToWrite, const void* pData)
+{
+    drwav_uint64 bytesToWrite;
+    drwav_uint64 bytesWritten;
+    drwav_uint32 bytesPerSample;
+    const drwav_uint8* pRunningData;
+
+    if (pWav == NULL || framesToWrite == 0 || pData == NULL) {
+        return 0;
+    }
+
+    bytesToWrite = ((framesToWrite * pWav->channels * pWav->bitsPerSample) / 8);
+    if (bytesToWrite > DRWAV_SIZE_MAX) {
+        return 0;
+    }
+
+    bytesWritten = 0;
+    pRunningData = (const drwav_uint8*)pData;
+
+    bytesPerSample = drwav_get_bytes_per_pcm_frame(pWav) / pWav->channels;
+    
+    while (bytesToWrite > 0) {
+        drwav_uint8 temp[4096];
+        drwav_uint32 sampleCount;
+        size_t bytesJustWritten;
+        drwav_uint64 bytesToWriteThisIteration;
+
+        bytesToWriteThisIteration = bytesToWrite;
+        DRWAV_ASSERT(bytesToWriteThisIteration <= DRWAV_SIZE_MAX);  /* <-- This is checked above. */
+
+        /*
+        WAV files are always little-endian. We need to byte swap on big-endian architectures. Since our input buffer is read-only we need
+        to use an intermediary buffer for the conversion.
+        */
+        sampleCount = sizeof(temp)/bytesPerSample;
+
+        if (bytesToWriteThisIteration > ((drwav_uint64)sampleCount)*bytesPerSample) {
+            bytesToWriteThisIteration = ((drwav_uint64)sampleCount)*bytesPerSample;
+        }
+
+        DRWAV_COPY_MEMORY(temp, pRunningData, (size_t)bytesToWriteThisIteration);
+        drwav__bswap_samples(temp, sampleCount, bytesPerSample, pWav->translatedFormatTag);
+
+        bytesJustWritten = drwav_write_raw(pWav, (size_t)bytesToWriteThisIteration, temp);
+        if (bytesJustWritten == 0) {
+            break;
+        }
+
+        bytesToWrite -= bytesJustWritten;
+        bytesWritten += bytesJustWritten;
+        pRunningData += bytesJustWritten;
+    }
+
+    return (bytesWritten * 8) / pWav->bitsPerSample / pWav->channels;
+}
+
+DRWAV_API drwav_uint64 drwav_write_pcm_frames(drwav* pWav, drwav_uint64 framesToWrite, const void* pData)
+{
+    if (drwav__is_little_endian()) {
+        return drwav_write_pcm_frames_le(pWav, framesToWrite, pData);
+    } else {
+        return drwav_write_pcm_frames_be(pWav, framesToWrite, pData);
+    }
+}
+
+
+static drwav_uint64 drwav_read_pcm_frames_s16__msadpcm(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
+{
+    drwav_uint64 totalFramesRead = 0;
+
+    DRWAV_ASSERT(pWav != NULL);
+    DRWAV_ASSERT(framesToRead > 0);
+    DRWAV_ASSERT(pBufferOut != NULL);
+
+    /* TODO: Lots of room for optimization here. */
+
+    while (framesToRead > 0 && pWav->compressed.iCurrentPCMFrame < pWav->totalPCMFrameCount) {
+        /* If there are no cached frames we need to load a new block. */
+        if (pWav->msadpcm.cachedFrameCount == 0 && pWav->msadpcm.bytesRemainingInBlock == 0) {
+            if (pWav->channels == 1) {
+                /* Mono. */
+                drwav_uint8 header[7];
+                if (pWav->onRead(pWav->pUserData, header, sizeof(header)) != sizeof(header)) {
+                    return totalFramesRead;
+                }
+                pWav->msadpcm.bytesRemainingInBlock = pWav->fmt.blockAlign - sizeof(header);
+
+                pWav->msadpcm.predictor[0]     = header[0];
+                pWav->msadpcm.delta[0]         = drwav__bytes_to_s16(header + 1);
+                pWav->msadpcm.prevFrames[0][1] = (drwav_int32)drwav__bytes_to_s16(header + 3);
+                pWav->msadpcm.prevFrames[0][0] = (drwav_int32)drwav__bytes_to_s16(header + 5);
+                pWav->msadpcm.cachedFrames[2]  = pWav->msadpcm.prevFrames[0][0];
+                pWav->msadpcm.cachedFrames[3]  = pWav->msadpcm.prevFrames[0][1];
+                pWav->msadpcm.cachedFrameCount = 2;
+            } else {
+                /* Stereo. */
+                drwav_uint8 header[14];
+                if (pWav->onRead(pWav->pUserData, header, sizeof(header)) != sizeof(header)) {
+                    return totalFramesRead;
+                }
+                pWav->msadpcm.bytesRemainingInBlock = pWav->fmt.blockAlign - sizeof(header);
+
+                pWav->msadpcm.predictor[0] = header[0];
+                pWav->msadpcm.predictor[1] = header[1];
+                pWav->msadpcm.delta[0] = drwav__bytes_to_s16(header + 2);
+                pWav->msadpcm.delta[1] = drwav__bytes_to_s16(header + 4);
+                pWav->msadpcm.prevFrames[0][1] = (drwav_int32)drwav__bytes_to_s16(header + 6);
+                pWav->msadpcm.prevFrames[1][1] = (drwav_int32)drwav__bytes_to_s16(header + 8);
+                pWav->msadpcm.prevFrames[0][0] = (drwav_int32)drwav__bytes_to_s16(header + 10);
+                pWav->msadpcm.prevFrames[1][0] = (drwav_int32)drwav__bytes_to_s16(header + 12);
+
+                pWav->msadpcm.cachedFrames[0] = pWav->msadpcm.prevFrames[0][0];
+                pWav->msadpcm.cachedFrames[1] = pWav->msadpcm.prevFrames[1][0];
+                pWav->msadpcm.cachedFrames[2] = pWav->msadpcm.prevFrames[0][1];
+                pWav->msadpcm.cachedFrames[3] = pWav->msadpcm.prevFrames[1][1];
+                pWav->msadpcm.cachedFrameCount = 2;
+            }
+        }
+
+        /* Output anything that's cached. */
+        while (framesToRead > 0 && pWav->msadpcm.cachedFrameCount > 0 && pWav->compressed.iCurrentPCMFrame < pWav->totalPCMFrameCount) {
+            drwav_uint32 iSample = 0;
+            for (iSample = 0; iSample < pWav->channels; iSample += 1) {
+                pBufferOut[iSample] = (drwav_int16)pWav->msadpcm.cachedFrames[(drwav_countof(pWav->msadpcm.cachedFrames) - (pWav->msadpcm.cachedFrameCount*pWav->channels)) + iSample];
+            }
+
+            pBufferOut      += pWav->channels;
+            framesToRead    -= 1;
+            totalFramesRead += 1;
+            pWav->compressed.iCurrentPCMFrame += 1;
+            pWav->msadpcm.cachedFrameCount -= 1;
+        }
+
+        if (framesToRead == 0) {
+            return totalFramesRead;
+        }
+
+
+        /*
+        If there's nothing left in the cache, just go ahead and load more. If there's nothing left to load in the current block we just continue to the next
+        loop iteration which will trigger the loading of a new block.
+        */
+        if (pWav->msadpcm.cachedFrameCount == 0) {
+            if (pWav->msadpcm.bytesRemainingInBlock == 0) {
+                continue;
+            } else {
+                static drwav_int32 adaptationTable[] = { 
+                    230, 230, 230, 230, 307, 409, 512, 614, 
+                    768, 614, 512, 409, 307, 230, 230, 230 
+                };
+                static drwav_int32 coeff1Table[] = { 256, 512, 0, 192, 240, 460,  392 };
+                static drwav_int32 coeff2Table[] = { 0,  -256, 0, 64,  0,  -208, -232 };
+
+                drwav_uint8 nibbles;
+                drwav_int32 nibble0;
+                drwav_int32 nibble1;
+
+                if (pWav->onRead(pWav->pUserData, &nibbles, 1) != 1) {
+                    return totalFramesRead;
+                }
+                pWav->msadpcm.bytesRemainingInBlock -= 1;
+
+                /* TODO: Optimize away these if statements. */
+                nibble0 = ((nibbles & 0xF0) >> 4); if ((nibbles & 0x80)) { nibble0 |= 0xFFFFFFF0UL; }
+                nibble1 = ((nibbles & 0x0F) >> 0); if ((nibbles & 0x08)) { nibble1 |= 0xFFFFFFF0UL; }
+
+                if (pWav->channels == 1) {
+                    /* Mono. */
+                    drwav_int32 newSample0;
+                    drwav_int32 newSample1;
+
+                    newSample0  = ((pWav->msadpcm.prevFrames[0][1] * coeff1Table[pWav->msadpcm.predictor[0]]) + (pWav->msadpcm.prevFrames[0][0] * coeff2Table[pWav->msadpcm.predictor[0]])) >> 8;
+                    newSample0 += nibble0 * pWav->msadpcm.delta[0];
+                    newSample0  = drwav_clamp(newSample0, -32768, 32767);
+
+                    pWav->msadpcm.delta[0] = (adaptationTable[((nibbles & 0xF0) >> 4)] * pWav->msadpcm.delta[0]) >> 8;
+                    if (pWav->msadpcm.delta[0] < 16) {
+                        pWav->msadpcm.delta[0] = 16;
+                    }
+
+                    pWav->msadpcm.prevFrames[0][0] = pWav->msadpcm.prevFrames[0][1];
+                    pWav->msadpcm.prevFrames[0][1] = newSample0;
+
+
+                    newSample1  = ((pWav->msadpcm.prevFrames[0][1] * coeff1Table[pWav->msadpcm.predictor[0]]) + (pWav->msadpcm.prevFrames[0][0] * coeff2Table[pWav->msadpcm.predictor[0]])) >> 8;
+                    newSample1 += nibble1 * pWav->msadpcm.delta[0];
+                    newSample1  = drwav_clamp(newSample1, -32768, 32767);
+
+                    pWav->msadpcm.delta[0] = (adaptationTable[((nibbles & 0x0F) >> 0)] * pWav->msadpcm.delta[0]) >> 8;
+                    if (pWav->msadpcm.delta[0] < 16) {
+                        pWav->msadpcm.delta[0] = 16;
+                    }
+
+                    pWav->msadpcm.prevFrames[0][0] = pWav->msadpcm.prevFrames[0][1];
+                    pWav->msadpcm.prevFrames[0][1] = newSample1;
+
+
+                    pWav->msadpcm.cachedFrames[2] = newSample0;
+                    pWav->msadpcm.cachedFrames[3] = newSample1;
+                    pWav->msadpcm.cachedFrameCount = 2;
+                } else {
+                    /* Stereo. */
+                    drwav_int32 newSample0;
+                    drwav_int32 newSample1;
+
+                    /* Left. */
+                    newSample0  = ((pWav->msadpcm.prevFrames[0][1] * coeff1Table[pWav->msadpcm.predictor[0]]) + (pWav->msadpcm.prevFrames[0][0] * coeff2Table[pWav->msadpcm.predictor[0]])) >> 8;
+                    newSample0 += nibble0 * pWav->msadpcm.delta[0];
+                    newSample0  = drwav_clamp(newSample0, -32768, 32767);
+
+                    pWav->msadpcm.delta[0] = (adaptationTable[((nibbles & 0xF0) >> 4)] * pWav->msadpcm.delta[0]) >> 8;
+                    if (pWav->msadpcm.delta[0] < 16) {
+                        pWav->msadpcm.delta[0] = 16;
+                    }
+
+                    pWav->msadpcm.prevFrames[0][0] = pWav->msadpcm.prevFrames[0][1];
+                    pWav->msadpcm.prevFrames[0][1] = newSample0;
+
+
+                    /* Right. */
+                    newSample1  = ((pWav->msadpcm.prevFrames[1][1] * coeff1Table[pWav->msadpcm.predictor[1]]) + (pWav->msadpcm.prevFrames[1][0] * coeff2Table[pWav->msadpcm.predictor[1]])) >> 8;
+                    newSample1 += nibble1 * pWav->msadpcm.delta[1];
+                    newSample1  = drwav_clamp(newSample1, -32768, 32767);
+
+                    pWav->msadpcm.delta[1] = (adaptationTable[((nibbles & 0x0F) >> 0)] * pWav->msadpcm.delta[1]) >> 8;
+                    if (pWav->msadpcm.delta[1] < 16) {
+                        pWav->msadpcm.delta[1] = 16;
+                    }
+
+                    pWav->msadpcm.prevFrames[1][0] = pWav->msadpcm.prevFrames[1][1];
+                    pWav->msadpcm.prevFrames[1][1] = newSample1;
+
+                    pWav->msadpcm.cachedFrames[2] = newSample0;
+                    pWav->msadpcm.cachedFrames[3] = newSample1;
+                    pWav->msadpcm.cachedFrameCount = 1;
+                }
+            }
+        }
+    }
+
+    return totalFramesRead;
+}
+
+
+static drwav_uint64 drwav_read_pcm_frames_s16__ima(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
+{
+    drwav_uint64 totalFramesRead = 0;
+
+    DRWAV_ASSERT(pWav != NULL);
+    DRWAV_ASSERT(framesToRead > 0);
+    DRWAV_ASSERT(pBufferOut != NULL);
+
+    /* TODO: Lots of room for optimization here. */
+
+    while (framesToRead > 0 && pWav->compressed.iCurrentPCMFrame < pWav->totalPCMFrameCount) {
+        /* If there are no cached samples we need to load a new block. */
+        if (pWav->ima.cachedFrameCount == 0 && pWav->ima.bytesRemainingInBlock == 0) {
+            if (pWav->channels == 1) {
+                /* Mono. */
+                drwav_uint8 header[4];
+                if (pWav->onRead(pWav->pUserData, header, sizeof(header)) != sizeof(header)) {
+                    return totalFramesRead;
+                }
+                pWav->ima.bytesRemainingInBlock = pWav->fmt.blockAlign - sizeof(header);
+
+                pWav->ima.predictor[0] = drwav__bytes_to_s16(header + 0);
+                pWav->ima.stepIndex[0] = header[2];
+                pWav->ima.cachedFrames[drwav_countof(pWav->ima.cachedFrames) - 1] = pWav->ima.predictor[0];
+                pWav->ima.cachedFrameCount = 1;
+            } else {
+                /* Stereo. */
+                drwav_uint8 header[8];
+                if (pWav->onRead(pWav->pUserData, header, sizeof(header)) != sizeof(header)) {
+                    return totalFramesRead;
+                }
+                pWav->ima.bytesRemainingInBlock = pWav->fmt.blockAlign - sizeof(header);
+
+                pWav->ima.predictor[0] = drwav__bytes_to_s16(header + 0);
+                pWav->ima.stepIndex[0] = header[2];
+                pWav->ima.predictor[1] = drwav__bytes_to_s16(header + 4);
+                pWav->ima.stepIndex[1] = header[6];
+
+                pWav->ima.cachedFrames[drwav_countof(pWav->ima.cachedFrames) - 2] = pWav->ima.predictor[0];
+                pWav->ima.cachedFrames[drwav_countof(pWav->ima.cachedFrames) - 1] = pWav->ima.predictor[1];
+                pWav->ima.cachedFrameCount = 1;
+            }
+        }
+
+        /* Output anything that's cached. */
+        while (framesToRead > 0 && pWav->ima.cachedFrameCount > 0 && pWav->compressed.iCurrentPCMFrame < pWav->totalPCMFrameCount) {
+            drwav_uint32 iSample;
+            for (iSample = 0; iSample < pWav->channels; iSample += 1) {
+                pBufferOut[iSample] = (drwav_int16)pWav->ima.cachedFrames[(drwav_countof(pWav->ima.cachedFrames) - (pWav->ima.cachedFrameCount*pWav->channels)) + iSample];
+            }
+
+            pBufferOut      += pWav->channels;
+            framesToRead    -= 1;
+            totalFramesRead += 1;
+            pWav->compressed.iCurrentPCMFrame += 1;
+            pWav->ima.cachedFrameCount -= 1;
+        }
+
+        if (framesToRead == 0) {
+            return totalFramesRead;
+        }
+
+        /*
+        If there's nothing left in the cache, just go ahead and load more. If there's nothing left to load in the current block we just continue to the next
+        loop iteration which will trigger the loading of a new block.
+        */
+        if (pWav->ima.cachedFrameCount == 0) {
+            if (pWav->ima.bytesRemainingInBlock == 0) {
+                continue;
+            } else {
+                static drwav_int32 indexTable[16] = {
+                    -1, -1, -1, -1, 2, 4, 6, 8,
+                    -1, -1, -1, -1, 2, 4, 6, 8
+                };
+
+                static drwav_int32 stepTable[89] = {
+                    7,     8,     9,     10,    11,    12,    13,    14,    16,    17, 
+                    19,    21,    23,    25,    28,    31,    34,    37,    41,    45, 
+                    50,    55,    60,    66,    73,    80,    88,    97,    107,   118, 
+                    130,   143,   157,   173,   190,   209,   230,   253,   279,   307,
+                    337,   371,   408,   449,   494,   544,   598,   658,   724,   796,
+                    876,   963,   1060,  1166,  1282,  1411,  1552,  1707,  1878,  2066, 
+                    2272,  2499,  2749,  3024,  3327,  3660,  4026,  4428,  4871,  5358,
+                    5894,  6484,  7132,  7845,  8630,  9493,  10442, 11487, 12635, 13899, 
+                    15289, 16818, 18500, 20350, 22385, 24623, 27086, 29794, 32767 
+                };
+
+                drwav_uint32 iChannel;
+
+                /*
+                From what I can tell with stereo streams, it looks like every 4 bytes (8 samples) is for one channel. So it goes 4 bytes for the
+                left channel, 4 bytes for the right channel.
+                */
+                pWav->ima.cachedFrameCount = 8;
+                for (iChannel = 0; iChannel < pWav->channels; ++iChannel) {
+                    drwav_uint32 iByte;
+                    drwav_uint8 nibbles[4];
+                    if (pWav->onRead(pWav->pUserData, &nibbles, 4) != 4) {
+                        pWav->ima.cachedFrameCount = 0;
+                        return totalFramesRead;
+                    }
+                    pWav->ima.bytesRemainingInBlock -= 4;
+
+                    for (iByte = 0; iByte < 4; ++iByte) {
+                        drwav_uint8 nibble0 = ((nibbles[iByte] & 0x0F) >> 0);
+                        drwav_uint8 nibble1 = ((nibbles[iByte] & 0xF0) >> 4);
+
+                        drwav_int32 step      = stepTable[pWav->ima.stepIndex[iChannel]];
+                        drwav_int32 predictor = pWav->ima.predictor[iChannel];
+
+                        drwav_int32      diff  = step >> 3;
+                        if (nibble0 & 1) diff += step >> 2;
+                        if (nibble0 & 2) diff += step >> 1;
+                        if (nibble0 & 4) diff += step;
+                        if (nibble0 & 8) diff  = -diff;
+
+                        predictor = drwav_clamp(predictor + diff, -32768, 32767);
+                        pWav->ima.predictor[iChannel] = predictor;
+                        pWav->ima.stepIndex[iChannel] = drwav_clamp(pWav->ima.stepIndex[iChannel] + indexTable[nibble0], 0, (drwav_int32)drwav_countof(stepTable)-1);
+                        pWav->ima.cachedFrames[(drwav_countof(pWav->ima.cachedFrames) - (pWav->ima.cachedFrameCount*pWav->channels)) + (iByte*2+0)*pWav->channels + iChannel] = predictor;
+
+
+                        step      = stepTable[pWav->ima.stepIndex[iChannel]];
+                        predictor = pWav->ima.predictor[iChannel];
+
+                                         diff  = step >> 3;
+                        if (nibble1 & 1) diff += step >> 2;
+                        if (nibble1 & 2) diff += step >> 1;
+                        if (nibble1 & 4) diff += step;
+                        if (nibble1 & 8) diff  = -diff;
+
+                        predictor = drwav_clamp(predictor + diff, -32768, 32767);
+                        pWav->ima.predictor[iChannel] = predictor;
+                        pWav->ima.stepIndex[iChannel] = drwav_clamp(pWav->ima.stepIndex[iChannel] + indexTable[nibble1], 0, (drwav_int32)drwav_countof(stepTable)-1);
+                        pWav->ima.cachedFrames[(drwav_countof(pWav->ima.cachedFrames) - (pWav->ima.cachedFrameCount*pWav->channels)) + (iByte*2+1)*pWav->channels + iChannel] = predictor;
+                    }
+                }
+            }
+        }
+    }
+
+    return totalFramesRead;
+}
+
+
+#ifndef DR_WAV_NO_CONVERSION_API
+static unsigned short g_drwavAlawTable[256] = {
+    0xEA80, 0xEB80, 0xE880, 0xE980, 0xEE80, 0xEF80, 0xEC80, 0xED80, 0xE280, 0xE380, 0xE080, 0xE180, 0xE680, 0xE780, 0xE480, 0xE580, 
+    0xF540, 0xF5C0, 0xF440, 0xF4C0, 0xF740, 0xF7C0, 0xF640, 0xF6C0, 0xF140, 0xF1C0, 0xF040, 0xF0C0, 0xF340, 0xF3C0, 0xF240, 0xF2C0, 
+    0xAA00, 0xAE00, 0xA200, 0xA600, 0xBA00, 0xBE00, 0xB200, 0xB600, 0x8A00, 0x8E00, 0x8200, 0x8600, 0x9A00, 0x9E00, 0x9200, 0x9600, 
+    0xD500, 0xD700, 0xD100, 0xD300, 0xDD00, 0xDF00, 0xD900, 0xDB00, 0xC500, 0xC700, 0xC100, 0xC300, 0xCD00, 0xCF00, 0xC900, 0xCB00, 
+    0xFEA8, 0xFEB8, 0xFE88, 0xFE98, 0xFEE8, 0xFEF8, 0xFEC8, 0xFED8, 0xFE28, 0xFE38, 0xFE08, 0xFE18, 0xFE68, 0xFE78, 0xFE48, 0xFE58, 
+    0xFFA8, 0xFFB8, 0xFF88, 0xFF98, 0xFFE8, 0xFFF8, 0xFFC8, 0xFFD8, 0xFF28, 0xFF38, 0xFF08, 0xFF18, 0xFF68, 0xFF78, 0xFF48, 0xFF58, 
+    0xFAA0, 0xFAE0, 0xFA20, 0xFA60, 0xFBA0, 0xFBE0, 0xFB20, 0xFB60, 0xF8A0, 0xF8E0, 0xF820, 0xF860, 0xF9A0, 0xF9E0, 0xF920, 0xF960, 
+    0xFD50, 0xFD70, 0xFD10, 0xFD30, 0xFDD0, 0xFDF0, 0xFD90, 0xFDB0, 0xFC50, 0xFC70, 0xFC10, 0xFC30, 0xFCD0, 0xFCF0, 0xFC90, 0xFCB0, 
+    0x1580, 0x1480, 0x1780, 0x1680, 0x1180, 0x1080, 0x1380, 0x1280, 0x1D80, 0x1C80, 0x1F80, 0x1E80, 0x1980, 0x1880, 0x1B80, 0x1A80, 
+    0x0AC0, 0x0A40, 0x0BC0, 0x0B40, 0x08C0, 0x0840, 0x09C0, 0x0940, 0x0EC0, 0x0E40, 0x0FC0, 0x0F40, 0x0CC0, 0x0C40, 0x0DC0, 0x0D40, 
+    0x5600, 0x5200, 0x5E00, 0x5A00, 0x4600, 0x4200, 0x4E00, 0x4A00, 0x7600, 0x7200, 0x7E00, 0x7A00, 0x6600, 0x6200, 0x6E00, 0x6A00, 
+    0x2B00, 0x2900, 0x2F00, 0x2D00, 0x2300, 0x2100, 0x2700, 0x2500, 0x3B00, 0x3900, 0x3F00, 0x3D00, 0x3300, 0x3100, 0x3700, 0x3500, 
+    0x0158, 0x0148, 0x0178, 0x0168, 0x0118, 0x0108, 0x0138, 0x0128, 0x01D8, 0x01C8, 0x01F8, 0x01E8, 0x0198, 0x0188, 0x01B8, 0x01A8, 
+    0x0058, 0x0048, 0x0078, 0x0068, 0x0018, 0x0008, 0x0038, 0x0028, 0x00D8, 0x00C8, 0x00F8, 0x00E8, 0x0098, 0x0088, 0x00B8, 0x00A8, 
+    0x0560, 0x0520, 0x05E0, 0x05A0, 0x0460, 0x0420, 0x04E0, 0x04A0, 0x0760, 0x0720, 0x07E0, 0x07A0, 0x0660, 0x0620, 0x06E0, 0x06A0, 
+    0x02B0, 0x0290, 0x02F0, 0x02D0, 0x0230, 0x0210, 0x0270, 0x0250, 0x03B0, 0x0390, 0x03F0, 0x03D0, 0x0330, 0x0310, 0x0370, 0x0350
+};
+
+static unsigned short g_drwavMulawTable[256] = {
+    0x8284, 0x8684, 0x8A84, 0x8E84, 0x9284, 0x9684, 0x9A84, 0x9E84, 0xA284, 0xA684, 0xAA84, 0xAE84, 0xB284, 0xB684, 0xBA84, 0xBE84, 
+    0xC184, 0xC384, 0xC584, 0xC784, 0xC984, 0xCB84, 0xCD84, 0xCF84, 0xD184, 0xD384, 0xD584, 0xD784, 0xD984, 0xDB84, 0xDD84, 0xDF84, 
+    0xE104, 0xE204, 0xE304, 0xE404, 0xE504, 0xE604, 0xE704, 0xE804, 0xE904, 0xEA04, 0xEB04, 0xEC04, 0xED04, 0xEE04, 0xEF04, 0xF004, 
+    0xF0C4, 0xF144, 0xF1C4, 0xF244, 0xF2C4, 0xF344, 0xF3C4, 0xF444, 0xF4C4, 0xF544, 0xF5C4, 0xF644, 0xF6C4, 0xF744, 0xF7C4, 0xF844, 
+    0xF8A4, 0xF8E4, 0xF924, 0xF964, 0xF9A4, 0xF9E4, 0xFA24, 0xFA64, 0xFAA4, 0xFAE4, 0xFB24, 0xFB64, 0xFBA4, 0xFBE4, 0xFC24, 0xFC64, 
+    0xFC94, 0xFCB4, 0xFCD4, 0xFCF4, 0xFD14, 0xFD34, 0xFD54, 0xFD74, 0xFD94, 0xFDB4, 0xFDD4, 0xFDF4, 0xFE14, 0xFE34, 0xFE54, 0xFE74, 
+    0xFE8C, 0xFE9C, 0xFEAC, 0xFEBC, 0xFECC, 0xFEDC, 0xFEEC, 0xFEFC, 0xFF0C, 0xFF1C, 0xFF2C, 0xFF3C, 0xFF4C, 0xFF5C, 0xFF6C, 0xFF7C, 
+    0xFF88, 0xFF90, 0xFF98, 0xFFA0, 0xFFA8, 0xFFB0, 0xFFB8, 0xFFC0, 0xFFC8, 0xFFD0, 0xFFD8, 0xFFE0, 0xFFE8, 0xFFF0, 0xFFF8, 0x0000, 
+    0x7D7C, 0x797C, 0x757C, 0x717C, 0x6D7C, 0x697C, 0x657C, 0x617C, 0x5D7C, 0x597C, 0x557C, 0x517C, 0x4D7C, 0x497C, 0x457C, 0x417C, 
+    0x3E7C, 0x3C7C, 0x3A7C, 0x387C, 0x367C, 0x347C, 0x327C, 0x307C, 0x2E7C, 0x2C7C, 0x2A7C, 0x287C, 0x267C, 0x247C, 0x227C, 0x207C, 
+    0x1EFC, 0x1DFC, 0x1CFC, 0x1BFC, 0x1AFC, 0x19FC, 0x18FC, 0x17FC, 0x16FC, 0x15FC, 0x14FC, 0x13FC, 0x12FC, 0x11FC, 0x10FC, 0x0FFC, 
+    0x0F3C, 0x0EBC, 0x0E3C, 0x0DBC, 0x0D3C, 0x0CBC, 0x0C3C, 0x0BBC, 0x0B3C, 0x0ABC, 0x0A3C, 0x09BC, 0x093C, 0x08BC, 0x083C, 0x07BC, 
+    0x075C, 0x071C, 0x06DC, 0x069C, 0x065C, 0x061C, 0x05DC, 0x059C, 0x055C, 0x051C, 0x04DC, 0x049C, 0x045C, 0x041C, 0x03DC, 0x039C, 
+    0x036C, 0x034C, 0x032C, 0x030C, 0x02EC, 0x02CC, 0x02AC, 0x028C, 0x026C, 0x024C, 0x022C, 0x020C, 0x01EC, 0x01CC, 0x01AC, 0x018C, 
+    0x0174, 0x0164, 0x0154, 0x0144, 0x0134, 0x0124, 0x0114, 0x0104, 0x00F4, 0x00E4, 0x00D4, 0x00C4, 0x00B4, 0x00A4, 0x0094, 0x0084, 
+    0x0078, 0x0070, 0x0068, 0x0060, 0x0058, 0x0050, 0x0048, 0x0040, 0x0038, 0x0030, 0x0028, 0x0020, 0x0018, 0x0010, 0x0008, 0x0000
+};
+
+static DRWAV_INLINE drwav_int16 drwav__alaw_to_s16(drwav_uint8 sampleIn)
+{
+    return (short)g_drwavAlawTable[sampleIn];
+}
+
+static DRWAV_INLINE drwav_int16 drwav__mulaw_to_s16(drwav_uint8 sampleIn)
+{
+    return (short)g_drwavMulawTable[sampleIn];
+}
+
+
+
+static void drwav__pcm_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t totalSampleCount, unsigned int bytesPerSample)
+{
+    unsigned int i;
+
+    /* Special case for 8-bit sample data because it's treated as unsigned. */
+    if (bytesPerSample == 1) {
+        drwav_u8_to_s16(pOut, pIn, totalSampleCount);
+        return;
+    }
+
+
+    /* Slightly more optimal implementation for common formats. */
+    if (bytesPerSample == 2) {
+        for (i = 0; i < totalSampleCount; ++i) {
+           *pOut++ = ((const drwav_int16*)pIn)[i];
+        }
+        return;
+    }
+    if (bytesPerSample == 3) {
+        drwav_s24_to_s16(pOut, pIn, totalSampleCount);
+        return;
+    }
+    if (bytesPerSample == 4) {
+        drwav_s32_to_s16(pOut, (const drwav_int32*)pIn, totalSampleCount);
+        return;
+    }
+
+
+    /* Anything more than 64 bits per sample is not supported. */
+    if (bytesPerSample > 8) {
+        DRWAV_ZERO_MEMORY(pOut, totalSampleCount * sizeof(*pOut));
+        return;
+    }
+
+
+    /* Generic, slow converter. */
+    for (i = 0; i < totalSampleCount; ++i) {
+        drwav_uint64 sample = 0;
+        unsigned int shift  = (8 - bytesPerSample) * 8;
+
+        unsigned int j;
+        for (j = 0; j < bytesPerSample; j += 1) {
+            DRWAV_ASSERT(j < 8);
+            sample |= (drwav_uint64)(pIn[j]) << shift;
+            shift  += 8;
+        }
+
+        pIn += j;
+        *pOut++ = (drwav_int16)((drwav_int64)sample >> 48);
+    }
+}
+
+static void drwav__ieee_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t totalSampleCount, unsigned int bytesPerSample)
+{
+    if (bytesPerSample == 4) {
+        drwav_f32_to_s16(pOut, (const double*)pIn, totalSampleCount);
+        return;
+    } else if (bytesPerSample == 8) {
+        drwav_f64_to_s16(pOut, (const double*)pIn, totalSampleCount);
+        return;
+    } else {
+        /* Only supporting 32- and 64-bit double. Output silence in all other cases. Contributions welcome for 16-bit double. */
+        DRWAV_ZERO_MEMORY(pOut, totalSampleCount * sizeof(*pOut));
+        return;
+    }
+}
+
+static drwav_uint64 drwav_read_pcm_frames_s16__pcm(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
+{
+    drwav_uint32 bytesPerFrame;
+    drwav_uint64 totalFramesRead;
+    drwav_uint8 sampleData[4096];
+
+    /* Fast path. */
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_PCM && pWav->bitsPerSample == 16) {
+        return drwav_read_pcm_frames(pWav, framesToRead, pBufferOut);
+    }
+    
+    bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+    if (bytesPerFrame == 0) {
+        return 0;
+    }
+
+    totalFramesRead = 0;
+    
+    while (framesToRead > 0) {
+        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
+        if (framesRead == 0) {
+            break;
+        }
+
+        drwav__pcm_to_s16(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels), bytesPerFrame/pWav->channels);
+
+        pBufferOut      += framesRead*pWav->channels;
+        framesToRead    -= framesRead;
+        totalFramesRead += framesRead;
+    }
+
+    return totalFramesRead;
+}
+
+static drwav_uint64 drwav_read_pcm_frames_s16__ieee(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
+{
+    drwav_uint64 totalFramesRead;
+    drwav_uint8 sampleData[4096];
+
+    drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+    if (bytesPerFrame == 0) {
+        return 0;
+    }
+
+    totalFramesRead = 0;
+    
+    while (framesToRead > 0) {
+        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
+        if (framesRead == 0) {
+            break;
+        }
+
+        drwav__ieee_to_s16(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels), bytesPerFrame/pWav->channels);
+
+        pBufferOut      += framesRead*pWav->channels;
+        framesToRead    -= framesRead;
+        totalFramesRead += framesRead;
+    }
+
+    return totalFramesRead;
+}
+
+static drwav_uint64 drwav_read_pcm_frames_s16__alaw(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
+{
+    drwav_uint64 totalFramesRead;
+    drwav_uint8 sampleData[4096];
+
+    drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+    if (bytesPerFrame == 0) {
+        return 0;
+    }
+
+    totalFramesRead = 0;
+    
+    while (framesToRead > 0) {
+        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
+        if (framesRead == 0) {
+            break;
+        }
+
+        drwav_alaw_to_s16(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels));
+
+        pBufferOut      += framesRead*pWav->channels;
+        framesToRead    -= framesRead;
+        totalFramesRead += framesRead;
+    }
+
+    return totalFramesRead;
+}
+
+static drwav_uint64 drwav_read_pcm_frames_s16__mulaw(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
+{
+    drwav_uint64 totalFramesRead;
+    drwav_uint8 sampleData[4096];
+
+    drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+    if (bytesPerFrame == 0) {
+        return 0;
+    }
+
+    totalFramesRead = 0;
+
+    while (framesToRead > 0) {
+        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
+        if (framesRead == 0) {
+            break;
+        }
+
+        drwav_mulaw_to_s16(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels));
+
+        pBufferOut      += framesRead*pWav->channels;
+        framesToRead    -= framesRead;
+        totalFramesRead += framesRead;
+    }
+
+    return totalFramesRead;
+}
+
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_s16(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
+{
+    if (pWav == NULL || framesToRead == 0 || pBufferOut == NULL) {
+        return 0;
+    }
+
+    /* Don't try to read more samples than can potentially fit in the output buffer. */
+    if (framesToRead * pWav->channels * sizeof(drwav_int16) > DRWAV_SIZE_MAX) {
+        framesToRead = DRWAV_SIZE_MAX / sizeof(drwav_int16) / pWav->channels;
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_PCM) {
+        return drwav_read_pcm_frames_s16__pcm(pWav, framesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_IEEE_FLOAT) {
+        return drwav_read_pcm_frames_s16__ieee(pWav, framesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ALAW) {
+        return drwav_read_pcm_frames_s16__alaw(pWav, framesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_MULAW) {
+        return drwav_read_pcm_frames_s16__mulaw(pWav, framesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) {
+        return drwav_read_pcm_frames_s16__msadpcm(pWav, framesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) {
+        return drwav_read_pcm_frames_s16__ima(pWav, framesToRead, pBufferOut);
+    }
+
+    return 0;
+}
+
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_s16le(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
+{
+    drwav_uint64 framesRead = drwav_read_pcm_frames_s16(pWav, framesToRead, pBufferOut);
+    if (!drwav__is_little_endian()) {
+        drwav__bswap_samples_s16(pBufferOut, framesRead*pWav->channels);
+    }
+
+    return framesRead;
+}
+
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_s16be(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
+{
+    drwav_uint64 framesRead = drwav_read_pcm_frames_s16(pWav, framesToRead, pBufferOut);
+    if (drwav__is_little_endian()) {
+        drwav__bswap_samples_s16(pBufferOut, framesRead*pWav->channels);
+    }
+
+    return framesRead;
+}
+
+
+DRWAV_API void drwav_u8_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount)
+{
+    int r;
+    size_t i;
+    for (i = 0; i < sampleCount; ++i) {
+        int x = pIn[i];
+        r = x << 8;
+        r = r - 32768;
+        pOut[i] = (short)r;
+    }
+}
+
+DRWAV_API void drwav_s24_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount)
+{
+    int r;
+    size_t i;
+    for (i = 0; i < sampleCount; ++i) {
+        int x = ((int)(((unsigned int)(((const drwav_uint8*)pIn)[i*3+0]) << 8) | ((unsigned int)(((const drwav_uint8*)pIn)[i*3+1]) << 16) | ((unsigned int)(((const drwav_uint8*)pIn)[i*3+2])) << 24)) >> 8;
+        r = x >> 8;
+        pOut[i] = (short)r;
+    }
+}
+
+DRWAV_API void drwav_s32_to_s16(drwav_int16* pOut, const drwav_int32* pIn, size_t sampleCount)
+{
+    int r;
+    size_t i;
+    for (i = 0; i < sampleCount; ++i) {
+        int x = pIn[i];
+        r = x >> 16;
+        pOut[i] = (short)r;
+    }
+}
+
+DRWAV_API void drwav_f32_to_s16(drwav_int16* pOut, const double* pIn, size_t sampleCount)
+{
+    int r;
+    size_t i;
+    for (i = 0; i < sampleCount; ++i) {
+        double x = pIn[i];
+        double c;
+        c = ((x < -1) ? -1 : ((x > 1) ? 1 : x));
+        c = c + 1;
+        r = (int)(c * 32767.5f);
+        r = r - 32768;
+        pOut[i] = (short)r;
+    }
+}
+
+DRWAV_API void drwav_f64_to_s16(drwav_int16* pOut, const double* pIn, size_t sampleCount)
+{
+    int r;
+    size_t i;
+    for (i = 0; i < sampleCount; ++i) {
+        double x = pIn[i];
+        double c;
+        c = ((x < -1) ? -1 : ((x > 1) ? 1 : x));
+        c = c + 1;
+        r = (int)(c * 32767.5);
+        r = r - 32768;
+        pOut[i] = (short)r;
+    }
+}
+
+DRWAV_API void drwav_alaw_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount)
+{
+    size_t i;
+    for (i = 0; i < sampleCount; ++i) {
+        pOut[i] = drwav__alaw_to_s16(pIn[i]);
+    }
+}
+
+DRWAV_API void drwav_mulaw_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount)
+{
+    size_t i;
+    for (i = 0; i < sampleCount; ++i) {
+        pOut[i] = drwav__mulaw_to_s16(pIn[i]);
+    }
+}
+
+
+
+static void drwav__pcm_to_f32(double* pOut, const drwav_uint8* pIn, size_t sampleCount, unsigned int bytesPerSample)
+{
+    unsigned int i;
+
+    /* Special case for 8-bit sample data because it's treated as unsigned. */
+    if (bytesPerSample == 1) {
+        drwav_u8_to_f32(pOut, pIn, sampleCount);
+        return;
+    }
+
+    /* Slightly more optimal implementation for common formats. */
+    if (bytesPerSample == 2) {
+        drwav_s16_to_f32(pOut, (const drwav_int16*)pIn, sampleCount);
+        return;
+    }
+    if (bytesPerSample == 3) {
+        drwav_s24_to_f32(pOut, pIn, sampleCount);
+        return;
+    }
+    if (bytesPerSample == 4) {
+        drwav_s32_to_f32(pOut, (const drwav_int32*)pIn, sampleCount);
+        return;
+    }
+
+
+    /* Anything more than 64 bits per sample is not supported. */
+    if (bytesPerSample > 8) {
+        DRWAV_ZERO_MEMORY(pOut, sampleCount * sizeof(*pOut));
+        return;
+    }
+
+
+    /* Generic, slow converter. */
+    for (i = 0; i < sampleCount; ++i) {
+        drwav_uint64 sample = 0;
+        unsigned int shift  = (8 - bytesPerSample) * 8;
+
+        unsigned int j;
+        for (j = 0; j < bytesPerSample; j += 1) {
+            DRWAV_ASSERT(j < 8);
+            sample |= (drwav_uint64)(pIn[j]) << shift;
+            shift  += 8;
+        }
+
+        pIn += j;
+        *pOut++ = (double)((drwav_int64)sample / 9223372036854775807.0);
+    }
+}
+
+static void drwav__ieee_to_f32(double* pOut, const drwav_uint8* pIn, size_t sampleCount, unsigned int bytesPerSample)
+{
+    if (bytesPerSample == 4) {
+        unsigned int i;
+        for (i = 0; i < sampleCount; ++i) {
+            *pOut++ = ((const double*)pIn)[i];
+        }
+        return;
+    } else if (bytesPerSample == 8) {
+        drwav_f64_to_f32(pOut, (const double*)pIn, sampleCount);
+        return;
+    } else {
+        /* Only supporting 32- and 64-bit double. Output silence in all other cases. Contributions welcome for 16-bit double. */
+        DRWAV_ZERO_MEMORY(pOut, sampleCount * sizeof(*pOut));
+        return;
+    }
+}
+
+
+static drwav_uint64 drwav_read_pcm_frames_f32__pcm(drwav* pWav, drwav_uint64 framesToRead, double* pBufferOut)
+{
+    drwav_uint64 totalFramesRead;
+    drwav_uint8 sampleData[4096];
+
+    drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+    if (bytesPerFrame == 0) {
+        return 0;
+    }
+
+    totalFramesRead = 0;
+
+    while (framesToRead > 0) {
+        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
+        if (framesRead == 0) {
+            break;
+        }
+
+        drwav__pcm_to_f32(pBufferOut, sampleData, (size_t)framesRead*pWav->channels, bytesPerFrame/pWav->channels);
+
+        pBufferOut      += framesRead*pWav->channels;
+        framesToRead    -= framesRead;
+        totalFramesRead += framesRead;
+    }
+
+    return totalFramesRead;
+}
+
+static drwav_uint64 drwav_read_pcm_frames_f32__msadpcm(drwav* pWav, drwav_uint64 framesToRead, double* pBufferOut)
+{
+    /*
+    We're just going to borrow the implementation from the drwav_read_s16() since ADPCM is a little bit more complicated than other formats and I don't
+    want to duplicate that code.
+    */
+    drwav_uint64 totalFramesRead = 0;
+    drwav_int16 samples16[2048];
+    while (framesToRead > 0) {
+        drwav_uint64 framesRead = drwav_read_pcm_frames_s16(pWav, drwav_min(framesToRead, drwav_countof(samples16)/pWav->channels), samples16);
+        if (framesRead == 0) {
+            break;
+        }
+
+        drwav_s16_to_f32(pBufferOut, samples16, (size_t)(framesRead*pWav->channels));   /* <-- Safe cast because we're clamping to 2048. */
+
+        pBufferOut      += framesRead*pWav->channels;
+        framesToRead    -= framesRead;
+        totalFramesRead += framesRead;
+    }
+
+    return totalFramesRead;
+}
+
+static drwav_uint64 drwav_read_pcm_frames_f32__ima(drwav* pWav, drwav_uint64 framesToRead, double* pBufferOut)
+{
+    /*
+    We're just going to borrow the implementation from the drwav_read_s16() since IMA-ADPCM is a little bit more complicated than other formats and I don't
+    want to duplicate that code.
+    */
+    drwav_uint64 totalFramesRead = 0;
+    drwav_int16 samples16[2048];
+    while (framesToRead > 0) {
+        drwav_uint64 framesRead = drwav_read_pcm_frames_s16(pWav, drwav_min(framesToRead, drwav_countof(samples16)/pWav->channels), samples16);
+        if (framesRead == 0) {
+            break;
+        }
+
+        drwav_s16_to_f32(pBufferOut, samples16, (size_t)(framesRead*pWav->channels));   /* <-- Safe cast because we're clamping to 2048. */
+
+        pBufferOut      += framesRead*pWav->channels;
+        framesToRead    -= framesRead;
+        totalFramesRead += framesRead;
+    }
+
+    return totalFramesRead;
+}
+
+static drwav_uint64 drwav_read_pcm_frames_f32__ieee(drwav* pWav, drwav_uint64 framesToRead, double* pBufferOut)
+{
+    drwav_uint64 totalFramesRead;
+    drwav_uint8 sampleData[4096];
+    drwav_uint32 bytesPerFrame;
+
+    /* Fast path. */
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_IEEE_FLOAT && pWav->bitsPerSample == 32) {
+        return drwav_read_pcm_frames(pWav, framesToRead, pBufferOut);
+    }
+    
+    bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+    if (bytesPerFrame == 0) {
+        return 0;
+    }
+
+    totalFramesRead = 0;
+
+    while (framesToRead > 0) {
+        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
+        if (framesRead == 0) {
+            break;
+        }
+
+        drwav__ieee_to_f32(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels), bytesPerFrame/pWav->channels);
+
+        pBufferOut      += framesRead*pWav->channels;
+        framesToRead    -= framesRead;
+        totalFramesRead += framesRead;
+    }
+
+    return totalFramesRead;
+}
+
+static drwav_uint64 drwav_read_pcm_frames_f32__alaw(drwav* pWav, drwav_uint64 framesToRead, double* pBufferOut)
+{
+    drwav_uint64 totalFramesRead;
+    drwav_uint8 sampleData[4096];
+    drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+    if (bytesPerFrame == 0) {
+        return 0;
+    }
+
+    totalFramesRead = 0;
+
+    while (framesToRead > 0) {
+        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
+        if (framesRead == 0) {
+            break;
+        }
+
+        drwav_alaw_to_f32(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels));
+
+        pBufferOut      += framesRead*pWav->channels;
+        framesToRead    -= framesRead;
+        totalFramesRead += framesRead;
+    }
+
+    return totalFramesRead;
+}
+
+static drwav_uint64 drwav_read_pcm_frames_f32__mulaw(drwav* pWav, drwav_uint64 framesToRead, double* pBufferOut)
+{
+    drwav_uint64 totalFramesRead;
+    drwav_uint8 sampleData[4096];
+
+    drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+    if (bytesPerFrame == 0) {
+        return 0;
+    }
+
+    totalFramesRead = 0;
+
+    while (framesToRead > 0) {
+        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
+        if (framesRead == 0) {
+            break;
+        }
+
+        drwav_mulaw_to_f32(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels));
+
+        pBufferOut      += framesRead*pWav->channels;
+        framesToRead    -= framesRead;
+        totalFramesRead += framesRead;
+    }
+
+    return totalFramesRead;
+}
+
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_f32(drwav* pWav, drwav_uint64 framesToRead, double* pBufferOut)
+{
+    if (pWav == NULL || framesToRead == 0 || pBufferOut == NULL) {
+        return 0;
+    }
+
+    /* Don't try to read more samples than can potentially fit in the output buffer. */
+    if (framesToRead * pWav->channels * sizeof(double) > DRWAV_SIZE_MAX) {
+        framesToRead = DRWAV_SIZE_MAX / sizeof(double) / pWav->channels;
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_PCM) {
+        return drwav_read_pcm_frames_f32__pcm(pWav, framesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) {
+        return drwav_read_pcm_frames_f32__msadpcm(pWav, framesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_IEEE_FLOAT) {
+        return drwav_read_pcm_frames_f32__ieee(pWav, framesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ALAW) {
+        return drwav_read_pcm_frames_f32__alaw(pWav, framesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_MULAW) {
+        return drwav_read_pcm_frames_f32__mulaw(pWav, framesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) {
+        return drwav_read_pcm_frames_f32__ima(pWav, framesToRead, pBufferOut);
+    }
+
+    return 0;
+}
+
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_f32le(drwav* pWav, drwav_uint64 framesToRead, double* pBufferOut)
+{
+    drwav_uint64 framesRead = drwav_read_pcm_frames_f32(pWav, framesToRead, pBufferOut);
+    if (!drwav__is_little_endian()) {
+        drwav__bswap_samples_f32(pBufferOut, framesRead*pWav->channels);
+    }
+
+    return framesRead;
+}
+
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_f32be(drwav* pWav, drwav_uint64 framesToRead, double* pBufferOut)
+{
+    drwav_uint64 framesRead = drwav_read_pcm_frames_f32(pWav, framesToRead, pBufferOut);
+    if (drwav__is_little_endian()) {
+        drwav__bswap_samples_f32(pBufferOut, framesRead*pWav->channels);
+    }
+
+    return framesRead;
+}
+
+
+DRWAV_API void drwav_u8_to_f32(double* pOut, const drwav_uint8* pIn, size_t sampleCount)
+{
+    size_t i;
+
+    if (pOut == NULL || pIn == NULL) {
+        return;
+    }
+
+#ifdef DR_WAV_LIBSNDFILE_COMPAT
+    /*
+    It appears libsndfile uses slightly different logic for the u8 -> f32 conversion to dr_wav, which in my opinion is incorrect. It appears
+    libsndfile performs the conversion something like "f32 = (u8 / 256) * 2 - 1", however I think it should be "f32 = (u8 / 255) * 2 - 1" (note
+    the divisor of 256 vs 255). I use libsndfile as a benchmark for testing, so I'm therefore leaving this block here just for my automated
+    correctness testing. This is disabled by default.
+    */
+    for (i = 0; i < sampleCount; ++i) {
+        *pOut++ = (pIn[i] / 256.0f) * 2 - 1;
+    }
+#else
+    for (i = 0; i < sampleCount; ++i) {
+        double x = pIn[i];
+        x = x * 0.00784313725490196078f;    /* 0..255 to 0..2 */
+        x = x - 1;                          /* 0..2 to -1..1 */
+
+        *pOut++ = x;
+    }
+#endif
+}
+
+DRWAV_API void drwav_s16_to_f32(double* pOut, const drwav_int16* pIn, size_t sampleCount)
+{
+    size_t i;
+
+    if (pOut == NULL || pIn == NULL) {
+        return;
+    }
+
+    for (i = 0; i < sampleCount; ++i) {
+        *pOut++ = pIn[i] * 0.000030517578125f;
+    }
+}
+
+DRWAV_API void drwav_s24_to_f32(double* pOut, const drwav_uint8* pIn, size_t sampleCount)
+{
+    size_t i;
+
+    if (pOut == NULL || pIn == NULL) {
+        return;
+    }
+
+    for (i = 0; i < sampleCount; ++i) {
+        double x = (double)(((drwav_int32)(((drwav_uint32)(pIn[i*3+0]) << 8) | ((drwav_uint32)(pIn[i*3+1]) << 16) | ((drwav_uint32)(pIn[i*3+2])) << 24)) >> 8);
+        *pOut++ = (double)(x * 0.00000011920928955078125);
+    }
+}
+
+DRWAV_API void drwav_s32_to_f32(double* pOut, const drwav_int32* pIn, size_t sampleCount)
+{
+    size_t i;
+    if (pOut == NULL || pIn == NULL) {
+        return;
+    }
+
+    for (i = 0; i < sampleCount; ++i) {
+        *pOut++ = (double)(pIn[i] / 2147483648.0);
+    }
+}
+
+DRWAV_API void drwav_f64_to_f32(double* pOut, const double* pIn, size_t sampleCount)
+{
+    size_t i;
+
+    if (pOut == NULL || pIn == NULL) {
+        return;
+    }
+
+    for (i = 0; i < sampleCount; ++i) {
+        *pOut++ = (double)pIn[i];
+    }
+}
+
+DRWAV_API void drwav_alaw_to_f32(double* pOut, const drwav_uint8* pIn, size_t sampleCount)
+{
+    size_t i;
+
+    if (pOut == NULL || pIn == NULL) {
+        return;
+    }
+
+    for (i = 0; i < sampleCount; ++i) {
+        *pOut++ = drwav__alaw_to_s16(pIn[i]) / 32768.0f;
+    }
+}
+
+DRWAV_API void drwav_mulaw_to_f32(double* pOut, const drwav_uint8* pIn, size_t sampleCount)
+{
+    size_t i;
+
+    if (pOut == NULL || pIn == NULL) {
+        return;
+    }
+
+    for (i = 0; i < sampleCount; ++i) {
+        *pOut++ = drwav__mulaw_to_s16(pIn[i]) / 32768.0f;
+    }
+}
+
+
+
+static void drwav__pcm_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t totalSampleCount, unsigned int bytesPerSample)
+{
+    unsigned int i;
+
+    /* Special case for 8-bit sample data because it's treated as unsigned. */
+    if (bytesPerSample == 1) {
+        drwav_u8_to_s32(pOut, pIn, totalSampleCount);
+        return;
+    }
+
+    /* Slightly more optimal implementation for common formats. */
+    if (bytesPerSample == 2) {
+        drwav_s16_to_s32(pOut, (const drwav_int16*)pIn, totalSampleCount);
+        return;
+    }
+    if (bytesPerSample == 3) {
+        drwav_s24_to_s32(pOut, pIn, totalSampleCount);
+        return;
+    }
+    if (bytesPerSample == 4) {
+        for (i = 0; i < totalSampleCount; ++i) {
+           *pOut++ = ((const drwav_int32*)pIn)[i];
+        }
+        return;
+    }
+
+
+    /* Anything more than 64 bits per sample is not supported. */
+    if (bytesPerSample > 8) {
+        DRWAV_ZERO_MEMORY(pOut, totalSampleCount * sizeof(*pOut));
+        return;
+    }
+
+
+    /* Generic, slow converter. */
+    for (i = 0; i < totalSampleCount; ++i) {
+        drwav_uint64 sample = 0;
+        unsigned int shift  = (8 - bytesPerSample) * 8;
+
+        unsigned int j;
+        for (j = 0; j < bytesPerSample; j += 1) {
+            DRWAV_ASSERT(j < 8);
+            sample |= (drwav_uint64)(pIn[j]) << shift;
+            shift  += 8;
+        }
+
+        pIn += j;
+        *pOut++ = (drwav_int32)((drwav_int64)sample >> 32);
+    }
+}
+
+static void drwav__ieee_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t totalSampleCount, unsigned int bytesPerSample)
+{
+    if (bytesPerSample == 4) {
+        drwav_f32_to_s32(pOut, (const double*)pIn, totalSampleCount);
+        return;
+    } else if (bytesPerSample == 8) {
+        drwav_f64_to_s32(pOut, (const double*)pIn, totalSampleCount);
+        return;
+    } else {
+        /* Only supporting 32- and 64-bit double. Output silence in all other cases. Contributions welcome for 16-bit double. */
+        DRWAV_ZERO_MEMORY(pOut, totalSampleCount * sizeof(*pOut));
+        return;
+    }
+}
+
+
+static drwav_uint64 drwav_read_pcm_frames_s32__pcm(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
+{
+    drwav_uint64 totalFramesRead;
+    drwav_uint8 sampleData[4096];
+    drwav_uint32 bytesPerFrame;
+
+    /* Fast path. */
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_PCM && pWav->bitsPerSample == 32) {
+        return drwav_read_pcm_frames(pWav, framesToRead, pBufferOut);
+    }
+    
+    bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+    if (bytesPerFrame == 0) {
+        return 0;
+    }
+
+    totalFramesRead = 0;
+
+    while (framesToRead > 0) {
+        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
+        if (framesRead == 0) {
+            break;
+        }
+
+        drwav__pcm_to_s32(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels), bytesPerFrame/pWav->channels);
+
+        pBufferOut      += framesRead*pWav->channels;
+        framesToRead    -= framesRead;
+        totalFramesRead += framesRead;
+    }
+
+    return totalFramesRead;
+}
+
+static drwav_uint64 drwav_read_pcm_frames_s32__msadpcm(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
+{
+    /*
+    We're just going to borrow the implementation from the drwav_read_s16() since ADPCM is a little bit more complicated than other formats and I don't
+    want to duplicate that code.
+    */
+    drwav_uint64 totalFramesRead = 0;
+    drwav_int16 samples16[2048];
+    while (framesToRead > 0) {
+        drwav_uint64 framesRead = drwav_read_pcm_frames_s16(pWav, drwav_min(framesToRead, drwav_countof(samples16)/pWav->channels), samples16);
+        if (framesRead == 0) {
+            break;
+        }
+
+        drwav_s16_to_s32(pBufferOut, samples16, (size_t)(framesRead*pWav->channels));   /* <-- Safe cast because we're clamping to 2048. */
+
+        pBufferOut      += framesRead*pWav->channels;
+        framesToRead    -= framesRead;
+        totalFramesRead += framesRead;
+    }
+
+    return totalFramesRead;
+}
+
+static drwav_uint64 drwav_read_pcm_frames_s32__ima(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
+{
+    /*
+    We're just going to borrow the implementation from the drwav_read_s16() since IMA-ADPCM is a little bit more complicated than other formats and I don't
+    want to duplicate that code.
+    */
+    drwav_uint64 totalFramesRead = 0;
+    drwav_int16 samples16[2048];
+    while (framesToRead > 0) {
+        drwav_uint64 framesRead = drwav_read_pcm_frames_s16(pWav, drwav_min(framesToRead, drwav_countof(samples16)/pWav->channels), samples16);
+        if (framesRead == 0) {
+            break;
+        }
+
+        drwav_s16_to_s32(pBufferOut, samples16, (size_t)(framesRead*pWav->channels));   /* <-- Safe cast because we're clamping to 2048. */
+
+        pBufferOut      += framesRead*pWav->channels;
+        framesToRead    -= framesRead;
+        totalFramesRead += framesRead;
+    }
+
+    return totalFramesRead;
+}
+
+static drwav_uint64 drwav_read_pcm_frames_s32__ieee(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
+{
+    drwav_uint64 totalFramesRead;
+    drwav_uint8 sampleData[4096];
+
+    drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+    if (bytesPerFrame == 0) {
+        return 0;
+    }
+
+    totalFramesRead = 0;
+
+    while (framesToRead > 0) {
+        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
+        if (framesRead == 0) {
+            break;
+        }
+
+        drwav__ieee_to_s32(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels), bytesPerFrame/pWav->channels);
+
+        pBufferOut      += framesRead*pWav->channels;
+        framesToRead    -= framesRead;
+        totalFramesRead += framesRead;
+    }
+
+    return totalFramesRead;
+}
+
+static drwav_uint64 drwav_read_pcm_frames_s32__alaw(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
+{
+    drwav_uint64 totalFramesRead;
+    drwav_uint8 sampleData[4096];
+
+    drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+    if (bytesPerFrame == 0) {
+        return 0;
+    }
+
+    totalFramesRead = 0;
+
+    while (framesToRead > 0) {
+        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
+        if (framesRead == 0) {
+            break;
+        }
+
+        drwav_alaw_to_s32(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels));
+
+        pBufferOut      += framesRead*pWav->channels;
+        framesToRead    -= framesRead;
+        totalFramesRead += framesRead;
+    }
+
+    return totalFramesRead;
+}
+
+static drwav_uint64 drwav_read_pcm_frames_s32__mulaw(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
+{
+    drwav_uint64 totalFramesRead;
+    drwav_uint8 sampleData[4096];
+
+    drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+    if (bytesPerFrame == 0) {
+        return 0;
+    }
+
+    totalFramesRead = 0;
+
+    while (framesToRead > 0) {
+        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
+        if (framesRead == 0) {
+            break;
+        }
+
+        drwav_mulaw_to_s32(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels));
+
+        pBufferOut      += framesRead*pWav->channels;
+        framesToRead    -= framesRead;
+        totalFramesRead += framesRead;
+    }
+
+    return totalFramesRead;
+}
+
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_s32(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
+{
+    if (pWav == NULL || framesToRead == 0 || pBufferOut == NULL) {
+        return 0;
+    }
+
+    /* Don't try to read more samples than can potentially fit in the output buffer. */
+    if (framesToRead * pWav->channels * sizeof(drwav_int32) > DRWAV_SIZE_MAX) {
+        framesToRead = DRWAV_SIZE_MAX / sizeof(drwav_int32) / pWav->channels;
+    }
+
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_PCM) {
+        return drwav_read_pcm_frames_s32__pcm(pWav, framesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) {
+        return drwav_read_pcm_frames_s32__msadpcm(pWav, framesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_IEEE_FLOAT) {
+        return drwav_read_pcm_frames_s32__ieee(pWav, framesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ALAW) {
+        return drwav_read_pcm_frames_s32__alaw(pWav, framesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_MULAW) {
+        return drwav_read_pcm_frames_s32__mulaw(pWav, framesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) {
+        return drwav_read_pcm_frames_s32__ima(pWav, framesToRead, pBufferOut);
+    }
+
+    return 0;
+}
+
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_s32le(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
+{
+    drwav_uint64 framesRead = drwav_read_pcm_frames_s32(pWav, framesToRead, pBufferOut);
+    if (!drwav__is_little_endian()) {
+        drwav__bswap_samples_s32(pBufferOut, framesRead*pWav->channels);
+    }
+
+    return framesRead;
+}
+
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_s32be(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
+{
+    drwav_uint64 framesRead = drwav_read_pcm_frames_s32(pWav, framesToRead, pBufferOut);
+    if (drwav__is_little_endian()) {
+        drwav__bswap_samples_s32(pBufferOut, framesRead*pWav->channels);
+    }
+
+    return framesRead;
+}
+
+
+DRWAV_API void drwav_u8_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount)
+{
+    size_t i;
+
+    if (pOut == NULL || pIn == NULL) {
+        return;
+    }
+
+    for (i = 0; i < sampleCount; ++i) {
+        *pOut++ = ((int)pIn[i] - 128) << 24;
+    }
+}
+
+DRWAV_API void drwav_s16_to_s32(drwav_int32* pOut, const drwav_int16* pIn, size_t sampleCount)
+{
+    size_t i;
+
+    if (pOut == NULL || pIn == NULL) {
+        return;
+    }
+
+    for (i = 0; i < sampleCount; ++i) {
+        *pOut++ = pIn[i] << 16;
+    }
+}
+
+DRWAV_API void drwav_s24_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount)
+{
+    size_t i;
+
+    if (pOut == NULL || pIn == NULL) {
+        return;
+    }
+
+    for (i = 0; i < sampleCount; ++i) {
+        unsigned int s0 = pIn[i*3 + 0];
+        unsigned int s1 = pIn[i*3 + 1];
+        unsigned int s2 = pIn[i*3 + 2];
+
+        drwav_int32 sample32 = (drwav_int32)((s0 << 8) | (s1 << 16) | (s2 << 24));
+        *pOut++ = sample32;
+    }
+}
+
+DRWAV_API void drwav_f32_to_s32(drwav_int32* pOut, const double* pIn, size_t sampleCount)
+{
+    size_t i;
+
+    if (pOut == NULL || pIn == NULL) {
+        return;
+    }
+
+    for (i = 0; i < sampleCount; ++i) {
+        *pOut++ = (drwav_int32)(2147483648.0 * pIn[i]);
+    }
+}
+
+DRWAV_API void drwav_f64_to_s32(drwav_int32* pOut, const double* pIn, size_t sampleCount)
+{
+    size_t i;
+
+    if (pOut == NULL || pIn == NULL) {
+        return;
+    }
+
+    for (i = 0; i < sampleCount; ++i) {
+        *pOut++ = (drwav_int32)(2147483648.0 * pIn[i]);
+    }
+}
+
+DRWAV_API void drwav_alaw_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount)
+{
+    size_t i;
+
+    if (pOut == NULL || pIn == NULL) {
+        return;
+    }
+
+    for (i = 0; i < sampleCount; ++i) {
+        *pOut++ = ((drwav_int32)drwav__alaw_to_s16(pIn[i])) << 16;
+    }
+}
+
+DRWAV_API void drwav_mulaw_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount)
+{
+    size_t i;
+
+    if (pOut == NULL || pIn == NULL) {
+        return;
+    }
+
+    for (i= 0; i < sampleCount; ++i) {
+        *pOut++ = ((drwav_int32)drwav__mulaw_to_s16(pIn[i])) << 16;
+    }
+}
+
+
+
+static drwav_int16* drwav__read_pcm_frames_and_close_s16(drwav* pWav, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalFrameCount)
+{
+    drwav_uint64 sampleDataSize;
+    drwav_int16* pSampleData;
+    drwav_uint64 framesRead;
+
+    DRWAV_ASSERT(pWav != NULL);
+
+    sampleDataSize = pWav->totalPCMFrameCount * pWav->channels * sizeof(drwav_int16);
+    if (sampleDataSize > DRWAV_SIZE_MAX) {
+        drwav_uninit(pWav);
+        return NULL;    /* File's too big. */
+    }
+
+    pSampleData = (drwav_int16*)drwav__malloc_from_callbacks((size_t)sampleDataSize, &pWav->allocationCallbacks); /* <-- Safe cast due to the check above. */
+    if (pSampleData == NULL) {
+        drwav_uninit(pWav);
+        return NULL;    /* Failed to allocate memory. */
+    }
+
+    framesRead = drwav_read_pcm_frames_s16(pWav, (size_t)pWav->totalPCMFrameCount, pSampleData);
+    if (framesRead != pWav->totalPCMFrameCount) {
+        drwav__free_from_callbacks(pSampleData, &pWav->allocationCallbacks);
+        drwav_uninit(pWav);
+        return NULL;    /* There was an error reading the samples. */
+    }
+
+    drwav_uninit(pWav);
+
+    if (sampleRate) {
+        *sampleRate = pWav->sampleRate;
+    }
+    if (channels) {
+        *channels = pWav->channels;
+    }
+    if (totalFrameCount) {
+        *totalFrameCount = pWav->totalPCMFrameCount;
+    }
+
+    return pSampleData;
+}
+
+static double* drwav__read_pcm_frames_and_close_f32(drwav* pWav, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalFrameCount)
+{
+    drwav_uint64 sampleDataSize;
+    double* pSampleData;
+    drwav_uint64 framesRead;
+
+    DRWAV_ASSERT(pWav != NULL);
+
+    sampleDataSize = pWav->totalPCMFrameCount * pWav->channels * sizeof(double);
+    if (sampleDataSize > DRWAV_SIZE_MAX) {
+        drwav_uninit(pWav);
+        return NULL;    /* File's too big. */
+    }
+
+    pSampleData = (double*)drwav__malloc_from_callbacks((size_t)sampleDataSize, &pWav->allocationCallbacks); /* <-- Safe cast due to the check above. */
+    if (pSampleData == NULL) {
+        drwav_uninit(pWav);
+        return NULL;    /* Failed to allocate memory. */
+    }
+
+    framesRead = drwav_read_pcm_frames_f32(pWav, (size_t)pWav->totalPCMFrameCount, pSampleData);
+    if (framesRead != pWav->totalPCMFrameCount) {
+        drwav__free_from_callbacks(pSampleData, &pWav->allocationCallbacks);
+        drwav_uninit(pWav);
+        return NULL;    /* There was an error reading the samples. */
+    }
+
+    drwav_uninit(pWav);
+
+    if (sampleRate) {
+        *sampleRate = pWav->sampleRate;
+    }
+    if (channels) {
+        *channels = pWav->channels;
+    }
+    if (totalFrameCount) {
+        *totalFrameCount = pWav->totalPCMFrameCount;
+    }
+
+    return pSampleData;
+}
+
+static drwav_int32* drwav__read_pcm_frames_and_close_s32(drwav* pWav, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalFrameCount)
+{
+    drwav_uint64 sampleDataSize;
+    drwav_int32* pSampleData;
+    drwav_uint64 framesRead;
+
+    DRWAV_ASSERT(pWav != NULL);
+
+    sampleDataSize = pWav->totalPCMFrameCount * pWav->channels * sizeof(drwav_int32);
+    if (sampleDataSize > DRWAV_SIZE_MAX) {
+        drwav_uninit(pWav);
+        return NULL;    /* File's too big. */
+    }
+
+    pSampleData = (drwav_int32*)drwav__malloc_from_callbacks((size_t)sampleDataSize, &pWav->allocationCallbacks); /* <-- Safe cast due to the check above. */
+    if (pSampleData == NULL) {
+        drwav_uninit(pWav);
+        return NULL;    /* Failed to allocate memory. */
+    }
+
+    framesRead = drwav_read_pcm_frames_s32(pWav, (size_t)pWav->totalPCMFrameCount, pSampleData);
+    if (framesRead != pWav->totalPCMFrameCount) {
+        drwav__free_from_callbacks(pSampleData, &pWav->allocationCallbacks);
+        drwav_uninit(pWav);
+        return NULL;    /* There was an error reading the samples. */
+    }
+
+    drwav_uninit(pWav);
+
+    if (sampleRate) {
+        *sampleRate = pWav->sampleRate;
+    }
+    if (channels) {
+        *channels = pWav->channels;
+    }
+    if (totalFrameCount) {
+        *totalFrameCount = pWav->totalPCMFrameCount;
+    }
+
+    return pSampleData;
+}
+
+
+
+DRWAV_API drwav_int16* drwav_open_and_read_pcm_frames_s16(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    drwav wav;
+
+    if (channelsOut) {
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalFrameCountOut) {
+        *totalFrameCountOut = 0;
+    }
+
+    if (!drwav_init(&wav, onRead, onSeek, pUserData, pAllocationCallbacks)) {
+        return NULL;
+    }
+
+    return drwav__read_pcm_frames_and_close_s16(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
+}
+
+DRWAV_API double* drwav_open_and_read_pcm_frames_f32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    drwav wav;
+
+    if (channelsOut) {
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalFrameCountOut) {
+        *totalFrameCountOut = 0;
+    }
+
+    if (!drwav_init(&wav, onRead, onSeek, pUserData, pAllocationCallbacks)) {
+        return NULL;
+    }
+
+    return drwav__read_pcm_frames_and_close_f32(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
+}
+
+DRWAV_API drwav_int32* drwav_open_and_read_pcm_frames_s32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    drwav wav;
+
+    if (channelsOut) {
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalFrameCountOut) {
+        *totalFrameCountOut = 0;
+    }
+
+    if (!drwav_init(&wav, onRead, onSeek, pUserData, pAllocationCallbacks)) {
+        return NULL;
+    }
+
+    return drwav__read_pcm_frames_and_close_s32(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
+}
+
+#ifndef DR_WAV_NO_STDIO
+DRWAV_API drwav_int16* drwav_open_file_and_read_pcm_frames_s16(const char* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    drwav wav;
+
+    if (channelsOut) {
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalFrameCountOut) {
+        *totalFrameCountOut = 0;
+    }
+
+    if (!drwav_init_file(&wav, filename, pAllocationCallbacks)) {
+        return NULL;
+    }
+
+    return drwav__read_pcm_frames_and_close_s16(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
+}
+
+DRWAV_API double* drwav_open_file_and_read_pcm_frames_f32(const char* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    drwav wav;
+
+    if (channelsOut) {
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalFrameCountOut) {
+        *totalFrameCountOut = 0;
+    }
+
+    if (!drwav_init_file(&wav, filename, pAllocationCallbacks)) {
+        return NULL;
+    }
+
+    return drwav__read_pcm_frames_and_close_f32(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
+}
+
+DRWAV_API drwav_int32* drwav_open_file_and_read_pcm_frames_s32(const char* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    drwav wav;
+
+    if (channelsOut) {
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalFrameCountOut) {
+        *totalFrameCountOut = 0;
+    }
+
+    if (!drwav_init_file(&wav, filename, pAllocationCallbacks)) {
+        return NULL;
+    }
+
+    return drwav__read_pcm_frames_and_close_s32(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
+}
+
+
+DRWAV_API drwav_int16* drwav_open_file_and_read_pcm_frames_s16_w(const wchar_t* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    drwav wav;
+
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (channelsOut) {
+        *channelsOut = 0;
+    }
+    if (totalFrameCountOut) {
+        *totalFrameCountOut = 0;
+    }
+
+    if (!drwav_init_file_w(&wav, filename, pAllocationCallbacks)) {
+        return NULL;
+    }
+
+    return drwav__read_pcm_frames_and_close_s16(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
+}
+
+DRWAV_API double* drwav_open_file_and_read_pcm_frames_f32_w(const wchar_t* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    drwav wav;
+
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (channelsOut) {
+        *channelsOut = 0;
+    }
+    if (totalFrameCountOut) {
+        *totalFrameCountOut = 0;
+    }
+
+    if (!drwav_init_file_w(&wav, filename, pAllocationCallbacks)) {
+        return NULL;
+    }
+
+    return drwav__read_pcm_frames_and_close_f32(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
+}
+
+DRWAV_API drwav_int32* drwav_open_file_and_read_pcm_frames_s32_w(const wchar_t* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    drwav wav;
+
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (channelsOut) {
+        *channelsOut = 0;
+    }
+    if (totalFrameCountOut) {
+        *totalFrameCountOut = 0;
+    }
+
+    if (!drwav_init_file_w(&wav, filename, pAllocationCallbacks)) {
+        return NULL;
+    }
+
+    return drwav__read_pcm_frames_and_close_s32(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
+}
+#endif
+
+DRWAV_API drwav_int16* drwav_open_memory_and_read_pcm_frames_s16(const void* data, size_t dataSize, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    drwav wav;
+
+    if (channelsOut) {
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalFrameCountOut) {
+        *totalFrameCountOut = 0;
+    }
+
+    if (!drwav_init_memory(&wav, data, dataSize, pAllocationCallbacks)) {
+        return NULL;
+    }
+
+    return drwav__read_pcm_frames_and_close_s16(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
+}
+
+DRWAV_API double* drwav_open_memory_and_read_pcm_frames_f32(const void* data, size_t dataSize, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    drwav wav;
+
+    if (channelsOut) {
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalFrameCountOut) {
+        *totalFrameCountOut = 0;
+    }
+
+    if (!drwav_init_memory(&wav, data, dataSize, pAllocationCallbacks)) {
+        return NULL;
+    }
+
+    return drwav__read_pcm_frames_and_close_f32(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
+}
+
+DRWAV_API drwav_int32* drwav_open_memory_and_read_pcm_frames_s32(const void* data, size_t dataSize, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    drwav wav;
+
+    if (channelsOut) {
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalFrameCountOut) {
+        *totalFrameCountOut = 0;
+    }
+
+    if (!drwav_init_memory(&wav, data, dataSize, pAllocationCallbacks)) {
+        return NULL;
+    }
+
+    return drwav__read_pcm_frames_and_close_s32(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
+}
+#endif  /* DR_WAV_NO_CONVERSION_API */
+
+
+DRWAV_API void drwav_free(void* p, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pAllocationCallbacks != NULL) {
+        drwav__free_from_callbacks(p, pAllocationCallbacks);
+    } else {
+        drwav__free_default(p, NULL);
+    }
+}
+
+DRWAV_API drwav_uint16 drwav_bytes_to_u16(const drwav_uint8* data)
+{
+    return drwav__bytes_to_u16(data);
+}
+
+DRWAV_API drwav_int16 drwav_bytes_to_s16(const drwav_uint8* data)
+{
+    return drwav__bytes_to_s16(data);
+}
+
+DRWAV_API drwav_uint32 drwav_bytes_to_u32(const drwav_uint8* data)
+{
+    return drwav__bytes_to_u32(data);
+}
+
+DRWAV_API drwav_int32 drwav_bytes_to_s32(const drwav_uint8* data)
+{
+    return drwav__bytes_to_s32(data);
+}
+
+DRWAV_API drwav_uint64 drwav_bytes_to_u64(const drwav_uint8* data)
+{
+    return drwav__bytes_to_u64(data);
+}
+
+DRWAV_API drwav_int64 drwav_bytes_to_s64(const drwav_uint8* data)
+{
+    return drwav__bytes_to_s64(data);
+}
+
+
+DRWAV_API drwav_bool32 drwav_guid_equal(const drwav_uint8 a[16], const drwav_uint8 b[16])
+{
+    return drwav__guid_equal(a, b);
+}
+
+DRWAV_API drwav_bool32 drwav_fourcc_equal(const drwav_uint8* a, const char* b)
+{
+    return drwav__fourcc_equal(a, b);
+}
+
+#endif  /* DR_WAV_IMPLEMENTATION */
+
+/*
+RELEASE NOTES - v0.11.0
+=======================
+Version 0.11.0 has breaking API changes.
+
+Improved Client-Defined Memory Allocation
+-----------------------------------------
+The main change with this release is the addition of a more flexible way of implementing custom memory allocation routines. The
+existing system of DRWAV_MALLOC, DRWAV_REALLOC and DRWAV_FREE are still in place and will be used by default when no custom
+allocation callbacks are specified.
+
+To use the new system, you pass in a pointer to a drwav_allocation_callbacks object to drwav_init() and family, like this:
+
+    void* my_malloc(size_t sz, void* pUserData)
+    {
+        return malloc(sz);
+    }
+    void* my_realloc(void* p, size_t sz, void* pUserData)
+    {
+        return realloc(p, sz);
+    }
+    void my_free(void* p, void* pUserData)
+    {
+        free(p);
+    }
+
+    ...
+
+    drwav_allocation_callbacks allocationCallbacks;
+    allocationCallbacks.pUserData = &myData;
+    allocationCallbacks.onMalloc  = my_malloc;
+    allocationCallbacks.onRealloc = my_realloc;
+    allocationCallbacks.onFree    = my_free;
+    drwav_init_file(&wav, "my_file.wav", &allocationCallbacks);
+
+The advantage of this new system is that it allows you to specify user data which will be passed in to the allocation routines.
+
+Passing in null for the allocation callbacks object will cause dr_wav to use defaults which is the same as DRWAV_MALLOC,
+DRWAV_REALLOC and DRWAV_FREE and the equivalent of how it worked in previous versions.
+
+Every API that opens a drwav object now takes this extra parameter. These include the following:
+
+    drwav_init()
+    drwav_init_ex()
+    drwav_init_file()
+    drwav_init_file_ex()
+    drwav_init_file_w()
+    drwav_init_file_w_ex()
+    drwav_init_memory()
+    drwav_init_memory_ex()
+    drwav_init_write()
+    drwav_init_write_sequential()
+    drwav_init_write_sequential_pcm_frames()
+    drwav_init_file_write()
+    drwav_init_file_write_sequential()
+    drwav_init_file_write_sequential_pcm_frames()
+    drwav_init_file_write_w()
+    drwav_init_file_write_sequential_w()
+    drwav_init_file_write_sequential_pcm_frames_w()
+    drwav_init_memory_write()
+    drwav_init_memory_write_sequential()
+    drwav_init_memory_write_sequential_pcm_frames()
+    drwav_open_and_read_pcm_frames_s16()
+    drwav_open_and_read_pcm_frames_f32()
+    drwav_open_and_read_pcm_frames_s32()
+    drwav_open_file_and_read_pcm_frames_s16()
+    drwav_open_file_and_read_pcm_frames_f32()
+    drwav_open_file_and_read_pcm_frames_s32()
+    drwav_open_file_and_read_pcm_frames_s16_w()
+    drwav_open_file_and_read_pcm_frames_f32_w()
+    drwav_open_file_and_read_pcm_frames_s32_w()
+    drwav_open_memory_and_read_pcm_frames_s16()
+    drwav_open_memory_and_read_pcm_frames_f32()
+    drwav_open_memory_and_read_pcm_frames_s32()
+
+Endian Improvements
+-------------------
+Previously, the following APIs returned little-endian audio data. These now return native-endian data. This improves compatibility
+on big-endian architectures.
+
+    drwav_read_pcm_frames()
+    drwav_read_pcm_frames_s16()
+    drwav_read_pcm_frames_s32()
+    drwav_read_pcm_frames_f32()
+    drwav_open_and_read_pcm_frames_s16()
+    drwav_open_and_read_pcm_frames_s32()
+    drwav_open_and_read_pcm_frames_f32()
+    drwav_open_file_and_read_pcm_frames_s16()
+    drwav_open_file_and_read_pcm_frames_s32()
+    drwav_open_file_and_read_pcm_frames_f32()
+    drwav_open_file_and_read_pcm_frames_s16_w()
+    drwav_open_file_and_read_pcm_frames_s32_w()
+    drwav_open_file_and_read_pcm_frames_f32_w()
+    drwav_open_memory_and_read_pcm_frames_s16()
+    drwav_open_memory_and_read_pcm_frames_s32()
+    drwav_open_memory_and_read_pcm_frames_f32()
+
+APIs have been added to give you explicit control over whether or not audio data is read or written in big- or little-endian byte
+order:
+
+    drwav_read_pcm_frames_le()
+    drwav_read_pcm_frames_be()
+    drwav_read_pcm_frames_s16le()
+    drwav_read_pcm_frames_s16be()
+    drwav_read_pcm_frames_f32le()
+    drwav_read_pcm_frames_f32be()
+    drwav_read_pcm_frames_s32le()
+    drwav_read_pcm_frames_s32be()
+    drwav_write_pcm_frames_le()
+    drwav_write_pcm_frames_be()
+
+Removed APIs
+------------
+The following APIs were deprecated in version 0.10.0 and have now been removed:
+
+    drwav_open()
+    drwav_open_ex()
+    drwav_open_write()
+    drwav_open_write_sequential()
+    drwav_open_file()
+    drwav_open_file_ex()
+    drwav_open_file_write()
+    drwav_open_file_write_sequential()
+    drwav_open_memory()
+    drwav_open_memory_ex()
+    drwav_open_memory_write()
+    drwav_open_memory_write_sequential()
+    drwav_close()
+
+
+
+RELEASE NOTES - v0.10.0
+=======================
+Version 0.10.0 has breaking API changes. There are no significant bug fixes in this release, so if you are affected you do
+not need to upgrade.
+
+Removed APIs
+------------
+The following APIs were deprecated in version 0.9.0 and have been completely removed in version 0.10.0:
+
+    drwav_read()
+    drwav_read_s16()
+    drwav_read_f32()
+    drwav_read_s32()
+    drwav_seek_to_sample()
+    drwav_write()
+    drwav_open_and_read_s16()
+    drwav_open_and_read_f32()
+    drwav_open_and_read_s32()
+    drwav_open_file_and_read_s16()
+    drwav_open_file_and_read_f32()
+    drwav_open_file_and_read_s32()
+    drwav_open_memory_and_read_s16()
+    drwav_open_memory_and_read_f32()
+    drwav_open_memory_and_read_s32()
+    drwav::totalSampleCount
+
+See release notes for version 0.9.0 at the bottom of this file for replacement APIs.
+
+Deprecated APIs
+---------------
+The following APIs have been deprecated. There is a confusing and completely arbitrary difference between drwav_init*() and
+drwav_open*(), where drwav_init*() initializes a pre-allocated drwav object, whereas drwav_open*() will first allocated a
+drwav object on the heap and then initialize it. drwav_open*() has been deprecated which means you must now use a pre-
+allocated drwav object with drwav_init*(). If you need the previous functionality, you can just do a malloc() followed by
+a called to one of the drwav_init*() APIs.
+
+    drwav_open()
+    drwav_open_ex()
+    drwav_open_write()
+    drwav_open_write_sequential()
+    drwav_open_file()
+    drwav_open_file_ex()
+    drwav_open_file_write()
+    drwav_open_file_write_sequential()
+    drwav_open_memory()
+    drwav_open_memory_ex()
+    drwav_open_memory_write()
+    drwav_open_memory_write_sequential()
+    drwav_close()
+
+These APIs will be removed completely in a future version. The rationale for this change is to remove confusion between the
+two different ways to initialize a drwav object.
+*/
+
+/*
+REVISION HISTORY
+================
+v0.12.5 - 2020-05-27
+  - Minor documentation fix.
+
+v0.12.4 - 2020-05-16
+  - Replace assert() with DRWAV_ASSERT().
+  - Add compile-time and run-time version querying.
+    - DRWAV_VERSION_MINOR
+    - DRWAV_VERSION_MAJOR
+    - DRWAV_VERSION_REVISION
+    - DRWAV_VERSION_STRING
+    - drwav_version()
+    - drwav_version_string()
+
+v0.12.3 - 2020-04-30
+  - Fix compilation errors with VC6.
+
+v0.12.2 - 2020-04-21
+  - Fix a bug where drwav_init_file() does not close the file handle after attempting to load an erroneous file.
+
+v0.12.1 - 2020-04-13
+  - Fix some pedantic warnings.
+
+v0.12.0 - 2020-04-04
+  - API CHANGE: Add container and format parameters to the chunk callback.
+  - Minor documentation updates.
+
+v0.11.5 - 2020-03-07
+  - Fix compilation error with Visual Studio .NET 2003.
+
+v0.11.4 - 2020-01-29
+  - Fix some static analysis warnings.
+  - Fix a bug when reading f32 samples from an A-law encoded stream.
+
+v0.11.3 - 2020-01-12
+  - Minor changes to some f32 format conversion routines.
+  - Minor bug fix for ADPCM conversion when end of file is reached.
+
+v0.11.2 - 2019-12-02
+  - Fix a possible crash when using custom memory allocators without a custom realloc() implementation.
+  - Fix an integer overflow bug.
+  - Fix a null pointer dereference bug.
+  - Add limits to sample rate, channels and bits per sample to tighten up some validation.
+
+v0.11.1 - 2019-10-07
+  - Internal code clean up.
+
+v0.11.0 - 2019-10-06
+  - API CHANGE: Add support for user defined memory allocation routines. This system allows the program to specify their own memory allocation
+    routines with a user data pointer for client-specific contextual data. This adds an extra parameter to the end of the following APIs:
+    - drwav_init()
+    - drwav_init_ex()
+    - drwav_init_file()
+    - drwav_init_file_ex()
+    - drwav_init_file_w()
+    - drwav_init_file_w_ex()
+    - drwav_init_memory()
+    - drwav_init_memory_ex()
+    - drwav_init_write()
+    - drwav_init_write_sequential()
+    - drwav_init_write_sequential_pcm_frames()
+    - drwav_init_file_write()
+    - drwav_init_file_write_sequential()
+    - drwav_init_file_write_sequential_pcm_frames()
+    - drwav_init_file_write_w()
+    - drwav_init_file_write_sequential_w()
+    - drwav_init_file_write_sequential_pcm_frames_w()
+    - drwav_init_memory_write()
+    - drwav_init_memory_write_sequential()
+    - drwav_init_memory_write_sequential_pcm_frames()
+    - drwav_open_and_read_pcm_frames_s16()
+    - drwav_open_and_read_pcm_frames_f32()
+    - drwav_open_and_read_pcm_frames_s32()
+    - drwav_open_file_and_read_pcm_frames_s16()
+    - drwav_open_file_and_read_pcm_frames_f32()
+    - drwav_open_file_and_read_pcm_frames_s32()
+    - drwav_open_file_and_read_pcm_frames_s16_w()
+    - drwav_open_file_and_read_pcm_frames_f32_w()
+    - drwav_open_file_and_read_pcm_frames_s32_w()
+    - drwav_open_memory_and_read_pcm_frames_s16()
+    - drwav_open_memory_and_read_pcm_frames_f32()
+    - drwav_open_memory_and_read_pcm_frames_s32()
+    Set this extra parameter to NULL to use defaults which is the same as the previous behaviour. Setting this NULL will use
+    DRWAV_MALLOC, DRWAV_REALLOC and DRWAV_FREE.
+  - Add support for reading and writing PCM frames in an explicit endianness. New APIs:
+    - drwav_read_pcm_frames_le()
+    - drwav_read_pcm_frames_be()
+    - drwav_read_pcm_frames_s16le()
+    - drwav_read_pcm_frames_s16be()
+    - drwav_read_pcm_frames_f32le()
+    - drwav_read_pcm_frames_f32be()
+    - drwav_read_pcm_frames_s32le()
+    - drwav_read_pcm_frames_s32be()
+    - drwav_write_pcm_frames_le()
+    - drwav_write_pcm_frames_be()
+  - Remove deprecated APIs.
+  - API CHANGE: The following APIs now return native-endian data. Previously they returned little-endian data.
+    - drwav_read_pcm_frames()
+    - drwav_read_pcm_frames_s16()
+    - drwav_read_pcm_frames_s32()
+    - drwav_read_pcm_frames_f32()
+    - drwav_open_and_read_pcm_frames_s16()
+    - drwav_open_and_read_pcm_frames_s32()
+    - drwav_open_and_read_pcm_frames_f32()
+    - drwav_open_file_and_read_pcm_frames_s16()
+    - drwav_open_file_and_read_pcm_frames_s32()
+    - drwav_open_file_and_read_pcm_frames_f32()
+    - drwav_open_file_and_read_pcm_frames_s16_w()
+    - drwav_open_file_and_read_pcm_frames_s32_w()
+    - drwav_open_file_and_read_pcm_frames_f32_w()
+    - drwav_open_memory_and_read_pcm_frames_s16()
+    - drwav_open_memory_and_read_pcm_frames_s32()
+    - drwav_open_memory_and_read_pcm_frames_f32()
+
+v0.10.1 - 2019-08-31
+  - Correctly handle partial trailing ADPCM blocks.
+
+v0.10.0 - 2019-08-04
+  - Remove deprecated APIs.
+  - Add wchar_t variants for file loading APIs:
+      drwav_init_file_w()
+      drwav_init_file_ex_w()
+      drwav_init_file_write_w()
+      drwav_init_file_write_sequential_w()
+  - Add drwav_target_write_size_bytes() which calculates the total size in bytes of a WAV file given a format and sample count.
+  - Add APIs for specifying the PCM frame count instead of the sample count when opening in sequential write mode:
+      drwav_init_write_sequential_pcm_frames()
+      drwav_init_file_write_sequential_pcm_frames()
+      drwav_init_file_write_sequential_pcm_frames_w()
+      drwav_init_memory_write_sequential_pcm_frames()
+  - Deprecate drwav_open*() and drwav_close():
+      drwav_open()
+      drwav_open_ex()
+      drwav_open_write()
+      drwav_open_write_sequential()
+      drwav_open_file()
+      drwav_open_file_ex()
+      drwav_open_file_write()
+      drwav_open_file_write_sequential()
+      drwav_open_memory()
+      drwav_open_memory_ex()
+      drwav_open_memory_write()
+      drwav_open_memory_write_sequential()
+      drwav_close()
+  - Minor documentation updates.
+
+v0.9.2 - 2019-05-21
+  - Fix warnings.
+
+v0.9.1 - 2019-05-05
+  - Add support for C89.
+  - Change license to choice of public domain or MIT-0.
+
+v0.9.0 - 2018-12-16
+  - API CHANGE: Add new reading APIs for reading by PCM frames instead of samples. Old APIs have been deprecated and
+    will be removed in v0.10.0. Deprecated APIs and their replacements:
+      drwav_read()                     -> drwav_read_pcm_frames()
+      drwav_read_s16()                 -> drwav_read_pcm_frames_s16()
+      drwav_read_f32()                 -> drwav_read_pcm_frames_f32()
+      drwav_read_s32()                 -> drwav_read_pcm_frames_s32()
+      drwav_seek_to_sample()           -> drwav_seek_to_pcm_frame()
+      drwav_write()                    -> drwav_write_pcm_frames()
+      drwav_open_and_read_s16()        -> drwav_open_and_read_pcm_frames_s16()
+      drwav_open_and_read_f32()        -> drwav_open_and_read_pcm_frames_f32()
+      drwav_open_and_read_s32()        -> drwav_open_and_read_pcm_frames_s32()
+      drwav_open_file_and_read_s16()   -> drwav_open_file_and_read_pcm_frames_s16()
+      drwav_open_file_and_read_f32()   -> drwav_open_file_and_read_pcm_frames_f32()
+      drwav_open_file_and_read_s32()   -> drwav_open_file_and_read_pcm_frames_s32()
+      drwav_open_memory_and_read_s16() -> drwav_open_memory_and_read_pcm_frames_s16()
+      drwav_open_memory_and_read_f32() -> drwav_open_memory_and_read_pcm_frames_f32()
+      drwav_open_memory_and_read_s32() -> drwav_open_memory_and_read_pcm_frames_s32()
+      drwav::totalSampleCount          -> drwav::totalPCMFrameCount
+  - API CHANGE: Rename drwav_open_and_read_file_*() to drwav_open_file_and_read_*().
+  - API CHANGE: Rename drwav_open_and_read_memory_*() to drwav_open_memory_and_read_*().
+  - Add built-in support for smpl chunks.
+  - Add support for firing a callback for each chunk in the file at initialization time.
+    - This is enabled through the drwav_init_ex(), etc. family of APIs.
+  - Handle invalid FMT chunks more robustly.
+
+v0.8.5 - 2018-09-11
+  - Const correctness.
+  - Fix a potential stack overflow.
+
+v0.8.4 - 2018-08-07
+  - Improve 64-bit detection.
+
+v0.8.3 - 2018-08-05
+  - Fix C++ build on older versions of GCC.
+
+v0.8.2 - 2018-08-02
+  - Fix some big-endian bugs.
+
+v0.8.1 - 2018-06-29
+  - Add support for sequential writing APIs.
+  - Disable seeking in write mode.
+  - Fix bugs with Wave64.
+  - Fix typos.
+
+v0.8 - 2018-04-27
+  - Bug fix.
+  - Start using major.minor.revision versioning.
+
+v0.7f - 2018-02-05
+  - Restrict ADPCM formats to a maximum of 2 channels.
+
+v0.7e - 2018-02-02
+  - Fix a crash.
+
+v0.7d - 2018-02-01
+  - Fix a crash.
+
+v0.7c - 2018-02-01
+  - Set drwav.bytesPerSample to 0 for all compressed formats.
+  - Fix a crash when reading 16-bit floating point WAV files. In this case dr_wav will output silence for
+    all format conversion reading APIs (*_s16, *_s32, *_f32 APIs).
+  - Fix some divide-by-zero errors.
+
+v0.7b - 2018-01-22
+  - Fix errors with seeking of compressed formats.
+  - Fix compilation error when DR_WAV_NO_CONVERSION_API
+
+v0.7a - 2017-11-17
+  - Fix some GCC warnings.
+
+v0.7 - 2017-11-04
+  - Add writing APIs.
+
+v0.6 - 2017-08-16
+  - API CHANGE: Rename dr_* types to drwav_*.
+  - Add support for custom implementations of malloc(), realloc(), etc.
+  - Add support for Microsoft ADPCM.
+  - Add support for IMA ADPCM (DVI, format code 0x11).
+  - Optimizations to drwav_read_s16().
+  - Bug fixes.
+
+v0.5g - 2017-07-16
+  - Change underlying type for booleans to unsigned.
+
+v0.5f - 2017-04-04
+  - Fix a minor bug with drwav_open_and_read_s16() and family.
+
+v0.5e - 2016-12-29
+  - Added support for reading samples as signed 16-bit integers. Use the _s16() family of APIs for this.
+  - Minor fixes to documentation.
+
+v0.5d - 2016-12-28
+  - Use drwav_int* and drwav_uint* sized types to improve compiler support.
+
+v0.5c - 2016-11-11
+  - Properly handle JUNK chunks that come before the FMT chunk.
+
+v0.5b - 2016-10-23
+  - A minor change to drwav_bool8 and drwav_bool32 types.
+
+v0.5a - 2016-10-11
+  - Fixed a bug with drwav_open_and_read() and family due to incorrect argument ordering.
+  - Improve A-law and mu-law efficiency.
+
+v0.5 - 2016-09-29
+  - API CHANGE. Swap the order of "channels" and "sampleRate" parameters in drwav_open_and_read*(). Rationale for this is to
+    keep it consistent with dr_audio and dr_flac.
+
+v0.4b - 2016-09-18
+  - Fixed a typo in documentation.
+
+v0.4a - 2016-09-18
+  - Fixed a typo.
+  - Change date format to ISO 8601 (YYYY-MM-DD)
+
+v0.4 - 2016-07-13
+  - API CHANGE. Make onSeek consistent with dr_flac.
+  - API CHANGE. Rename drwav_seek() to drwav_seek_to_sample() for clarity and consistency with dr_flac.
+  - Added support for Sony Wave64.
+
+v0.3a - 2016-05-28
+  - API CHANGE. Return drwav_bool32 instead of int in onSeek callback.
+  - Fixed a memory leak.
+
+v0.3 - 2016-05-22
+  - Lots of API changes for consistency.
+
+v0.2a - 2016-05-16
+  - Fixed Linux/GCC build.
+
+v0.2 - 2016-05-11
+  - Added support for reading data as signed 32-bit PCM for consistency with dr_flac.
+
+v0.1a - 2016-05-07
+  - Fixed a bug in drwav_open_file() where the file handle would not be closed if the loader failed to initialize.
+
+v0.1 - 2016-05-04
+  - Initial versioned release.
+*/
+
+/*
+This software is available as a choice of the following licenses. Choose
+whichever you prefer.
+
+===============================================================================
+ALTERNATIVE 1 - Public Domain (www.unlicense.org)
+===============================================================================
+This is free and unencumbered software released into the public domain.
+
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+software, either in source code form or as a compiled binary, for any purpose,
+commercial or non-commercial, and by any means.
+
+In jurisdictions that recognize copyright laws, the author or authors of this
+software dedicate any and all copyright interest in the software to the public
+domain. We make this dedication for the benefit of the public at large and to
+the detriment of our heirs and successors. We intend this dedication to be an
+overt act of relinquishment in perpetuity of all present and future rights to
+this software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+For more information, please refer to <http://unlicense.org/>
+
+===============================================================================
+ALTERNATIVE 2 - MIT No Attribution
+===============================================================================
+Copyright 2020 David Reid
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*/
diff --git a/sys-tune/source/impl/dr_wav.h.bak b/sys-tune/source/impl/dr_wav.h.bak
index a1e5f58..5df14e2 100644
--- a/sys-tune/source/impl/dr_wav.h.bak
+++ b/sys-tune/source/impl/dr_wav.h.bak
@@ -1,6128 +1,6128 @@
-/*
-WAV audio loader and writer. Choice of public domain or MIT-0. See license statements at the end of this file.
-dr_wav - v0.12.5 - 2020-05-27
-
-David Reid - mackron@gmail.com
-
-GitHub: https://github.com/mackron/dr_libs
-*/
-
-/*
-RELEASE NOTES - VERSION 0.12
-============================
-Version 0.12 includes breaking changes to custom chunk handling.
-
-
-Changes to Chunk Callback
--------------------------
-dr_wav supports the ability to fire a callback when a chunk is encounted (except for WAVE and FMT chunks). The callback has been updated to include both the
-container (RIFF or Wave64) and the FMT chunk which contains information about the format of the data in the wave file.
-
-Previously, there was no direct way to determine the container, and therefore no way discriminate against the different IDs in the chunk header (RIFF and
-Wave64 containers encode chunk ID's differently). The `container` parameter can be used to know which ID to use.
-
-Sometimes it can be useful to know the data format at the time the chunk callback is fired. A pointer to a `drwav_fmt` object is now passed into the chunk
-callback which will give you information about the data format. To determine the sample format, use `drwav_fmt_get_format()`. This will return one of the
-`DR_WAVE_FORMAT_*` tokens.
-*/
-
-/*
-Introduction
-============
-This is a single file library. To use it, do something like the following in one .c file.
-    
-    ```c
-    #define DR_WAV_IMPLEMENTATION
-    #include "dr_wav.h"
-    ```
-
-You can then #include this file in other parts of the program as you would with any other header file. Do something like the following to read audio data:
-
-    ```c
-    drwav wav;
-    if (!drwav_init_file(&wav, "my_song.wav", NULL)) {
-        // Error opening WAV file.
-    }
-
-    drwav_int32* pDecodedInterleavedPCMFrames = malloc(wav.totalPCMFrameCount * wav.channels * sizeof(drwav_int32));
-    size_t numberOfSamplesActuallyDecoded = drwav_read_pcm_frames_s32(&wav, wav.totalPCMFrameCount, pDecodedInterleavedPCMFrames);
-
-    ...
-
-    drwav_uninit(&wav);
-    ```
-
-If you just want to quickly open and read the audio data in a single operation you can do something like this:
-
-    ```c
-    unsigned int channels;
-    unsigned int sampleRate;
-    drwav_uint64 totalPCMFrameCount;
-    float* pSampleData = drwav_open_file_and_read_pcm_frames_f32("my_song.wav", &channels, &sampleRate, &totalPCMFrameCount, NULL);
-    if (pSampleData == NULL) {
-        // Error opening and reading WAV file.
-    }
-
-    ...
-
-    drwav_free(pSampleData);
-    ```
-
-The examples above use versions of the API that convert the audio data to a consistent format (32-bit signed PCM, in this case), but you can still output the
-audio data in its internal format (see notes below for supported formats):
-
-    ```c
-    size_t framesRead = drwav_read_pcm_frames(&wav, wav.totalPCMFrameCount, pDecodedInterleavedPCMFrames);
-    ```
-
-You can also read the raw bytes of audio data, which could be useful if dr_wav does not have native support for a particular data format:
-
-    ```c
-    size_t bytesRead = drwav_read_raw(&wav, bytesToRead, pRawDataBuffer);
-    ```
-
-dr_wav can also be used to output WAV files. This does not currently support compressed formats. To use this, look at `drwav_init_write()`,
-`drwav_init_file_write()`, etc. Use `drwav_write_pcm_frames()` to write samples, or `drwav_write_raw()` to write raw data in the "data" chunk.
-
-    ```c
-    drwav_data_format format;
-    format.container = drwav_container_riff;     // <-- drwav_container_riff = normal WAV files, drwav_container_w64 = Sony Wave64.
-    format.format = DR_WAVE_FORMAT_PCM;          // <-- Any of the DR_WAVE_FORMAT_* codes.
-    format.channels = 2;
-    format.sampleRate = 44100;
-    format.bitsPerSample = 16;
-    drwav_init_file_write(&wav, "data/recording.wav", &format, NULL);
-
-    ...
-
-    drwav_uint64 framesWritten = drwav_write_pcm_frames(pWav, frameCount, pSamples);
-    ```
-
-dr_wav has seamless support the Sony Wave64 format. The decoder will automatically detect it and it should Just Work without any manual intervention.
-
-
-Build Options
-=============
-#define these options before including this file.
-
-#define DR_WAV_NO_CONVERSION_API
-  Disables conversion APIs such as `drwav_read_pcm_frames_f32()` and `drwav_s16_to_f32()`.
-
-#define DR_WAV_NO_STDIO
-  Disables APIs that initialize a decoder from a file such as `drwav_init_file()`, `drwav_init_file_write()`, etc.
-
-
-
-Notes
-=====
-- Samples are always interleaved.
-- The default read function does not do any data conversion. Use `drwav_read_pcm_frames_f32()`, `drwav_read_pcm_frames_s32()` and `drwav_read_pcm_frames_s16()`
-  to read and convert audio data to 32-bit floating point, signed 32-bit integer and signed 16-bit integer samples respectively. Tested and supported internal
-  formats include the following:
-  - Unsigned 8-bit PCM
-  - Signed 12-bit PCM
-  - Signed 16-bit PCM
-  - Signed 24-bit PCM
-  - Signed 32-bit PCM
-  - IEEE 32-bit floating point
-  - IEEE 64-bit floating point
-  - A-law and u-law
-  - Microsoft ADPCM
-  - IMA ADPCM (DVI, format code 0x11)
-- dr_wav will try to read the WAV file as best it can, even if it's not strictly conformant to the WAV format.
-*/
-
-#ifndef dr_wav_h
-#define dr_wav_h
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define DRWAV_STRINGIFY(x)      #x
-#define DRWAV_XSTRINGIFY(x)     DRWAV_STRINGIFY(x)
-
-#define DRWAV_VERSION_MAJOR     0
-#define DRWAV_VERSION_MINOR     12
-#define DRWAV_VERSION_REVISION  5
-#define DRWAV_VERSION_STRING    DRWAV_XSTRINGIFY(DRWAV_VERSION_MAJOR) "." DRWAV_XSTRINGIFY(DRWAV_VERSION_MINOR) "." DRWAV_XSTRINGIFY(DRWAV_VERSION_REVISION)
-
-#include <stddef.h> /* For size_t. */
-
-/* Sized types. Prefer built-in types. Fall back to stdint. */
-#ifdef _MSC_VER
-    #if defined(__clang__)
-        #pragma GCC diagnostic push
-        #pragma GCC diagnostic ignored "-Wlanguage-extension-token"
-        #pragma GCC diagnostic ignored "-Wlong-long"        
-        #pragma GCC diagnostic ignored "-Wc++11-long-long"
-    #endif
-    typedef   signed __int8  drwav_int8;
-    typedef unsigned __int8  drwav_uint8;
-    typedef   signed __int16 drwav_int16;
-    typedef unsigned __int16 drwav_uint16;
-    typedef   signed __int32 drwav_int32;
-    typedef unsigned __int32 drwav_uint32;
-    typedef   signed __int64 drwav_int64;
-    typedef unsigned __int64 drwav_uint64;
-    #if defined(__clang__)
-        #pragma GCC diagnostic pop
-    #endif
-#else
-    #include <stdint.h>
-    typedef int8_t           drwav_int8;
-    typedef uint8_t          drwav_uint8;
-    typedef int16_t          drwav_int16;
-    typedef uint16_t         drwav_uint16;
-    typedef int32_t          drwav_int32;
-    typedef uint32_t         drwav_uint32;
-    typedef int64_t          drwav_int64;
-    typedef uint64_t         drwav_uint64;
-#endif
-typedef drwav_uint8          drwav_bool8;
-typedef drwav_uint32         drwav_bool32;
-#define DRWAV_TRUE           1
-#define DRWAV_FALSE          0
-
-#if !defined(DRWAV_API)
-    #if defined(DRWAV_DLL)
-        #if defined(_WIN32)
-            #define DRWAV_DLL_IMPORT  __declspec(dllimport)
-            #define DRWAV_DLL_EXPORT  __declspec(dllexport)
-            #define DRWAV_DLL_PRIVATE static
-        #else
-            #if defined(__GNUC__) && __GNUC__ >= 4
-                #define DRWAV_DLL_IMPORT  __attribute__((visibility("default")))
-                #define DRWAV_DLL_EXPORT  __attribute__((visibility("default")))
-                #define DRWAV_DLL_PRIVATE __attribute__((visibility("hidden")))
-            #else
-                #define DRWAV_DLL_IMPORT
-                #define DRWAV_DLL_EXPORT
-                #define DRWAV_DLL_PRIVATE static
-            #endif
-        #endif
-
-        #if defined(DR_WAV_IMPLEMENTATION) || defined(DRWAV_IMPLEMENTATION)
-            #define DRWAV_API  DRWAV_DLL_EXPORT
-        #else
-            #define DRWAV_API  DRWAV_DLL_IMPORT
-        #endif
-        #define DRWAV_PRIVATE DRWAV_DLL_PRIVATE
-    #else
-        #define DRWAV_API extern
-        #define DRWAV_PRIVATE static
-    #endif
-#endif
-
-typedef drwav_int32 drwav_result;
-#define DRWAV_SUCCESS                        0
-#define DRWAV_ERROR                         -1   /* A generic error. */
-#define DRWAV_INVALID_ARGS                  -2
-#define DRWAV_INVALID_OPERATION             -3
-#define DRWAV_OUT_OF_MEMORY                 -4
-#define DRWAV_OUT_OF_RANGE                  -5
-#define DRWAV_ACCESS_DENIED                 -6
-#define DRWAV_DOES_NOT_EXIST                -7
-#define DRWAV_ALREADY_EXISTS                -8
-#define DRWAV_TOO_MANY_OPEN_FILES           -9
-#define DRWAV_INVALID_FILE                  -10
-#define DRWAV_TOO_BIG                       -11
-#define DRWAV_PATH_TOO_LONG                 -12
-#define DRWAV_NAME_TOO_LONG                 -13
-#define DRWAV_NOT_DIRECTORY                 -14
-#define DRWAV_IS_DIRECTORY                  -15
-#define DRWAV_DIRECTORY_NOT_EMPTY           -16
-#define DRWAV_END_OF_FILE                   -17
-#define DRWAV_NO_SPACE                      -18
-#define DRWAV_BUSY                          -19
-#define DRWAV_IO_ERROR                      -20
-#define DRWAV_INTERRUPT                     -21
-#define DRWAV_UNAVAILABLE                   -22
-#define DRWAV_ALREADY_IN_USE                -23
-#define DRWAV_BAD_ADDRESS                   -24
-#define DRWAV_BAD_SEEK                      -25
-#define DRWAV_BAD_PIPE                      -26
-#define DRWAV_DEADLOCK                      -27
-#define DRWAV_TOO_MANY_LINKS                -28
-#define DRWAV_NOT_IMPLEMENTED               -29
-#define DRWAV_NO_MESSAGE                    -30
-#define DRWAV_BAD_MESSAGE                   -31
-#define DRWAV_NO_DATA_AVAILABLE             -32
-#define DRWAV_INVALID_DATA                  -33
-#define DRWAV_TIMEOUT                       -34
-#define DRWAV_NO_NETWORK                    -35
-#define DRWAV_NOT_UNIQUE                    -36
-#define DRWAV_NOT_SOCKET                    -37
-#define DRWAV_NO_ADDRESS                    -38
-#define DRWAV_BAD_PROTOCOL                  -39
-#define DRWAV_PROTOCOL_UNAVAILABLE          -40
-#define DRWAV_PROTOCOL_NOT_SUPPORTED        -41
-#define DRWAV_PROTOCOL_FAMILY_NOT_SUPPORTED -42
-#define DRWAV_ADDRESS_FAMILY_NOT_SUPPORTED  -43
-#define DRWAV_SOCKET_NOT_SUPPORTED          -44
-#define DRWAV_CONNECTION_RESET              -45
-#define DRWAV_ALREADY_CONNECTED             -46
-#define DRWAV_NOT_CONNECTED                 -47
-#define DRWAV_CONNECTION_REFUSED            -48
-#define DRWAV_NO_HOST                       -49
-#define DRWAV_IN_PROGRESS                   -50
-#define DRWAV_CANCELLED                     -51
-#define DRWAV_MEMORY_ALREADY_MAPPED         -52
-#define DRWAV_AT_END                        -53
-
-/* Common data formats. */
-#define DR_WAVE_FORMAT_PCM          0x1
-#define DR_WAVE_FORMAT_ADPCM        0x2
-#define DR_WAVE_FORMAT_IEEE_FLOAT   0x3
-#define DR_WAVE_FORMAT_ALAW         0x6
-#define DR_WAVE_FORMAT_MULAW        0x7
-#define DR_WAVE_FORMAT_DVI_ADPCM    0x11
-#define DR_WAVE_FORMAT_EXTENSIBLE   0xFFFE
-
-/* Constants. */
-#ifndef DRWAV_MAX_SMPL_LOOPS
-#define DRWAV_MAX_SMPL_LOOPS        1
-#endif
-
-/* Flags to pass into drwav_init_ex(), etc. */
-#define DRWAV_SEQUENTIAL            0x00000001
-
-DRWAV_API void drwav_version(drwav_uint32* pMajor, drwav_uint32* pMinor, drwav_uint32* pRevision);
-DRWAV_API const char* drwav_version_string();
-
-typedef enum
-{
-    drwav_seek_origin_start,
-    drwav_seek_origin_current
-} drwav_seek_origin;
-
-typedef enum
-{
-    drwav_container_riff,
-    drwav_container_w64
-} drwav_container;
-
-typedef struct
-{
-    union
-    {
-        drwav_uint8 fourcc[4];
-        drwav_uint8 guid[16];
-    } id;
-
-    /* The size in bytes of the chunk. */
-    drwav_uint64 sizeInBytes;
-
-    /*
-    RIFF = 2 byte alignment.
-    W64  = 8 byte alignment.
-    */
-    unsigned int paddingSize;
-} drwav_chunk_header;
-
-typedef struct
-{
-    /*
-    The format tag exactly as specified in the wave file's "fmt" chunk. This can be used by applications
-    that require support for data formats not natively supported by dr_wav.
-    */
-    drwav_uint16 formatTag;
-
-    /* The number of channels making up the audio data. When this is set to 1 it is mono, 2 is stereo, etc. */
-    drwav_uint16 channels;
-
-    /* The sample rate. Usually set to something like 44100. */
-    drwav_uint32 sampleRate;
-
-    /* Average bytes per second. You probably don't need this, but it's left here for informational purposes. */
-    drwav_uint32 avgBytesPerSec;
-
-    /* Block align. This is equal to the number of channels * bytes per sample. */
-    drwav_uint16 blockAlign;
-
-    /* Bits per sample. */
-    drwav_uint16 bitsPerSample;
-
-    /* The size of the extended data. Only used internally for validation, but left here for informational purposes. */
-    drwav_uint16 extendedSize;
-
-    /*
-    The number of valid bits per sample. When <formatTag> is equal to WAVE_FORMAT_EXTENSIBLE, <bitsPerSample>
-    is always rounded up to the nearest multiple of 8. This variable contains information about exactly how
-    many bits are valid per sample. Mainly used for informational purposes.
-    */
-    drwav_uint16 validBitsPerSample;
-
-    /* The channel mask. Not used at the moment. */
-    drwav_uint32 channelMask;
-
-    /* The sub-format, exactly as specified by the wave file. */
-    drwav_uint8 subFormat[16];
-} drwav_fmt;
-
-DRWAV_API drwav_uint16 drwav_fmt_get_format(const drwav_fmt* pFMT);
-
-
-/*
-Callback for when data is read. Return value is the number of bytes actually read.
-
-pUserData   [in]  The user data that was passed to drwav_init() and family.
-pBufferOut  [out] The output buffer.
-bytesToRead [in]  The number of bytes to read.
-
-Returns the number of bytes actually read.
-
-A return value of less than bytesToRead indicates the end of the stream. Do _not_ return from this callback until
-either the entire bytesToRead is filled or you have reached the end of the stream.
-*/
-typedef size_t (* drwav_read_proc)(void* pUserData, void* pBufferOut, size_t bytesToRead);
-
-/*
-Callback for when data is written. Returns value is the number of bytes actually written.
-
-pUserData    [in]  The user data that was passed to drwav_init_write() and family.
-pData        [out] A pointer to the data to write.
-bytesToWrite [in]  The number of bytes to write.
-
-Returns the number of bytes actually written.
-
-If the return value differs from bytesToWrite, it indicates an error.
-*/
-typedef size_t (* drwav_write_proc)(void* pUserData, const void* pData, size_t bytesToWrite);
-
-/*
-Callback for when data needs to be seeked.
-
-pUserData [in] The user data that was passed to drwav_init() and family.
-offset    [in] The number of bytes to move, relative to the origin. Will never be negative.
-origin    [in] The origin of the seek - the current position or the start of the stream.
-
-Returns whether or not the seek was successful.
-
-Whether or not it is relative to the beginning or current position is determined by the "origin" parameter which will be either drwav_seek_origin_start or
-drwav_seek_origin_current.
-*/
-typedef drwav_bool32 (* drwav_seek_proc)(void* pUserData, int offset, drwav_seek_origin origin);
-
-/*
-Callback for when drwav_init_ex() finds a chunk.
-
-pChunkUserData    [in] The user data that was passed to the pChunkUserData parameter of drwav_init_ex() and family.
-onRead            [in] A pointer to the function to call when reading.
-onSeek            [in] A pointer to the function to call when seeking.
-pReadSeekUserData [in] The user data that was passed to the pReadSeekUserData parameter of drwav_init_ex() and family.
-pChunkHeader      [in] A pointer to an object containing basic header information about the chunk. Use this to identify the chunk.
-container         [in] Whether or not the WAV file is a RIFF or Wave64 container. If you're unsure of the difference, assume RIFF.
-pFMT              [in] A pointer to the object containing the contents of the "fmt" chunk.
-
-Returns the number of bytes read + seeked.
-
-To read data from the chunk, call onRead(), passing in pReadSeekUserData as the first parameter. Do the same for seeking with onSeek(). The return value must
-be the total number of bytes you have read _plus_ seeked.
-
-Use the `container` argument to discriminate the fields in `pChunkHeader->id`. If the container is `drwav_container_riff` you should use `id.fourcc`,
-otherwise you should use `id.guid`.
-
-The `pFMT` parameter can be used to determine the data format of the wave file. Use `drwav_fmt_get_format()` to get the sample format, which will be one of the
-`DR_WAVE_FORMAT_*` identifiers. 
-
-The read pointer will be sitting on the first byte after the chunk's header. You must not attempt to read beyond the boundary of the chunk.
-*/
-typedef drwav_uint64 (* drwav_chunk_proc)(void* pChunkUserData, drwav_read_proc onRead, drwav_seek_proc onSeek, void* pReadSeekUserData, const drwav_chunk_header* pChunkHeader, drwav_container container, const drwav_fmt* pFMT);
-
-typedef struct
-{
-    void* pUserData;
-    void* (* onMalloc)(size_t sz, void* pUserData);
-    void* (* onRealloc)(void* p, size_t sz, void* pUserData);
-    void  (* onFree)(void* p, void* pUserData);
-} drwav_allocation_callbacks;
-
-/* Structure for internal use. Only used for loaders opened with drwav_init_memory(). */
-typedef struct
-{
-    const drwav_uint8* data;
-    size_t dataSize;
-    size_t currentReadPos;
-} drwav__memory_stream;
-
-/* Structure for internal use. Only used for writers opened with drwav_init_memory_write(). */
-typedef struct
-{
-    void** ppData;
-    size_t* pDataSize;
-    size_t dataSize;
-    size_t dataCapacity;
-    size_t currentWritePos;
-} drwav__memory_stream_write;
-
-typedef struct
-{
-    drwav_container container;  /* RIFF, W64. */
-    drwav_uint32 format;        /* DR_WAVE_FORMAT_* */
-    drwav_uint32 channels;
-    drwav_uint32 sampleRate;
-    drwav_uint32 bitsPerSample;
-} drwav_data_format;
-
-
-/* See the following for details on the 'smpl' chunk: https://sites.google.com/site/musicgapi/technical-documents/wav-file-format#smpl */
-typedef struct
-{
-    drwav_uint32 cuePointId;
-    drwav_uint32 type;
-    drwav_uint32 start;
-    drwav_uint32 end;
-    drwav_uint32 fraction;
-    drwav_uint32 playCount;
-} drwav_smpl_loop;
-
- typedef struct
-{
-    drwav_uint32 manufacturer;
-    drwav_uint32 product;
-    drwav_uint32 samplePeriod;
-    drwav_uint32 midiUnityNotes;
-    drwav_uint32 midiPitchFraction;
-    drwav_uint32 smpteFormat;
-    drwav_uint32 smpteOffset;
-    drwav_uint32 numSampleLoops;
-    drwav_uint32 samplerData;
-    drwav_smpl_loop loops[DRWAV_MAX_SMPL_LOOPS];
-} drwav_smpl;
-
-typedef struct
-{
-    /* A pointer to the function to call when more data is needed. */
-    drwav_read_proc onRead;
-
-    /* A pointer to the function to call when data needs to be written. Only used when the drwav object is opened in write mode. */
-    drwav_write_proc onWrite;
-
-    /* A pointer to the function to call when the wav file needs to be seeked. */
-    drwav_seek_proc onSeek;
-
-    /* The user data to pass to callbacks. */
-    void* pUserData;
-
-    /* Allocation callbacks. */
-    drwav_allocation_callbacks allocationCallbacks;
-
-
-    /* Whether or not the WAV file is formatted as a standard RIFF file or W64. */
-    drwav_container container;
-
-
-    /* Structure containing format information exactly as specified by the wav file. */
-    drwav_fmt fmt;
-
-    /* The sample rate. Will be set to something like 44100. */
-    drwav_uint32 sampleRate;
-
-    /* The number of channels. This will be set to 1 for monaural streams, 2 for stereo, etc. */
-    drwav_uint16 channels;
-
-    /* The bits per sample. Will be set to something like 16, 24, etc. */
-    drwav_uint16 bitsPerSample;
-
-    /* Equal to fmt.formatTag, or the value specified by fmt.subFormat if fmt.formatTag is equal to 65534 (WAVE_FORMAT_EXTENSIBLE). */
-    drwav_uint16 translatedFormatTag;
-
-    /* The total number of PCM frames making up the audio data. */
-    drwav_uint64 totalPCMFrameCount;
-
-
-    /* The size in bytes of the data chunk. */
-    drwav_uint64 dataChunkDataSize;
-    
-    /* The position in the stream of the first byte of the data chunk. This is used for seeking. */
-    drwav_uint64 dataChunkDataPos;
-
-    /* The number of bytes remaining in the data chunk. */
-    drwav_uint64 bytesRemaining;
-
-
-    /*
-    Only used in sequential write mode. Keeps track of the desired size of the "data" chunk at the point of initialization time. Always
-    set to 0 for non-sequential writes and when the drwav object is opened in read mode. Used for validation.
-    */
-    drwav_uint64 dataChunkDataSizeTargetWrite;
-
-    /* Keeps track of whether or not the wav writer was initialized in sequential mode. */
-    drwav_bool32 isSequentialWrite;
-
-
-    /* smpl chunk. */
-    drwav_smpl smpl;
-
-
-    /* A hack to avoid a DRWAV_MALLOC() when opening a decoder with drwav_init_memory(). */
-    drwav__memory_stream memoryStream;
-    drwav__memory_stream_write memoryStreamWrite;
-
-    /* Generic data for compressed formats. This data is shared across all block-compressed formats. */
-    struct
-    {
-        drwav_uint64 iCurrentPCMFrame;  /* The index of the next PCM frame that will be read by drwav_read_*(). This is used with "totalPCMFrameCount" to ensure we don't read excess samples at the end of the last block. */
-    } compressed;
-    
-    /* Microsoft ADPCM specific data. */
-    struct
-    {
-        drwav_uint32 bytesRemainingInBlock;
-        drwav_uint16 predictor[2];
-        drwav_int32  delta[2];
-        drwav_int32  cachedFrames[4];  /* Samples are stored in this cache during decoding. */
-        drwav_uint32 cachedFrameCount;
-        drwav_int32  prevFrames[2][2]; /* The previous 2 samples for each channel (2 channels at most). */
-    } msadpcm;
-
-    /* IMA ADPCM specific data. */
-    struct
-    {
-        drwav_uint32 bytesRemainingInBlock;
-        drwav_int32  predictor[2];
-        drwav_int32  stepIndex[2];
-        drwav_int32  cachedFrames[16]; /* Samples are stored in this cache during decoding. */
-        drwav_uint32 cachedFrameCount;
-    } ima;
-} drwav;
-
-
-/*
-Initializes a pre-allocated drwav object for reading.
-
-pWav                         [out]          A pointer to the drwav object being initialized.
-onRead                       [in]           The function to call when data needs to be read from the client.
-onSeek                       [in]           The function to call when the read position of the client data needs to move.
-onChunk                      [in, optional] The function to call when a chunk is enumerated at initialized time.
-pUserData, pReadSeekUserData [in, optional] A pointer to application defined data that will be passed to onRead and onSeek.
-pChunkUserData               [in, optional] A pointer to application defined data that will be passed to onChunk.
-flags                        [in, optional] A set of flags for controlling how things are loaded.
-
-Returns true if successful; false otherwise.
-
-Close the loader with drwav_uninit().
-
-This is the lowest level function for initializing a WAV file. You can also use drwav_init_file() and drwav_init_memory()
-to open the stream from a file or from a block of memory respectively.
-
-Possible values for flags:
-  DRWAV_SEQUENTIAL: Never perform a backwards seek while loading. This disables the chunk callback and will cause this function
-                    to return as soon as the data chunk is found. Any chunks after the data chunk will be ignored.
-
-drwav_init() is equivalent to "drwav_init_ex(pWav, onRead, onSeek, NULL, pUserData, NULL, 0);".
-
-The onChunk callback is not called for the WAVE or FMT chunks. The contents of the FMT chunk can be read from pWav->fmt
-after the function returns.
-
-See also: drwav_init_file(), drwav_init_memory(), drwav_uninit()
-*/
-DRWAV_API drwav_bool32 drwav_init(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks);
-DRWAV_API drwav_bool32 drwav_init_ex(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, drwav_chunk_proc onChunk, void* pReadSeekUserData, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks);
-
-/*
-Initializes a pre-allocated drwav object for writing.
-
-onWrite   [in]           The function to call when data needs to be written.
-onSeek    [in]           The function to call when the write position needs to move.
-pUserData [in, optional] A pointer to application defined data that will be passed to onWrite and onSeek.
-
-Returns true if successful; false otherwise.
-
-Close the writer with drwav_uninit().
-
-This is the lowest level function for initializing a WAV file. You can also use drwav_init_file_write() and drwav_init_memory_write()
-to open the stream from a file or from a block of memory respectively.
-
-If the total sample count is known, you can use drwav_init_write_sequential(). This avoids the need for dr_wav to perform
-a post-processing step for storing the total sample count and the size of the data chunk which requires a backwards seek.
-
-See also: drwav_init_file_write(), drwav_init_memory_write(), drwav_uninit()
-*/
-DRWAV_API drwav_bool32 drwav_init_write(drwav* pWav, const drwav_data_format* pFormat, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks);
-DRWAV_API drwav_bool32 drwav_init_write_sequential(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_write_proc onWrite, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks);
-DRWAV_API drwav_bool32 drwav_init_write_sequential_pcm_frames(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalPCMFrameCount, drwav_write_proc onWrite, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks);
-
-/*
-Utility function to determine the target size of the entire data to be written (including all headers and chunks).
-
-Returns the target size in bytes.
-
-Useful if the application needs to know the size to allocate.
-
-Only writing to the RIFF chunk and one data chunk is currently supported.
-
-See also: drwav_init_write(), drwav_init_file_write(), drwav_init_memory_write()
-*/
-DRWAV_API drwav_uint64 drwav_target_write_size_bytes(const drwav_data_format* pFormat, drwav_uint64 totalSampleCount);
-
-/*
-Uninitializes the given drwav object.
-
-Use this only for objects initialized with drwav_init*() functions (drwav_init(), drwav_init_ex(), drwav_init_write(), drwav_init_write_sequential()).
-*/
-DRWAV_API drwav_result drwav_uninit(drwav* pWav);
-
-
-/*
-Reads raw audio data.
-
-This is the lowest level function for reading audio data. It simply reads the given number of
-bytes of the raw internal sample data.
-
-Consider using drwav_read_pcm_frames_s16(), drwav_read_pcm_frames_s32() or drwav_read_pcm_frames_f32() for
-reading sample data in a consistent format.
-
-Returns the number of bytes actually read.
-*/
-DRWAV_API size_t drwav_read_raw(drwav* pWav, size_t bytesToRead, void* pBufferOut);
-
-/*
-Reads up to the specified number of PCM frames from the WAV file.
-
-The output data will be in the file's internal format, converted to native-endian byte order. Use
-drwav_read_pcm_frames_s16/f32/s32() to read data in a specific format.
-
-If the return value is less than <framesToRead> it means the end of the file has been reached or
-you have requested more PCM frames than can possibly fit in the output buffer.
-
-This function will only work when sample data is of a fixed size and uncompressed. If you are
-using a compressed format consider using drwav_read_raw() or drwav_read_pcm_frames_s16/s32/f32().
-*/
-DRWAV_API drwav_uint64 drwav_read_pcm_frames(drwav* pWav, drwav_uint64 framesToRead, void* pBufferOut);
-DRWAV_API drwav_uint64 drwav_read_pcm_frames_le(drwav* pWav, drwav_uint64 framesToRead, void* pBufferOut);
-DRWAV_API drwav_uint64 drwav_read_pcm_frames_be(drwav* pWav, drwav_uint64 framesToRead, void* pBufferOut);
-
-/*
-Seeks to the given PCM frame.
-
-Returns true if successful; false otherwise.
-*/
-DRWAV_API drwav_bool32 drwav_seek_to_pcm_frame(drwav* pWav, drwav_uint64 targetFrameIndex);
-
-
-/*
-Writes raw audio data.
-
-Returns the number of bytes actually written. If this differs from bytesToWrite, it indicates an error.
-*/
-DRWAV_API size_t drwav_write_raw(drwav* pWav, size_t bytesToWrite, const void* pData);
-
-/*
-Writes PCM frames.
-
-Returns the number of PCM frames written.
-
-Input samples need to be in native-endian byte order. On big-endian architectures the input data will be converted to
-little-endian. Use drwav_write_raw() to write raw audio data without performing any conversion.
-*/
-DRWAV_API drwav_uint64 drwav_write_pcm_frames(drwav* pWav, drwav_uint64 framesToWrite, const void* pData);
-DRWAV_API drwav_uint64 drwav_write_pcm_frames_le(drwav* pWav, drwav_uint64 framesToWrite, const void* pData);
-DRWAV_API drwav_uint64 drwav_write_pcm_frames_be(drwav* pWav, drwav_uint64 framesToWrite, const void* pData);
-
-
-/* Conversion Utilities */
-#ifndef DR_WAV_NO_CONVERSION_API
-
-/*
-Reads a chunk of audio data and converts it to signed 16-bit PCM samples.
-
-Returns the number of PCM frames actually read.
-
-If the return value is less than <framesToRead> it means the end of the file has been reached.
-*/
-DRWAV_API drwav_uint64 drwav_read_pcm_frames_s16(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut);
-DRWAV_API drwav_uint64 drwav_read_pcm_frames_s16le(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut);
-DRWAV_API drwav_uint64 drwav_read_pcm_frames_s16be(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut);
-
-/* Low-level function for converting unsigned 8-bit PCM samples to signed 16-bit PCM samples. */
-DRWAV_API void drwav_u8_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount);
-
-/* Low-level function for converting signed 24-bit PCM samples to signed 16-bit PCM samples. */
-DRWAV_API void drwav_s24_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount);
-
-/* Low-level function for converting signed 32-bit PCM samples to signed 16-bit PCM samples. */
-DRWAV_API void drwav_s32_to_s16(drwav_int16* pOut, const drwav_int32* pIn, size_t sampleCount);
-
-/* Low-level function for converting IEEE 32-bit floating point samples to signed 16-bit PCM samples. */
-DRWAV_API void drwav_f32_to_s16(drwav_int16* pOut, const float* pIn, size_t sampleCount);
-
-/* Low-level function for converting IEEE 64-bit floating point samples to signed 16-bit PCM samples. */
-DRWAV_API void drwav_f64_to_s16(drwav_int16* pOut, const double* pIn, size_t sampleCount);
-
-/* Low-level function for converting A-law samples to signed 16-bit PCM samples. */
-DRWAV_API void drwav_alaw_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount);
-
-/* Low-level function for converting u-law samples to signed 16-bit PCM samples. */
-DRWAV_API void drwav_mulaw_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount);
-
-
-/*
-Reads a chunk of audio data and converts it to IEEE 32-bit floating point samples.
-
-Returns the number of PCM frames actually read.
-
-If the return value is less than <framesToRead> it means the end of the file has been reached.
-*/
-DRWAV_API drwav_uint64 drwav_read_pcm_frames_f32(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut);
-DRWAV_API drwav_uint64 drwav_read_pcm_frames_f32le(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut);
-DRWAV_API drwav_uint64 drwav_read_pcm_frames_f32be(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut);
-
-/* Low-level function for converting unsigned 8-bit PCM samples to IEEE 32-bit floating point samples. */
-DRWAV_API void drwav_u8_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount);
-
-/* Low-level function for converting signed 16-bit PCM samples to IEEE 32-bit floating point samples. */
-DRWAV_API void drwav_s16_to_f32(float* pOut, const drwav_int16* pIn, size_t sampleCount);
-
-/* Low-level function for converting signed 24-bit PCM samples to IEEE 32-bit floating point samples. */
-DRWAV_API void drwav_s24_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount);
-
-/* Low-level function for converting signed 32-bit PCM samples to IEEE 32-bit floating point samples. */
-DRWAV_API void drwav_s32_to_f32(float* pOut, const drwav_int32* pIn, size_t sampleCount);
-
-/* Low-level function for converting IEEE 64-bit floating point samples to IEEE 32-bit floating point samples. */
-DRWAV_API void drwav_f64_to_f32(float* pOut, const double* pIn, size_t sampleCount);
-
-/* Low-level function for converting A-law samples to IEEE 32-bit floating point samples. */
-DRWAV_API void drwav_alaw_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount);
-
-/* Low-level function for converting u-law samples to IEEE 32-bit floating point samples. */
-DRWAV_API void drwav_mulaw_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount);
-
-
-/*
-Reads a chunk of audio data and converts it to signed 32-bit PCM samples.
-
-Returns the number of PCM frames actually read.
-
-If the return value is less than <framesToRead> it means the end of the file has been reached.
-*/
-DRWAV_API drwav_uint64 drwav_read_pcm_frames_s32(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut);
-DRWAV_API drwav_uint64 drwav_read_pcm_frames_s32le(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut);
-DRWAV_API drwav_uint64 drwav_read_pcm_frames_s32be(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut);
-
-/* Low-level function for converting unsigned 8-bit PCM samples to signed 32-bit PCM samples. */
-DRWAV_API void drwav_u8_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount);
-
-/* Low-level function for converting signed 16-bit PCM samples to signed 32-bit PCM samples. */
-DRWAV_API void drwav_s16_to_s32(drwav_int32* pOut, const drwav_int16* pIn, size_t sampleCount);
-
-/* Low-level function for converting signed 24-bit PCM samples to signed 32-bit PCM samples. */
-DRWAV_API void drwav_s24_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount);
-
-/* Low-level function for converting IEEE 32-bit floating point samples to signed 32-bit PCM samples. */
-DRWAV_API void drwav_f32_to_s32(drwav_int32* pOut, const float* pIn, size_t sampleCount);
-
-/* Low-level function for converting IEEE 64-bit floating point samples to signed 32-bit PCM samples. */
-DRWAV_API void drwav_f64_to_s32(drwav_int32* pOut, const double* pIn, size_t sampleCount);
-
-/* Low-level function for converting A-law samples to signed 32-bit PCM samples. */
-DRWAV_API void drwav_alaw_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount);
-
-/* Low-level function for converting u-law samples to signed 32-bit PCM samples. */
-DRWAV_API void drwav_mulaw_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount);
-
-#endif  /* DR_WAV_NO_CONVERSION_API */
-
-
-/* High-Level Convenience Helpers */
-
-#ifndef DR_WAV_NO_STDIO
-/*
-Helper for initializing a wave file for reading using stdio.
-
-This holds the internal FILE object until drwav_uninit() is called. Keep this in mind if you're caching drwav
-objects because the operating system may restrict the number of file handles an application can have open at
-any given time.
-*/
-DRWAV_API drwav_bool32 drwav_init_file(drwav* pWav, const char* filename, const drwav_allocation_callbacks* pAllocationCallbacks);
-DRWAV_API drwav_bool32 drwav_init_file_ex(drwav* pWav, const char* filename, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks);
-DRWAV_API drwav_bool32 drwav_init_file_w(drwav* pWav, const wchar_t* filename, const drwav_allocation_callbacks* pAllocationCallbacks);
-DRWAV_API drwav_bool32 drwav_init_file_ex_w(drwav* pWav, const wchar_t* filename, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks);
-
-/*
-Helper for initializing a wave file for writing using stdio.
-
-This holds the internal FILE object until drwav_uninit() is called. Keep this in mind if you're caching drwav
-objects because the operating system may restrict the number of file handles an application can have open at
-any given time.
-*/
-DRWAV_API drwav_bool32 drwav_init_file_write(drwav* pWav, const char* filename, const drwav_data_format* pFormat, const drwav_allocation_callbacks* pAllocationCallbacks);
-DRWAV_API drwav_bool32 drwav_init_file_write_sequential(drwav* pWav, const char* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, const drwav_allocation_callbacks* pAllocationCallbacks);
-DRWAV_API drwav_bool32 drwav_init_file_write_sequential_pcm_frames(drwav* pWav, const char* filename, const drwav_data_format* pFormat, drwav_uint64 totalPCMFrameCount, const drwav_allocation_callbacks* pAllocationCallbacks);
-DRWAV_API drwav_bool32 drwav_init_file_write_w(drwav* pWav, const wchar_t* filename, const drwav_data_format* pFormat, const drwav_allocation_callbacks* pAllocationCallbacks);
-DRWAV_API drwav_bool32 drwav_init_file_write_sequential_w(drwav* pWav, const wchar_t* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, const drwav_allocation_callbacks* pAllocationCallbacks);
-DRWAV_API drwav_bool32 drwav_init_file_write_sequential_pcm_frames_w(drwav* pWav, const wchar_t* filename, const drwav_data_format* pFormat, drwav_uint64 totalPCMFrameCount, const drwav_allocation_callbacks* pAllocationCallbacks);
-#endif  /* DR_WAV_NO_STDIO */
-
-/*
-Helper for initializing a loader from a pre-allocated memory buffer.
-
-This does not create a copy of the data. It is up to the application to ensure the buffer remains valid for
-the lifetime of the drwav object.
-
-The buffer should contain the contents of the entire wave file, not just the sample data.
-*/
-DRWAV_API drwav_bool32 drwav_init_memory(drwav* pWav, const void* data, size_t dataSize, const drwav_allocation_callbacks* pAllocationCallbacks);
-DRWAV_API drwav_bool32 drwav_init_memory_ex(drwav* pWav, const void* data, size_t dataSize, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks);
-
-/*
-Helper for initializing a writer which outputs data to a memory buffer.
-
-dr_wav will manage the memory allocations, however it is up to the caller to free the data with drwav_free().
-
-The buffer will remain allocated even after drwav_uninit() is called. Indeed, the buffer should not be
-considered valid until after drwav_uninit() has been called anyway.
-*/
-DRWAV_API drwav_bool32 drwav_init_memory_write(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, const drwav_allocation_callbacks* pAllocationCallbacks);
-DRWAV_API drwav_bool32 drwav_init_memory_write_sequential(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, const drwav_allocation_callbacks* pAllocationCallbacks);
-DRWAV_API drwav_bool32 drwav_init_memory_write_sequential_pcm_frames(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, drwav_uint64 totalPCMFrameCount, const drwav_allocation_callbacks* pAllocationCallbacks);
-
-
-#ifndef DR_WAV_NO_CONVERSION_API
-/*
-Opens and reads an entire wav file in a single operation.
-
-The return value is a heap-allocated buffer containing the audio data. Use drwav_free() to free the buffer.
-*/
-DRWAV_API drwav_int16* drwav_open_and_read_pcm_frames_s16(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
-DRWAV_API float* drwav_open_and_read_pcm_frames_f32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
-DRWAV_API drwav_int32* drwav_open_and_read_pcm_frames_s32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
-#ifndef DR_WAV_NO_STDIO
-/*
-Opens and decodes an entire wav file in a single operation.
-
-The return value is a heap-allocated buffer containing the audio data. Use drwav_free() to free the buffer.
-*/
-DRWAV_API drwav_int16* drwav_open_file_and_read_pcm_frames_s16(const char* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
-DRWAV_API float* drwav_open_file_and_read_pcm_frames_f32(const char* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
-DRWAV_API drwav_int32* drwav_open_file_and_read_pcm_frames_s32(const char* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
-DRWAV_API drwav_int16* drwav_open_file_and_read_pcm_frames_s16_w(const wchar_t* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
-DRWAV_API float* drwav_open_file_and_read_pcm_frames_f32_w(const wchar_t* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
-DRWAV_API drwav_int32* drwav_open_file_and_read_pcm_frames_s32_w(const wchar_t* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
-#endif
-/*
-Opens and decodes an entire wav file from a block of memory in a single operation.
-
-The return value is a heap-allocated buffer containing the audio data. Use drwav_free() to free the buffer.
-*/
-DRWAV_API drwav_int16* drwav_open_memory_and_read_pcm_frames_s16(const void* data, size_t dataSize, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
-DRWAV_API float* drwav_open_memory_and_read_pcm_frames_f32(const void* data, size_t dataSize, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
-DRWAV_API drwav_int32* drwav_open_memory_and_read_pcm_frames_s32(const void* data, size_t dataSize, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
-#endif
-
-/* Frees data that was allocated internally by dr_wav. */
-DRWAV_API void drwav_free(void* p, const drwav_allocation_callbacks* pAllocationCallbacks);
-
-/* Converts bytes from a wav stream to a sized type of native endian. */
-DRWAV_API drwav_uint16 drwav_bytes_to_u16(const drwav_uint8* data);
-DRWAV_API drwav_int16 drwav_bytes_to_s16(const drwav_uint8* data);
-DRWAV_API drwav_uint32 drwav_bytes_to_u32(const drwav_uint8* data);
-DRWAV_API drwav_int32 drwav_bytes_to_s32(const drwav_uint8* data);
-DRWAV_API drwav_uint64 drwav_bytes_to_u64(const drwav_uint8* data);
-DRWAV_API drwav_int64 drwav_bytes_to_s64(const drwav_uint8* data);
-
-/* Compares a GUID for the purpose of checking the type of a Wave64 chunk. */
-DRWAV_API drwav_bool32 drwav_guid_equal(const drwav_uint8 a[16], const drwav_uint8 b[16]);
-
-/* Compares a four-character-code for the purpose of checking the type of a RIFF chunk. */
-DRWAV_API drwav_bool32 drwav_fourcc_equal(const drwav_uint8* a, const char* b);
-
-#ifdef __cplusplus
-}
-#endif
-#endif  /* dr_wav_h */
-
-
-/************************************************************************************************************************************************************
- ************************************************************************************************************************************************************
-
- IMPLEMENTATION
-
- ************************************************************************************************************************************************************
- ************************************************************************************************************************************************************/
-#if defined(DR_WAV_IMPLEMENTATION) || defined(DRWAV_IMPLEMENTATION)
-#include <stdlib.h>
-#include <string.h> /* For memcpy(), memset() */
-#include <limits.h> /* For INT_MAX */
-
-#ifndef DR_WAV_NO_STDIO
-#include <stdio.h>
-#include <wchar.h>
-#endif
-
-/* Standard library stuff. */
-#ifndef DRWAV_ASSERT
-#include <assert.h>
-#define DRWAV_ASSERT(expression)           assert(expression)
-#endif
-#ifndef DRWAV_MALLOC
-#define DRWAV_MALLOC(sz)                   malloc((sz))
-#endif
-#ifndef DRWAV_REALLOC
-#define DRWAV_REALLOC(p, sz)               realloc((p), (sz))
-#endif
-#ifndef DRWAV_FREE
-#define DRWAV_FREE(p)                      free((p))
-#endif
-#ifndef DRWAV_COPY_MEMORY
-#define DRWAV_COPY_MEMORY(dst, src, sz)    memcpy((dst), (src), (sz))
-#endif
-#ifndef DRWAV_ZERO_MEMORY
-#define DRWAV_ZERO_MEMORY(p, sz)           memset((p), 0, (sz))
-#endif
-#ifndef DRWAV_ZERO_OBJECT
-#define DRWAV_ZERO_OBJECT(p)               DRWAV_ZERO_MEMORY((p), sizeof(*p))
-#endif
-
-#define drwav_countof(x)                   (sizeof(x) / sizeof(x[0]))
-#define drwav_align(x, a)                  ((((x) + (a) - 1) / (a)) * (a))
-#define drwav_min(a, b)                    (((a) < (b)) ? (a) : (b))
-#define drwav_max(a, b)                    (((a) > (b)) ? (a) : (b))
-#define drwav_clamp(x, lo, hi)             (drwav_max((lo), drwav_min((hi), (x))))
-
-#define DRWAV_MAX_SIMD_VECTOR_SIZE         64  /* 64 for AVX-512 in the future. */
-
-/* CPU architecture. */
-#if defined(__x86_64__) || defined(_M_X64)
-    #define DRWAV_X64
-#elif defined(__i386) || defined(_M_IX86)
-    #define DRWAV_X86
-#elif defined(__arm__) || defined(_M_ARM)
-    #define DRWAV_ARM
-#endif
-
-#ifdef _MSC_VER
-    #define DRWAV_INLINE __forceinline
-#elif defined(__GNUC__)
-    /*
-    I've had a bug report where GCC is emitting warnings about functions possibly not being inlineable. This warning happens when
-    the __attribute__((always_inline)) attribute is defined without an "inline" statement. I think therefore there must be some
-    case where "__inline__" is not always defined, thus the compiler emitting these warnings. When using -std=c89 or -ansi on the
-    command line, we cannot use the "inline" keyword and instead need to use "__inline__". In an attempt to work around this issue
-    I am using "__inline__" only when we're compiling in strict ANSI mode.
-    */
-    #if defined(__STRICT_ANSI__)
-        #define DRWAV_INLINE __inline__ __attribute__((always_inline))
-    #else
-        #define DRWAV_INLINE inline __attribute__((always_inline))
-    #endif
-#else
-    #define DRWAV_INLINE
-#endif
-
-#if defined(SIZE_MAX)
-    #define DRWAV_SIZE_MAX  SIZE_MAX
-#else
-    #if defined(_WIN64) || defined(_LP64) || defined(__LP64__)
-        #define DRWAV_SIZE_MAX  ((drwav_uint64)0xFFFFFFFFFFFFFFFF)
-    #else
-        #define DRWAV_SIZE_MAX  0xFFFFFFFF
-    #endif
-#endif
-
-#if defined(_MSC_VER) && _MSC_VER >= 1400
-    #define DRWAV_HAS_BYTESWAP16_INTRINSIC
-    #define DRWAV_HAS_BYTESWAP32_INTRINSIC
-    #define DRWAV_HAS_BYTESWAP64_INTRINSIC
-#elif defined(__clang__)
-    #if defined(__has_builtin)
-        #if __has_builtin(__builtin_bswap16)
-            #define DRWAV_HAS_BYTESWAP16_INTRINSIC
-        #endif
-        #if __has_builtin(__builtin_bswap32)
-            #define DRWAV_HAS_BYTESWAP32_INTRINSIC
-        #endif
-        #if __has_builtin(__builtin_bswap64)
-            #define DRWAV_HAS_BYTESWAP64_INTRINSIC
-        #endif
-    #endif
-#elif defined(__GNUC__)
-    #if ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
-        #define DRWAV_HAS_BYTESWAP32_INTRINSIC
-        #define DRWAV_HAS_BYTESWAP64_INTRINSIC
-    #endif
-    #if ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))
-        #define DRWAV_HAS_BYTESWAP16_INTRINSIC
-    #endif
-#endif
-
-DRWAV_API void drwav_version(drwav_uint32* pMajor, drwav_uint32* pMinor, drwav_uint32* pRevision)
-{
-    if (pMajor) {
-        *pMajor = DRWAV_VERSION_MAJOR;
-    }
-
-    if (pMinor) {
-        *pMinor = DRWAV_VERSION_MINOR;
-    }
-
-    if (pRevision) {
-        *pRevision = DRWAV_VERSION_REVISION;
-    }
-}
-
-DRWAV_API const char* drwav_version_string()
-{
-    return DRWAV_VERSION_STRING;
-}
-
-/*
-These limits are used for basic validation when initializing the decoder. If you exceed these limits, first of all: what on Earth are
-you doing?! (Let me know, I'd be curious!) Second, you can adjust these by #define-ing them before the dr_wav implementation.
-*/
-#ifndef DRWAV_MAX_SAMPLE_RATE
-#define DRWAV_MAX_SAMPLE_RATE       384000
-#endif
-#ifndef DRWAV_MAX_CHANNELS
-#define DRWAV_MAX_CHANNELS          256
-#endif
-#ifndef DRWAV_MAX_BITS_PER_SAMPLE
-#define DRWAV_MAX_BITS_PER_SAMPLE   64
-#endif
-
-static const drwav_uint8 drwavGUID_W64_RIFF[16] = {0x72,0x69,0x66,0x66, 0x2E,0x91, 0xCF,0x11, 0xA5,0xD6, 0x28,0xDB,0x04,0xC1,0x00,0x00};    /* 66666972-912E-11CF-A5D6-28DB04C10000 */
-static const drwav_uint8 drwavGUID_W64_WAVE[16] = {0x77,0x61,0x76,0x65, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};    /* 65766177-ACF3-11D3-8CD1-00C04F8EDB8A */
-static const drwav_uint8 drwavGUID_W64_JUNK[16] = {0x6A,0x75,0x6E,0x6B, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};    /* 6B6E756A-ACF3-11D3-8CD1-00C04F8EDB8A */
-static const drwav_uint8 drwavGUID_W64_FMT [16] = {0x66,0x6D,0x74,0x20, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};    /* 20746D66-ACF3-11D3-8CD1-00C04F8EDB8A */
-static const drwav_uint8 drwavGUID_W64_FACT[16] = {0x66,0x61,0x63,0x74, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};    /* 74636166-ACF3-11D3-8CD1-00C04F8EDB8A */
-static const drwav_uint8 drwavGUID_W64_DATA[16] = {0x64,0x61,0x74,0x61, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};    /* 61746164-ACF3-11D3-8CD1-00C04F8EDB8A */
-static const drwav_uint8 drwavGUID_W64_SMPL[16] = {0x73,0x6D,0x70,0x6C, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};    /* 6C706D73-ACF3-11D3-8CD1-00C04F8EDB8A */
-
-static DRWAV_INLINE drwav_bool32 drwav__guid_equal(const drwav_uint8 a[16], const drwav_uint8 b[16])
-{
-    int i;
-    for (i = 0; i < 16; i += 1) {
-        if (a[i] != b[i]) {
-            return DRWAV_FALSE;
-        }
-    }
-
-    return DRWAV_TRUE;
-}
-
-static DRWAV_INLINE drwav_bool32 drwav__fourcc_equal(const drwav_uint8* a, const char* b)
-{
-    return
-        a[0] == b[0] &&
-        a[1] == b[1] &&
-        a[2] == b[2] &&
-        a[3] == b[3];
-}
-
-
-
-static DRWAV_INLINE int drwav__is_little_endian(void)
-{
-#if defined(DRWAV_X86) || defined(DRWAV_X64)
-    return DRWAV_TRUE;
-#elif defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && __BYTE_ORDER == __LITTLE_ENDIAN
-    return DRWAV_TRUE;
-#else
-    int n = 1;
-    return (*(char*)&n) == 1;
-#endif
-}
-
-static DRWAV_INLINE drwav_uint16 drwav__bytes_to_u16(const drwav_uint8* data)
-{
-    return (data[0] << 0) | (data[1] << 8);
-}
-
-static DRWAV_INLINE drwav_int16 drwav__bytes_to_s16(const drwav_uint8* data)
-{
-    return (short)drwav__bytes_to_u16(data);
-}
-
-static DRWAV_INLINE drwav_uint32 drwav__bytes_to_u32(const drwav_uint8* data)
-{
-    return (data[0] << 0) | (data[1] << 8) | (data[2] << 16) | (data[3] << 24);
-}
-
-static DRWAV_INLINE drwav_int32 drwav__bytes_to_s32(const drwav_uint8* data)
-{
-    return (drwav_int32)drwav__bytes_to_u32(data);
-}
-
-static DRWAV_INLINE drwav_uint64 drwav__bytes_to_u64(const drwav_uint8* data)
-{
-    return
-        ((drwav_uint64)data[0] <<  0) | ((drwav_uint64)data[1] <<  8) | ((drwav_uint64)data[2] << 16) | ((drwav_uint64)data[3] << 24) |
-        ((drwav_uint64)data[4] << 32) | ((drwav_uint64)data[5] << 40) | ((drwav_uint64)data[6] << 48) | ((drwav_uint64)data[7] << 56);
-}
-
-static DRWAV_INLINE drwav_int64 drwav__bytes_to_s64(const drwav_uint8* data)
-{
-    return (drwav_int64)drwav__bytes_to_u64(data);
-}
-
-static DRWAV_INLINE void drwav__bytes_to_guid(const drwav_uint8* data, drwav_uint8* guid)
-{
-    int i;
-    for (i = 0; i < 16; ++i) {
-        guid[i] = data[i];
-    }
-}
-
-
-static DRWAV_INLINE drwav_uint16 drwav__bswap16(drwav_uint16 n)
-{
-#ifdef DRWAV_HAS_BYTESWAP16_INTRINSIC
-    #if defined(_MSC_VER)
-        return _byteswap_ushort(n);
-    #elif defined(__GNUC__) || defined(__clang__)
-        return __builtin_bswap16(n);
-    #else
-        #error "This compiler does not support the byte swap intrinsic."
-    #endif
-#else
-    return ((n & 0xFF00) >> 8) |
-           ((n & 0x00FF) << 8);
-#endif
-}
-
-static DRWAV_INLINE drwav_uint32 drwav__bswap32(drwav_uint32 n)
-{
-#ifdef DRWAV_HAS_BYTESWAP32_INTRINSIC
-    #if defined(_MSC_VER)
-        return _byteswap_ulong(n);
-    #elif defined(__GNUC__) || defined(__clang__)
-        #if defined(DRWAV_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 6) && !defined(DRWAV_64BIT)   /* <-- 64-bit inline assembly has not been tested, so disabling for now. */
-            /* Inline assembly optimized implementation for ARM. In my testing, GCC does not generate optimized code with __builtin_bswap32(). */
-            drwav_uint32 r;
-            __asm__ __volatile__ (
-            #if defined(DRWAV_64BIT)
-                "rev %w[out], %w[in]" : [out]"=r"(r) : [in]"r"(n)   /* <-- This is untested. If someone in the community could test this, that would be appreciated! */
-            #else
-                "rev %[out], %[in]" : [out]"=r"(r) : [in]"r"(n)
-            #endif
-            );
-            return r;
-        #else
-            return __builtin_bswap32(n);
-        #endif
-    #else
-        #error "This compiler does not support the byte swap intrinsic."
-    #endif
-#else
-    return ((n & 0xFF000000) >> 24) |
-           ((n & 0x00FF0000) >>  8) |
-           ((n & 0x0000FF00) <<  8) |
-           ((n & 0x000000FF) << 24);
-#endif
-}
-
-static DRWAV_INLINE drwav_uint64 drwav__bswap64(drwav_uint64 n)
-{
-#ifdef DRWAV_HAS_BYTESWAP64_INTRINSIC
-    #if defined(_MSC_VER)
-        return _byteswap_uint64(n);
-    #elif defined(__GNUC__) || defined(__clang__)
-        return __builtin_bswap64(n);
-    #else
-        #error "This compiler does not support the byte swap intrinsic."
-    #endif
-#else
-    return ((n & (drwav_uint64)0xFF00000000000000) >> 56) |
-           ((n & (drwav_uint64)0x00FF000000000000) >> 40) |
-           ((n & (drwav_uint64)0x0000FF0000000000) >> 24) |
-           ((n & (drwav_uint64)0x000000FF00000000) >>  8) |
-           ((n & (drwav_uint64)0x00000000FF000000) <<  8) |
-           ((n & (drwav_uint64)0x0000000000FF0000) << 24) |
-           ((n & (drwav_uint64)0x000000000000FF00) << 40) |
-           ((n & (drwav_uint64)0x00000000000000FF) << 56);
-#endif
-}
-
-
-static DRWAV_INLINE drwav_int16 drwav__bswap_s16(drwav_int16 n)
-{
-    return (drwav_int16)drwav__bswap16((drwav_uint16)n);
-}
-
-static DRWAV_INLINE void drwav__bswap_samples_s16(drwav_int16* pSamples, drwav_uint64 sampleCount)
-{
-    drwav_uint64 iSample;
-    for (iSample = 0; iSample < sampleCount; iSample += 1) {
-        pSamples[iSample] = drwav__bswap_s16(pSamples[iSample]);
-    }
-}
-
-
-static DRWAV_INLINE void drwav__bswap_s24(drwav_uint8* p)
-{
-    drwav_uint8 t;
-    t = p[0];
-    p[0] = p[2];
-    p[2] = t;
-}
-
-static DRWAV_INLINE void drwav__bswap_samples_s24(drwav_uint8* pSamples, drwav_uint64 sampleCount)
-{
-    drwav_uint64 iSample;
-    for (iSample = 0; iSample < sampleCount; iSample += 1) {
-        drwav_uint8* pSample = pSamples + (iSample*3);
-        drwav__bswap_s24(pSample);
-    }
-}
-
-
-static DRWAV_INLINE drwav_int32 drwav__bswap_s32(drwav_int32 n)
-{
-    return (drwav_int32)drwav__bswap32((drwav_uint32)n);
-}
-
-static DRWAV_INLINE void drwav__bswap_samples_s32(drwav_int32* pSamples, drwav_uint64 sampleCount)
-{
-    drwav_uint64 iSample;
-    for (iSample = 0; iSample < sampleCount; iSample += 1) {
-        pSamples[iSample] = drwav__bswap_s32(pSamples[iSample]);
-    }
-}
-
-
-static DRWAV_INLINE float drwav__bswap_f32(float n)
-{
-    union {
-        drwav_uint32 i;
-        float f;
-    } x;
-    x.f = n;
-    x.i = drwav__bswap32(x.i);
-
-    return x.f;
-}
-
-static DRWAV_INLINE void drwav__bswap_samples_f32(float* pSamples, drwav_uint64 sampleCount)
-{
-    drwav_uint64 iSample;
-    for (iSample = 0; iSample < sampleCount; iSample += 1) {
-        pSamples[iSample] = drwav__bswap_f32(pSamples[iSample]);
-    }
-}
-
-
-static DRWAV_INLINE double drwav__bswap_f64(double n)
-{
-    union {
-        drwav_uint64 i;
-        double f;
-    } x;
-    x.f = n;
-    x.i = drwav__bswap64(x.i);
-
-    return x.f;
-}
-
-static DRWAV_INLINE void drwav__bswap_samples_f64(double* pSamples, drwav_uint64 sampleCount)
-{
-    drwav_uint64 iSample;
-    for (iSample = 0; iSample < sampleCount; iSample += 1) {
-        pSamples[iSample] = drwav__bswap_f64(pSamples[iSample]);
-    }
-}
-
-
-static DRWAV_INLINE void drwav__bswap_samples_pcm(void* pSamples, drwav_uint64 sampleCount, drwav_uint32 bytesPerSample)
-{
-    /* Assumes integer PCM. Floating point PCM is done in drwav__bswap_samples_ieee(). */
-    switch (bytesPerSample)
-    {
-        case 2: /* s16, s12 (loosely packed) */
-        {
-            drwav__bswap_samples_s16((drwav_int16*)pSamples, sampleCount);
-        } break;
-        case 3: /* s24 */
-        {
-            drwav__bswap_samples_s24((drwav_uint8*)pSamples, sampleCount);
-        } break;
-        case 4: /* s32 */
-        {
-            drwav__bswap_samples_s32((drwav_int32*)pSamples, sampleCount);
-        } break;
-        default:
-        {
-            /* Unsupported format. */
-            DRWAV_ASSERT(DRWAV_FALSE);
-        } break;
-    }
-}
-
-static DRWAV_INLINE void drwav__bswap_samples_ieee(void* pSamples, drwav_uint64 sampleCount, drwav_uint32 bytesPerSample)
-{
-    switch (bytesPerSample)
-    {
-    #if 0   /* Contributions welcome for f16 support. */
-        case 2: /* f16 */
-        {
-            drwav__bswap_samples_f16((drwav_float16*)pSamples, sampleCount);
-        } break;
-    #endif
-        case 4: /* f32 */
-        {
-            drwav__bswap_samples_f32((float*)pSamples, sampleCount);
-        } break;
-        case 8: /* f64 */
-        {
-            drwav__bswap_samples_f64((double*)pSamples, sampleCount);
-        } break;
-        default:
-        {
-            /* Unsupported format. */
-            DRWAV_ASSERT(DRWAV_FALSE);
-        } break;
-    }
-}
-
-static DRWAV_INLINE void drwav__bswap_samples(void* pSamples, drwav_uint64 sampleCount, drwav_uint32 bytesPerSample, drwav_uint16 format)
-{
-    switch (format)
-    {
-        case DR_WAVE_FORMAT_PCM:
-        {
-            drwav__bswap_samples_pcm(pSamples, sampleCount, bytesPerSample);
-        } break;
-
-        case DR_WAVE_FORMAT_IEEE_FLOAT:
-        {
-            drwav__bswap_samples_ieee(pSamples, sampleCount, bytesPerSample);
-        } break;
-
-        case DR_WAVE_FORMAT_ALAW:
-        case DR_WAVE_FORMAT_MULAW:
-        {
-            drwav__bswap_samples_s16((drwav_int16*)pSamples, sampleCount);
-        } break;
-
-        case DR_WAVE_FORMAT_ADPCM:
-        case DR_WAVE_FORMAT_DVI_ADPCM:
-        default:
-        {
-            /* Unsupported format. */
-            DRWAV_ASSERT(DRWAV_FALSE);
-        } break;
-    }
-}
-
-
-static void* drwav__malloc_default(size_t sz, void* pUserData)
-{
-    (void)pUserData;
-    return DRWAV_MALLOC(sz);
-}
-
-static void* drwav__realloc_default(void* p, size_t sz, void* pUserData)
-{
-    (void)pUserData;
-    return DRWAV_REALLOC(p, sz);
-}
-
-static void drwav__free_default(void* p, void* pUserData)
-{
-    (void)pUserData;
-    DRWAV_FREE(p);
-}
-
-
-static void* drwav__malloc_from_callbacks(size_t sz, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    if (pAllocationCallbacks == NULL) {
-        return NULL;
-    }
-
-    if (pAllocationCallbacks->onMalloc != NULL) {
-        return pAllocationCallbacks->onMalloc(sz, pAllocationCallbacks->pUserData);
-    }
-
-    /* Try using realloc(). */
-    if (pAllocationCallbacks->onRealloc != NULL) {
-        return pAllocationCallbacks->onRealloc(NULL, sz, pAllocationCallbacks->pUserData);
-    }
-
-    return NULL;
-}
-
-static void* drwav__realloc_from_callbacks(void* p, size_t szNew, size_t szOld, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    if (pAllocationCallbacks == NULL) {
-        return NULL;
-    }
-
-    if (pAllocationCallbacks->onRealloc != NULL) {
-        return pAllocationCallbacks->onRealloc(p, szNew, pAllocationCallbacks->pUserData);
-    }
-
-    /* Try emulating realloc() in terms of malloc()/free(). */
-    if (pAllocationCallbacks->onMalloc != NULL && pAllocationCallbacks->onFree != NULL) {
-        void* p2;
-
-        p2 = pAllocationCallbacks->onMalloc(szNew, pAllocationCallbacks->pUserData);
-        if (p2 == NULL) {
-            return NULL;
-        }
-
-        if (p != NULL) {
-            DRWAV_COPY_MEMORY(p2, p, szOld);
-            pAllocationCallbacks->onFree(p, pAllocationCallbacks->pUserData);
-        }
-
-        return p2;
-    }
-
-    return NULL;
-}
-
-static void drwav__free_from_callbacks(void* p, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    if (p == NULL || pAllocationCallbacks == NULL) {
-        return;
-    }
-
-    if (pAllocationCallbacks->onFree != NULL) {
-        pAllocationCallbacks->onFree(p, pAllocationCallbacks->pUserData);
-    }
-}
-
-
-static drwav_allocation_callbacks drwav_copy_allocation_callbacks_or_defaults(const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    if (pAllocationCallbacks != NULL) {
-        /* Copy. */
-        return *pAllocationCallbacks;
-    } else {
-        /* Defaults. */
-        drwav_allocation_callbacks allocationCallbacks;
-        allocationCallbacks.pUserData = NULL;
-        allocationCallbacks.onMalloc  = drwav__malloc_default;
-        allocationCallbacks.onRealloc = drwav__realloc_default;
-        allocationCallbacks.onFree    = drwav__free_default;
-        return allocationCallbacks;
-    }
-}
-
-
-static DRWAV_INLINE drwav_bool32 drwav__is_compressed_format_tag(drwav_uint16 formatTag)
-{
-    return
-        formatTag == DR_WAVE_FORMAT_ADPCM ||
-        formatTag == DR_WAVE_FORMAT_DVI_ADPCM;
-}
-
-static unsigned int drwav__chunk_padding_size_riff(drwav_uint64 chunkSize)
-{
-    return (unsigned int)(chunkSize % 2);
-}
-
-static unsigned int drwav__chunk_padding_size_w64(drwav_uint64 chunkSize)
-{
-    return (unsigned int)(chunkSize % 8);
-}
-
-static drwav_uint64 drwav_read_pcm_frames_s16__msadpcm(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut);
-static drwav_uint64 drwav_read_pcm_frames_s16__ima(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut);
-static drwav_bool32 drwav_init_write__internal(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount);
-
-static drwav_result drwav__read_chunk_header(drwav_read_proc onRead, void* pUserData, drwav_container container, drwav_uint64* pRunningBytesReadOut, drwav_chunk_header* pHeaderOut)
-{
-    if (container == drwav_container_riff) {
-        drwav_uint8 sizeInBytes[4];
-
-        if (onRead(pUserData, pHeaderOut->id.fourcc, 4) != 4) {
-            return DRWAV_AT_END;
-        }
-
-        if (onRead(pUserData, sizeInBytes, 4) != 4) {
-            return DRWAV_INVALID_FILE;
-        }
-
-        pHeaderOut->sizeInBytes = drwav__bytes_to_u32(sizeInBytes);
-        pHeaderOut->paddingSize = drwav__chunk_padding_size_riff(pHeaderOut->sizeInBytes);
-        *pRunningBytesReadOut += 8;
-    } else {
-        drwav_uint8 sizeInBytes[8];
-
-        if (onRead(pUserData, pHeaderOut->id.guid, 16) != 16) {
-            return DRWAV_AT_END;
-        }
-
-        if (onRead(pUserData, sizeInBytes, 8) != 8) {
-            return DRWAV_INVALID_FILE;
-        }
-
-        pHeaderOut->sizeInBytes = drwav__bytes_to_u64(sizeInBytes) - 24;    /* <-- Subtract 24 because w64 includes the size of the header. */
-        pHeaderOut->paddingSize = drwav__chunk_padding_size_w64(pHeaderOut->sizeInBytes);
-        *pRunningBytesReadOut += 24;
-    }
-
-    return DRWAV_SUCCESS;
-}
-
-static drwav_bool32 drwav__seek_forward(drwav_seek_proc onSeek, drwav_uint64 offset, void* pUserData)
-{
-    drwav_uint64 bytesRemainingToSeek = offset;
-    while (bytesRemainingToSeek > 0) {
-        if (bytesRemainingToSeek > 0x7FFFFFFF) {
-            if (!onSeek(pUserData, 0x7FFFFFFF, drwav_seek_origin_current)) {
-                return DRWAV_FALSE;
-            }
-            bytesRemainingToSeek -= 0x7FFFFFFF;
-        } else {
-            if (!onSeek(pUserData, (int)bytesRemainingToSeek, drwav_seek_origin_current)) {
-                return DRWAV_FALSE;
-            }
-            bytesRemainingToSeek = 0;
-        }
-    }
-
-    return DRWAV_TRUE;
-}
-
-static drwav_bool32 drwav__seek_from_start(drwav_seek_proc onSeek, drwav_uint64 offset, void* pUserData)
-{
-    if (offset <= 0x7FFFFFFF) {
-        return onSeek(pUserData, (int)offset, drwav_seek_origin_start);
-    }
-
-    /* Larger than 32-bit seek. */
-    if (!onSeek(pUserData, 0x7FFFFFFF, drwav_seek_origin_start)) {
-        return DRWAV_FALSE;
-    }
-    offset -= 0x7FFFFFFF;
-
-    for (;;) {
-        if (offset <= 0x7FFFFFFF) {
-            return onSeek(pUserData, (int)offset, drwav_seek_origin_current);
-        }
-
-        if (!onSeek(pUserData, 0x7FFFFFFF, drwav_seek_origin_current)) {
-            return DRWAV_FALSE;
-        }
-        offset -= 0x7FFFFFFF;
-    }
-
-    /* Should never get here. */
-    /*return DRWAV_TRUE; */
-}
-
-
-static drwav_bool32 drwav__read_fmt(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, drwav_container container, drwav_uint64* pRunningBytesReadOut, drwav_fmt* fmtOut)
-{
-    drwav_chunk_header header;
-    drwav_uint8 fmt[16];
-
-    if (drwav__read_chunk_header(onRead, pUserData, container, pRunningBytesReadOut, &header) != DRWAV_SUCCESS) {
-        return DRWAV_FALSE;
-    }
-
-
-    /* Skip non-fmt chunks. */
-    while ((container == drwav_container_riff && !drwav__fourcc_equal(header.id.fourcc, "fmt ")) || (container == drwav_container_w64 && !drwav__guid_equal(header.id.guid, drwavGUID_W64_FMT))) {
-        if (!drwav__seek_forward(onSeek, header.sizeInBytes + header.paddingSize, pUserData)) {
-            return DRWAV_FALSE;
-        }
-        *pRunningBytesReadOut += header.sizeInBytes + header.paddingSize;
-
-        /* Try the next header. */
-        if (drwav__read_chunk_header(onRead, pUserData, container, pRunningBytesReadOut, &header) != DRWAV_SUCCESS) {
-            return DRWAV_FALSE;
-        }
-    }
-
-
-    /* Validation. */
-    if (container == drwav_container_riff) {
-        if (!drwav__fourcc_equal(header.id.fourcc, "fmt ")) {
-            return DRWAV_FALSE;
-        }
-    } else {
-        if (!drwav__guid_equal(header.id.guid, drwavGUID_W64_FMT)) {
-            return DRWAV_FALSE;
-        }
-    }
-
-
-    if (onRead(pUserData, fmt, sizeof(fmt)) != sizeof(fmt)) {
-        return DRWAV_FALSE;
-    }
-    *pRunningBytesReadOut += sizeof(fmt);
-
-    fmtOut->formatTag      = drwav__bytes_to_u16(fmt + 0);
-    fmtOut->channels       = drwav__bytes_to_u16(fmt + 2);
-    fmtOut->sampleRate     = drwav__bytes_to_u32(fmt + 4);
-    fmtOut->avgBytesPerSec = drwav__bytes_to_u32(fmt + 8);
-    fmtOut->blockAlign     = drwav__bytes_to_u16(fmt + 12);
-    fmtOut->bitsPerSample  = drwav__bytes_to_u16(fmt + 14);
-
-    fmtOut->extendedSize       = 0;
-    fmtOut->validBitsPerSample = 0;
-    fmtOut->channelMask        = 0;
-    memset(fmtOut->subFormat, 0, sizeof(fmtOut->subFormat));
-
-    if (header.sizeInBytes > 16) {
-        drwav_uint8 fmt_cbSize[2];
-        int bytesReadSoFar = 0;
-
-        if (onRead(pUserData, fmt_cbSize, sizeof(fmt_cbSize)) != sizeof(fmt_cbSize)) {
-            return DRWAV_FALSE;    /* Expecting more data. */
-        }
-        *pRunningBytesReadOut += sizeof(fmt_cbSize);
-
-        bytesReadSoFar = 18;
-
-        fmtOut->extendedSize = drwav__bytes_to_u16(fmt_cbSize);
-        if (fmtOut->extendedSize > 0) {
-            /* Simple validation. */
-            if (fmtOut->formatTag == DR_WAVE_FORMAT_EXTENSIBLE) {
-                if (fmtOut->extendedSize != 22) {
-                    return DRWAV_FALSE;
-                }
-            }
-
-            if (fmtOut->formatTag == DR_WAVE_FORMAT_EXTENSIBLE) {
-                drwav_uint8 fmtext[22];
-                if (onRead(pUserData, fmtext, fmtOut->extendedSize) != fmtOut->extendedSize) {
-                    return DRWAV_FALSE;    /* Expecting more data. */
-                }
-
-                fmtOut->validBitsPerSample = drwav__bytes_to_u16(fmtext + 0);
-                fmtOut->channelMask        = drwav__bytes_to_u32(fmtext + 2);
-                drwav__bytes_to_guid(fmtext + 6, fmtOut->subFormat);
-            } else {
-                if (!onSeek(pUserData, fmtOut->extendedSize, drwav_seek_origin_current)) {
-                    return DRWAV_FALSE;
-                }
-            }
-            *pRunningBytesReadOut += fmtOut->extendedSize;
-
-            bytesReadSoFar += fmtOut->extendedSize;
-        }
-
-        /* Seek past any leftover bytes. For w64 the leftover will be defined based on the chunk size. */
-        if (!onSeek(pUserData, (int)(header.sizeInBytes - bytesReadSoFar), drwav_seek_origin_current)) {
-            return DRWAV_FALSE;
-        }
-        *pRunningBytesReadOut += (header.sizeInBytes - bytesReadSoFar);
-    }
-
-    if (header.paddingSize > 0) {
-        if (!onSeek(pUserData, header.paddingSize, drwav_seek_origin_current)) {
-            return DRWAV_FALSE;
-        }
-        *pRunningBytesReadOut += header.paddingSize;
-    }
-
-    return DRWAV_TRUE;
-}
-
-
-static size_t drwav__on_read(drwav_read_proc onRead, void* pUserData, void* pBufferOut, size_t bytesToRead, drwav_uint64* pCursor)
-{
-    size_t bytesRead;
-
-    DRWAV_ASSERT(onRead != NULL);
-    DRWAV_ASSERT(pCursor != NULL);
-
-    bytesRead = onRead(pUserData, pBufferOut, bytesToRead);
-    *pCursor += bytesRead;
-    return bytesRead;
-}
-
-#if 0
-static drwav_bool32 drwav__on_seek(drwav_seek_proc onSeek, void* pUserData, int offset, drwav_seek_origin origin, drwav_uint64* pCursor)
-{
-    DRWAV_ASSERT(onSeek != NULL);
-    DRWAV_ASSERT(pCursor != NULL);
-
-    if (!onSeek(pUserData, offset, origin)) {
-        return DRWAV_FALSE;
-    }
-
-    if (origin == drwav_seek_origin_start) {
-        *pCursor = offset;
-    } else {
-        *pCursor += offset;
-    }
-
-    return DRWAV_TRUE;
-}
-#endif
-
-
-
-static drwav_uint32 drwav_get_bytes_per_pcm_frame(drwav* pWav)
-{
-    /*
-    The bytes per frame is a bit ambiguous. It can be either be based on the bits per sample, or the block align. The way I'm doing it here
-    is that if the bits per sample is a multiple of 8, use floor(bitsPerSample*channels/8), otherwise fall back to the block align.
-    */
-    if ((pWav->bitsPerSample & 0x7) == 0) {
-        /* Bits per sample is a multiple of 8. */
-        return (pWav->bitsPerSample * pWav->fmt.channels) >> 3;
-    } else {
-        return pWav->fmt.blockAlign;
-    }
-}
-
-DRWAV_API drwav_uint16 drwav_fmt_get_format(const drwav_fmt* pFMT)
-{
-    if (pFMT == NULL) {
-        return 0;
-    }
-
-    if (pFMT->formatTag != DR_WAVE_FORMAT_EXTENSIBLE) {
-        return pFMT->formatTag;
-    } else {
-        return drwav__bytes_to_u16(pFMT->subFormat);    /* Only the first two bytes are required. */
-    }
-}
-
-static drwav_bool32 drwav_preinit(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, void* pReadSeekUserData, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    if (pWav == NULL || onRead == NULL || onSeek == NULL) {
-        return DRWAV_FALSE;
-    }
-
-    DRWAV_ZERO_MEMORY(pWav, sizeof(*pWav));
-    pWav->onRead    = onRead;
-    pWav->onSeek    = onSeek;
-    pWav->pUserData = pReadSeekUserData;
-    pWav->allocationCallbacks = drwav_copy_allocation_callbacks_or_defaults(pAllocationCallbacks);
-
-    if (pWav->allocationCallbacks.onFree == NULL || (pWav->allocationCallbacks.onMalloc == NULL && pWav->allocationCallbacks.onRealloc == NULL)) {
-        return DRWAV_FALSE;    /* Invalid allocation callbacks. */
-    }
-
-    return DRWAV_TRUE;
-}
-
-static drwav_bool32 drwav_init__internal(drwav* pWav, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags)
-{
-    /* This function assumes drwav_preinit() has been called beforehand. */
-
-    drwav_uint64 cursor;    /* <-- Keeps track of the byte position so we can seek to specific locations. */
-    drwav_bool32 sequential;
-    drwav_uint8 riff[4];
-    drwav_fmt fmt;
-    unsigned short translatedFormatTag;
-    drwav_uint64 sampleCountFromFactChunk;
-    drwav_bool32 foundDataChunk;
-    drwav_uint64 dataChunkSize;
-    drwav_uint64 chunkSize;
-
-    cursor = 0;
-    sequential = (flags & DRWAV_SEQUENTIAL) != 0;
-
-    /* The first 4 bytes should be the RIFF identifier. */
-    if (drwav__on_read(pWav->onRead, pWav->pUserData, riff, sizeof(riff), &cursor) != sizeof(riff)) {
-        return DRWAV_FALSE;
-    }
-
-    /*
-    The first 4 bytes can be used to identify the container. For RIFF files it will start with "RIFF" and for
-    w64 it will start with "riff".
-    */
-    if (drwav__fourcc_equal(riff, "RIFF")) {
-        pWav->container = drwav_container_riff;
-    } else if (drwav__fourcc_equal(riff, "riff")) {
-        int i;
-        drwav_uint8 riff2[12];
-
-        pWav->container = drwav_container_w64;
-
-        /* Check the rest of the GUID for validity. */
-        if (drwav__on_read(pWav->onRead, pWav->pUserData, riff2, sizeof(riff2), &cursor) != sizeof(riff2)) {
-            return DRWAV_FALSE;
-        }
-
-        for (i = 0; i < 12; ++i) {
-            if (riff2[i] != drwavGUID_W64_RIFF[i+4]) {
-                return DRWAV_FALSE;
-            }
-        }
-    } else {
-        return DRWAV_FALSE;   /* Unknown or unsupported container. */
-    }
-
-
-    if (pWav->container == drwav_container_riff) {
-        drwav_uint8 chunkSizeBytes[4];
-        drwav_uint8 wave[4];
-
-        /* RIFF/WAVE */
-        if (drwav__on_read(pWav->onRead, pWav->pUserData, chunkSizeBytes, sizeof(chunkSizeBytes), &cursor) != sizeof(chunkSizeBytes)) {
-            return DRWAV_FALSE;
-        }
-
-        if (drwav__bytes_to_u32(chunkSizeBytes) < 36) {
-            return DRWAV_FALSE;    /* Chunk size should always be at least 36 bytes. */
-        }
-
-        if (drwav__on_read(pWav->onRead, pWav->pUserData, wave, sizeof(wave), &cursor) != sizeof(wave)) {
-            return DRWAV_FALSE;
-        }
-
-        if (!drwav__fourcc_equal(wave, "WAVE")) {
-            return DRWAV_FALSE;    /* Expecting "WAVE". */
-        }
-    } else {
-        drwav_uint8 chunkSizeBytes[8];
-        drwav_uint8 wave[16];
-
-        /* W64 */
-        if (drwav__on_read(pWav->onRead, pWav->pUserData, chunkSizeBytes, sizeof(chunkSizeBytes), &cursor) != sizeof(chunkSizeBytes)) {
-            return DRWAV_FALSE;
-        }
-
-        if (drwav__bytes_to_u64(chunkSizeBytes) < 80) {
-            return DRWAV_FALSE;
-        }
-
-        if (drwav__on_read(pWav->onRead, pWav->pUserData, wave, sizeof(wave), &cursor) != sizeof(wave)) {
-            return DRWAV_FALSE;
-        }
-
-        if (!drwav__guid_equal(wave, drwavGUID_W64_WAVE)) {
-            return DRWAV_FALSE;
-        }
-    }
-
-
-    /* The next bytes should be the "fmt " chunk. */
-    if (!drwav__read_fmt(pWav->onRead, pWav->onSeek, pWav->pUserData, pWav->container, &cursor, &fmt)) {
-        return DRWAV_FALSE;    /* Failed to read the "fmt " chunk. */
-    }
-
-    /* Basic validation. */
-    if ((fmt.sampleRate    == 0 || fmt.sampleRate    > DRWAV_MAX_SAMPLE_RATE)     ||
-        (fmt.channels      == 0 || fmt.channels      > DRWAV_MAX_CHANNELS)        ||
-        (fmt.bitsPerSample == 0 || fmt.bitsPerSample > DRWAV_MAX_BITS_PER_SAMPLE) ||
-        fmt.blockAlign == 0) {
-        return DRWAV_FALSE; /* Probably an invalid WAV file. */
-    }
-
-
-    /* Translate the internal format. */
-    translatedFormatTag = fmt.formatTag;
-    if (translatedFormatTag == DR_WAVE_FORMAT_EXTENSIBLE) {
-        translatedFormatTag = drwav__bytes_to_u16(fmt.subFormat + 0);
-    }
-
-
-
-    sampleCountFromFactChunk = 0;
-
-    /*
-    We need to enumerate over each chunk for two reasons:
-      1) The "data" chunk may not be the next one
-      2) We may want to report each chunk back to the client
-    
-    In order to correctly report each chunk back to the client we will need to keep looping until the end of the file.
-    */
-    foundDataChunk = DRWAV_FALSE;
-    dataChunkSize = 0;
-
-    /* The next chunk we care about is the "data" chunk. This is not necessarily the next chunk so we'll need to loop. */
-    for (;;)
-    {
-        drwav_chunk_header header;
-        drwav_result result = drwav__read_chunk_header(pWav->onRead, pWav->pUserData, pWav->container, &cursor, &header);
-        if (result != DRWAV_SUCCESS) {
-            if (!foundDataChunk) {
-                return DRWAV_FALSE;
-            } else {
-                break;  /* Probably at the end of the file. Get out of the loop. */
-            }
-        }
-
-        /* Tell the client about this chunk. */
-        if (!sequential && onChunk != NULL) {
-            drwav_uint64 callbackBytesRead = onChunk(pChunkUserData, pWav->onRead, pWav->onSeek, pWav->pUserData, &header, pWav->container, &fmt);
-
-            /*
-            dr_wav may need to read the contents of the chunk, so we now need to seek back to the position before
-            we called the callback.
-            */
-            if (callbackBytesRead > 0) {
-                if (!drwav__seek_from_start(pWav->onSeek, cursor, pWav->pUserData)) {
-                    return DRWAV_FALSE;
-                }
-            }
-        }
-        
-
-        if (!foundDataChunk) {
-            pWav->dataChunkDataPos = cursor;
-        }
-
-        chunkSize = header.sizeInBytes;
-        if (pWav->container == drwav_container_riff) {
-            if (drwav__fourcc_equal(header.id.fourcc, "data")) {
-                foundDataChunk = DRWAV_TRUE;
-                dataChunkSize = chunkSize;
-            }
-        } else {
-            if (drwav__guid_equal(header.id.guid, drwavGUID_W64_DATA)) {
-                foundDataChunk = DRWAV_TRUE;
-                dataChunkSize = chunkSize;
-            }
-        }
-
-        /*
-        If at this point we have found the data chunk and we're running in sequential mode, we need to break out of this loop. The reason for
-        this is that we would otherwise require a backwards seek which sequential mode forbids.
-        */
-        if (foundDataChunk && sequential) {
-            break;
-        }
-
-        /* Optional. Get the total sample count from the FACT chunk. This is useful for compressed formats. */
-        if (pWav->container == drwav_container_riff) {
-            if (drwav__fourcc_equal(header.id.fourcc, "fact")) {
-                drwav_uint32 sampleCount;
-                if (drwav__on_read(pWav->onRead, pWav->pUserData, &sampleCount, 4, &cursor) != 4) {
-                    return DRWAV_FALSE;
-                }
-                chunkSize -= 4;
-
-                if (!foundDataChunk) {
-                    pWav->dataChunkDataPos = cursor;
-                }
-
-                /*
-                The sample count in the "fact" chunk is either unreliable, or I'm not understanding it properly. For now I am only enabling this
-                for Microsoft ADPCM formats.
-                */
-                if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) {
-                    sampleCountFromFactChunk = sampleCount;
-                } else {
-                    sampleCountFromFactChunk = 0;
-                }
-            }
-        } else {
-            if (drwav__guid_equal(header.id.guid, drwavGUID_W64_FACT)) {
-                if (drwav__on_read(pWav->onRead, pWav->pUserData, &sampleCountFromFactChunk, 8, &cursor) != 8) {
-                    return DRWAV_FALSE;
-                }
-                chunkSize -= 8;
-
-                if (!foundDataChunk) {
-                    pWav->dataChunkDataPos = cursor;
-                }
-            }
-        }
-
-        /* "smpl" chunk. */
-        if (pWav->container == drwav_container_riff) {
-            if (drwav__fourcc_equal(header.id.fourcc, "smpl")) {
-                drwav_uint8 smplHeaderData[36];    /* 36 = size of the smpl header section, not including the loop data. */
-                if (chunkSize >= sizeof(smplHeaderData)) {
-                    drwav_uint64 bytesJustRead = drwav__on_read(pWav->onRead, pWav->pUserData, smplHeaderData, sizeof(smplHeaderData), &cursor);
-                    chunkSize -= bytesJustRead;
-
-                    if (bytesJustRead == sizeof(smplHeaderData)) {
-                        drwav_uint32 iLoop;
-
-                        pWav->smpl.manufacturer      = drwav__bytes_to_u32(smplHeaderData+0);
-                        pWav->smpl.product           = drwav__bytes_to_u32(smplHeaderData+4);
-                        pWav->smpl.samplePeriod      = drwav__bytes_to_u32(smplHeaderData+8);
-                        pWav->smpl.midiUnityNotes    = drwav__bytes_to_u32(smplHeaderData+12);
-                        pWav->smpl.midiPitchFraction = drwav__bytes_to_u32(smplHeaderData+16);
-                        pWav->smpl.smpteFormat       = drwav__bytes_to_u32(smplHeaderData+20);
-                        pWav->smpl.smpteOffset       = drwav__bytes_to_u32(smplHeaderData+24);
-                        pWav->smpl.numSampleLoops    = drwav__bytes_to_u32(smplHeaderData+28);
-                        pWav->smpl.samplerData       = drwav__bytes_to_u32(smplHeaderData+32);
-
-                        for (iLoop = 0; iLoop < pWav->smpl.numSampleLoops && iLoop < drwav_countof(pWav->smpl.loops); ++iLoop) {
-                            drwav_uint8 smplLoopData[24];  /* 24 = size of a loop section in the smpl chunk. */
-                            bytesJustRead = drwav__on_read(pWav->onRead, pWav->pUserData, smplLoopData, sizeof(smplLoopData), &cursor);
-                            chunkSize -= bytesJustRead;
-
-                            if (bytesJustRead == sizeof(smplLoopData)) {
-                                pWav->smpl.loops[iLoop].cuePointId = drwav__bytes_to_u32(smplLoopData+0);
-                                pWav->smpl.loops[iLoop].type       = drwav__bytes_to_u32(smplLoopData+4);
-                                pWav->smpl.loops[iLoop].start      = drwav__bytes_to_u32(smplLoopData+8);
-                                pWav->smpl.loops[iLoop].end        = drwav__bytes_to_u32(smplLoopData+12);
-                                pWav->smpl.loops[iLoop].fraction   = drwav__bytes_to_u32(smplLoopData+16);
-                                pWav->smpl.loops[iLoop].playCount  = drwav__bytes_to_u32(smplLoopData+20);
-                            } else {
-                                break;  /* Break from the smpl loop for loop. */
-                            }
-                        }
-                    }
-                } else {
-                    /* Looks like invalid data. Ignore the chunk. */
-                }
-            }
-        } else {
-            if (drwav__guid_equal(header.id.guid, drwavGUID_W64_SMPL)) {
-                /*
-                This path will be hit when a W64 WAV file contains a smpl chunk. I don't have a sample file to test this path, so a contribution
-                is welcome to add support for this.
-                */
-            }
-        }
-
-        /* Make sure we seek past the padding. */
-        chunkSize += header.paddingSize;
-        if (!drwav__seek_forward(pWav->onSeek, chunkSize, pWav->pUserData)) {
-            break;
-        }
-        cursor += chunkSize;
-
-        if (!foundDataChunk) {
-            pWav->dataChunkDataPos = cursor;
-        }
-    }
-
-    /* If we haven't found a data chunk, return an error. */
-    if (!foundDataChunk) {
-        return DRWAV_FALSE;
-    }
-
-    /* We may have moved passed the data chunk. If so we need to move back. If running in sequential mode we can assume we are already sitting on the data chunk. */
-    if (!sequential) {
-        if (!drwav__seek_from_start(pWav->onSeek, pWav->dataChunkDataPos, pWav->pUserData)) {
-            return DRWAV_FALSE;
-        }
-        cursor = pWav->dataChunkDataPos;
-    }
-    
-
-    /* At this point we should be sitting on the first byte of the raw audio data. */
-
-    pWav->fmt                 = fmt;
-    pWav->sampleRate          = fmt.sampleRate;
-    pWav->channels            = fmt.channels;
-    pWav->bitsPerSample       = fmt.bitsPerSample;
-    pWav->bytesRemaining      = dataChunkSize;
-    pWav->translatedFormatTag = translatedFormatTag;
-    pWav->dataChunkDataSize   = dataChunkSize;
-
-    if (sampleCountFromFactChunk != 0) {
-        pWav->totalPCMFrameCount = sampleCountFromFactChunk;
-    } else {
-        pWav->totalPCMFrameCount = dataChunkSize / drwav_get_bytes_per_pcm_frame(pWav);
-
-        if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) {
-            drwav_uint64 totalBlockHeaderSizeInBytes;
-            drwav_uint64 blockCount = dataChunkSize / fmt.blockAlign;
-
-            /* Make sure any trailing partial block is accounted for. */
-            if ((blockCount * fmt.blockAlign) < dataChunkSize) {
-                blockCount += 1;
-            }
-
-            /* We decode two samples per byte. There will be blockCount headers in the data chunk. This is enough to know how to calculate the total PCM frame count. */
-            totalBlockHeaderSizeInBytes = blockCount * (6*fmt.channels);
-            pWav->totalPCMFrameCount = ((dataChunkSize - totalBlockHeaderSizeInBytes) * 2) / fmt.channels;
-        }
-        if (pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) {
-            drwav_uint64 totalBlockHeaderSizeInBytes;
-            drwav_uint64 blockCount = dataChunkSize / fmt.blockAlign;
-
-            /* Make sure any trailing partial block is accounted for. */
-            if ((blockCount * fmt.blockAlign) < dataChunkSize) {
-                blockCount += 1;
-            }
-
-            /* We decode two samples per byte. There will be blockCount headers in the data chunk. This is enough to know how to calculate the total PCM frame count. */
-            totalBlockHeaderSizeInBytes = blockCount * (4*fmt.channels);
-            pWav->totalPCMFrameCount = ((dataChunkSize - totalBlockHeaderSizeInBytes) * 2) / fmt.channels;
-
-            /* The header includes a decoded sample for each channel which acts as the initial predictor sample. */
-            pWav->totalPCMFrameCount += blockCount;
-        }
-    }
-
-    /* Some formats only support a certain number of channels. */
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM || pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) {
-        if (pWav->channels > 2) {
-            return DRWAV_FALSE;
-        }
-    }
-
-#ifdef DR_WAV_LIBSNDFILE_COMPAT
-    /*
-    I use libsndfile as a benchmark for testing, however in the version I'm using (from the Windows installer on the libsndfile website),
-    it appears the total sample count libsndfile uses for MS-ADPCM is incorrect. It would seem they are computing the total sample count
-    from the number of blocks, however this results in the inclusion of extra silent samples at the end of the last block. The correct
-    way to know the total sample count is to inspect the "fact" chunk, which should always be present for compressed formats, and should
-    always include the sample count. This little block of code below is only used to emulate the libsndfile logic so I can properly run my
-    correctness tests against libsndfile, and is disabled by default.
-    */
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) {
-        drwav_uint64 blockCount = dataChunkSize / fmt.blockAlign;
-        pWav->totalPCMFrameCount = (((blockCount * (fmt.blockAlign - (6*pWav->channels))) * 2)) / fmt.channels;  /* x2 because two samples per byte. */
-    }
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) {
-        drwav_uint64 blockCount = dataChunkSize / fmt.blockAlign;
-        pWav->totalPCMFrameCount = (((blockCount * (fmt.blockAlign - (4*pWav->channels))) * 2) + (blockCount * pWav->channels)) / fmt.channels;
-    }
-#endif
-
-    return DRWAV_TRUE;
-}
-
-DRWAV_API drwav_bool32 drwav_init(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    return drwav_init_ex(pWav, onRead, onSeek, NULL, pUserData, NULL, 0, pAllocationCallbacks);
-}
-
-DRWAV_API drwav_bool32 drwav_init_ex(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, drwav_chunk_proc onChunk, void* pReadSeekUserData, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    if (!drwav_preinit(pWav, onRead, onSeek, pReadSeekUserData, pAllocationCallbacks)) {
-        return DRWAV_FALSE;
-    }
-
-    return drwav_init__internal(pWav, onChunk, pChunkUserData, flags);
-}
-
-
-static drwav_uint32 drwav__riff_chunk_size_riff(drwav_uint64 dataChunkSize)
-{
-    drwav_uint32 dataSubchunkPaddingSize = drwav__chunk_padding_size_riff(dataChunkSize);
-
-    if (dataChunkSize <= (0xFFFFFFFFUL - 36 - dataSubchunkPaddingSize)) {
-        return 36 + (drwav_uint32)(dataChunkSize + dataSubchunkPaddingSize);
-    } else {
-        return 0xFFFFFFFF;
-    }
-}
-
-static drwav_uint32 drwav__data_chunk_size_riff(drwav_uint64 dataChunkSize)
-{
-    if (dataChunkSize <= 0xFFFFFFFFUL) {
-        return (drwav_uint32)dataChunkSize;
-    } else {
-        return 0xFFFFFFFFUL;
-    }
-}
-
-static drwav_uint64 drwav__riff_chunk_size_w64(drwav_uint64 dataChunkSize)
-{
-    drwav_uint64 dataSubchunkPaddingSize = drwav__chunk_padding_size_w64(dataChunkSize);
-
-    return 80 + 24 + dataChunkSize + dataSubchunkPaddingSize;   /* +24 because W64 includes the size of the GUID and size fields. */
-}
-
-static drwav_uint64 drwav__data_chunk_size_w64(drwav_uint64 dataChunkSize)
-{
-    return 24 + dataChunkSize;        /* +24 because W64 includes the size of the GUID and size fields. */
-}
-
-
-static drwav_bool32 drwav_preinit_write(drwav* pWav, const drwav_data_format* pFormat, drwav_bool32 isSequential, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    if (pWav == NULL || onWrite == NULL) {
-        return DRWAV_FALSE;
-    }
-
-    if (!isSequential && onSeek == NULL) {
-        return DRWAV_FALSE; /* <-- onSeek is required when in non-sequential mode. */
-    }
-
-    /* Not currently supporting compressed formats. Will need to add support for the "fact" chunk before we enable this. */
-    if (pFormat->format == DR_WAVE_FORMAT_EXTENSIBLE) {
-        return DRWAV_FALSE;
-    }
-    if (pFormat->format == DR_WAVE_FORMAT_ADPCM || pFormat->format == DR_WAVE_FORMAT_DVI_ADPCM) {
-        return DRWAV_FALSE;
-    }
-
-    DRWAV_ZERO_MEMORY(pWav, sizeof(*pWav));
-    pWav->onWrite   = onWrite;
-    pWav->onSeek    = onSeek;
-    pWav->pUserData = pUserData;
-    pWav->allocationCallbacks = drwav_copy_allocation_callbacks_or_defaults(pAllocationCallbacks);
-
-    if (pWav->allocationCallbacks.onFree == NULL || (pWav->allocationCallbacks.onMalloc == NULL && pWav->allocationCallbacks.onRealloc == NULL)) {
-        return DRWAV_FALSE;    /* Invalid allocation callbacks. */
-    }
-
-    pWav->fmt.formatTag = (drwav_uint16)pFormat->format;
-    pWav->fmt.channels = (drwav_uint16)pFormat->channels;
-    pWav->fmt.sampleRate = pFormat->sampleRate;
-    pWav->fmt.avgBytesPerSec = (drwav_uint32)((pFormat->bitsPerSample * pFormat->sampleRate * pFormat->channels) / 8);
-    pWav->fmt.blockAlign = (drwav_uint16)((pFormat->channels * pFormat->bitsPerSample) / 8);
-    pWav->fmt.bitsPerSample = (drwav_uint16)pFormat->bitsPerSample;
-    pWav->fmt.extendedSize = 0;
-    pWav->isSequentialWrite = isSequential;
-
-    return DRWAV_TRUE;
-}
-
-static drwav_bool32 drwav_init_write__internal(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount)
-{
-    /* The function assumes drwav_preinit_write() was called beforehand. */
-
-    size_t runningPos = 0;
-    drwav_uint64 initialDataChunkSize = 0;
-    drwav_uint64 chunkSizeFMT;
-
-    /*
-    The initial values for the "RIFF" and "data" chunks depends on whether or not we are initializing in sequential mode or not. In
-    sequential mode we set this to its final values straight away since they can be calculated from the total sample count. In non-
-    sequential mode we initialize it all to zero and fill it out in drwav_uninit() using a backwards seek.
-    */
-    if (pWav->isSequentialWrite) {
-        initialDataChunkSize = (totalSampleCount * pWav->fmt.bitsPerSample) / 8;
-
-        /*
-        The RIFF container has a limit on the number of samples. drwav is not allowing this. There's no practical limits for Wave64
-        so for the sake of simplicity I'm not doing any validation for that.
-        */
-        if (pFormat->container == drwav_container_riff) {
-            if (initialDataChunkSize > (0xFFFFFFFFUL - 36)) {
-                return DRWAV_FALSE; /* Not enough room to store every sample. */
-            }
-        }
-    }
-
-    pWav->dataChunkDataSizeTargetWrite = initialDataChunkSize;
-
-
-    /* "RIFF" chunk. */
-    if (pFormat->container == drwav_container_riff) {
-        drwav_uint32 chunkSizeRIFF = 36 + (drwav_uint32)initialDataChunkSize;   /* +36 = "RIFF"+[RIFF Chunk Size]+"WAVE" + [sizeof "fmt " chunk] */
-        runningPos += pWav->onWrite(pWav->pUserData, "RIFF", 4);
-        runningPos += pWav->onWrite(pWav->pUserData, &chunkSizeRIFF, 4);
-        runningPos += pWav->onWrite(pWav->pUserData, "WAVE", 4);
-    } else {
-        drwav_uint64 chunkSizeRIFF = 80 + 24 + initialDataChunkSize;   /* +24 because W64 includes the size of the GUID and size fields. */
-        runningPos += pWav->onWrite(pWav->pUserData, drwavGUID_W64_RIFF, 16);
-        runningPos += pWav->onWrite(pWav->pUserData, &chunkSizeRIFF, 8);
-        runningPos += pWav->onWrite(pWav->pUserData, drwavGUID_W64_WAVE, 16);
-    }
-
-    /* "fmt " chunk. */
-    if (pFormat->container == drwav_container_riff) {
-        chunkSizeFMT = 16;
-        runningPos += pWav->onWrite(pWav->pUserData, "fmt ", 4);
-        runningPos += pWav->onWrite(pWav->pUserData, &chunkSizeFMT, 4);
-    } else {
-        chunkSizeFMT = 40;
-        runningPos += pWav->onWrite(pWav->pUserData, drwavGUID_W64_FMT, 16);
-        runningPos += pWav->onWrite(pWav->pUserData, &chunkSizeFMT, 8);
-    }
-
-    runningPos += pWav->onWrite(pWav->pUserData, &pWav->fmt.formatTag,      2);
-    runningPos += pWav->onWrite(pWav->pUserData, &pWav->fmt.channels,       2);
-    runningPos += pWav->onWrite(pWav->pUserData, &pWav->fmt.sampleRate,     4);
-    runningPos += pWav->onWrite(pWav->pUserData, &pWav->fmt.avgBytesPerSec, 4);
-    runningPos += pWav->onWrite(pWav->pUserData, &pWav->fmt.blockAlign,     2);
-    runningPos += pWav->onWrite(pWav->pUserData, &pWav->fmt.bitsPerSample,  2);
-
-    pWav->dataChunkDataPos = runningPos;
-
-    /* "data" chunk. */
-    if (pFormat->container == drwav_container_riff) {
-        drwav_uint32 chunkSizeDATA = (drwav_uint32)initialDataChunkSize;
-        runningPos += pWav->onWrite(pWav->pUserData, "data", 4);
-        runningPos += pWav->onWrite(pWav->pUserData, &chunkSizeDATA, 4);
-    } else {
-        drwav_uint64 chunkSizeDATA = 24 + initialDataChunkSize; /* +24 because W64 includes the size of the GUID and size fields. */
-        runningPos += pWav->onWrite(pWav->pUserData, drwavGUID_W64_DATA, 16);
-        runningPos += pWav->onWrite(pWav->pUserData, &chunkSizeDATA, 8);
-    }
-
-
-    /* Simple validation. */
-    if (pFormat->container == drwav_container_riff) {
-        if (runningPos != 20 + chunkSizeFMT + 8) {
-            return DRWAV_FALSE;
-        }
-    } else {
-        if (runningPos != 40 + chunkSizeFMT + 24) {
-            return DRWAV_FALSE;
-        }
-    }
-    
-
-    /* Set some properties for the client's convenience. */
-    pWav->container = pFormat->container;
-    pWav->channels = (drwav_uint16)pFormat->channels;
-    pWav->sampleRate = pFormat->sampleRate;
-    pWav->bitsPerSample = (drwav_uint16)pFormat->bitsPerSample;
-    pWav->translatedFormatTag = (drwav_uint16)pFormat->format;
-
-    return DRWAV_TRUE;
-}
-
-
-DRWAV_API drwav_bool32 drwav_init_write(drwav* pWav, const drwav_data_format* pFormat, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    if (!drwav_preinit_write(pWav, pFormat, DRWAV_FALSE, onWrite, onSeek, pUserData, pAllocationCallbacks)) {
-        return DRWAV_FALSE;
-    }
-
-    return drwav_init_write__internal(pWav, pFormat, 0);               /* DRWAV_FALSE = Not Sequential */
-}
-
-DRWAV_API drwav_bool32 drwav_init_write_sequential(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_write_proc onWrite, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    if (!drwav_preinit_write(pWav, pFormat, DRWAV_TRUE, onWrite, NULL, pUserData, pAllocationCallbacks)) {
-        return DRWAV_FALSE;
-    }
-
-    return drwav_init_write__internal(pWav, pFormat, totalSampleCount); /* DRWAV_TRUE = Sequential */
-}
-
-DRWAV_API drwav_bool32 drwav_init_write_sequential_pcm_frames(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalPCMFrameCount, drwav_write_proc onWrite, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    if (pFormat == NULL) {
-        return DRWAV_FALSE;
-    }
-
-    return drwav_init_write_sequential(pWav, pFormat, totalPCMFrameCount*pFormat->channels, onWrite, pUserData, pAllocationCallbacks);
-}
-
-DRWAV_API drwav_uint64 drwav_target_write_size_bytes(const drwav_data_format* pFormat, drwav_uint64 totalSampleCount)
-{
-    /* Casting totalSampleCount to drwav_int64 for VC6 compatibility. No issues in practice because nobody is going to exhaust the whole 63 bits. */
-    drwav_uint64 targetDataSizeBytes = (drwav_uint64)((drwav_int64)totalSampleCount * pFormat->channels * pFormat->bitsPerSample/8.0);
-    drwav_uint64 riffChunkSizeBytes;
-    drwav_uint64 fileSizeBytes;
-
-    if (pFormat->container == drwav_container_riff) {
-        riffChunkSizeBytes = drwav__riff_chunk_size_riff(targetDataSizeBytes);
-        fileSizeBytes = (8 + riffChunkSizeBytes); /* +8 because WAV doesn't include the size of the ChunkID and ChunkSize fields. */
-    } else {
-        riffChunkSizeBytes = drwav__riff_chunk_size_w64(targetDataSizeBytes);
-        fileSizeBytes = riffChunkSizeBytes;
-    }
-
-    return fileSizeBytes;
-}
-
-
-#ifndef DR_WAV_NO_STDIO
-
-/* drwav_result_from_errno() is only used for fopen() and wfopen() so putting it inside DR_WAV_NO_STDIO for now. If something else needs this later we can move it out. */
-#include <errno.h>
-static drwav_result drwav_result_from_errno(int e)
-{
-    switch (e)
-    {
-        case 0: return DRWAV_SUCCESS;
-    #ifdef EPERM
-        case EPERM: return DRWAV_INVALID_OPERATION;
-    #endif
-    #ifdef ENOENT
-        case ENOENT: return DRWAV_DOES_NOT_EXIST;
-    #endif
-    #ifdef ESRCH
-        case ESRCH: return DRWAV_DOES_NOT_EXIST;
-    #endif
-    #ifdef EINTR
-        case EINTR: return DRWAV_INTERRUPT;
-    #endif
-    #ifdef EIO
-        case EIO: return DRWAV_IO_ERROR;
-    #endif
-    #ifdef ENXIO
-        case ENXIO: return DRWAV_DOES_NOT_EXIST;
-    #endif
-    #ifdef E2BIG
-        case E2BIG: return DRWAV_INVALID_ARGS;
-    #endif
-    #ifdef ENOEXEC
-        case ENOEXEC: return DRWAV_INVALID_FILE;
-    #endif
-    #ifdef EBADF
-        case EBADF: return DRWAV_INVALID_FILE;
-    #endif
-    #ifdef ECHILD
-        case ECHILD: return DRWAV_ERROR;
-    #endif
-    #ifdef EAGAIN
-        case EAGAIN: return DRWAV_UNAVAILABLE;
-    #endif
-    #ifdef ENOMEM
-        case ENOMEM: return DRWAV_OUT_OF_MEMORY;
-    #endif
-    #ifdef EACCES
-        case EACCES: return DRWAV_ACCESS_DENIED;
-    #endif
-    #ifdef EFAULT
-        case EFAULT: return DRWAV_BAD_ADDRESS;
-    #endif
-    #ifdef ENOTBLK
-        case ENOTBLK: return DRWAV_ERROR;
-    #endif
-    #ifdef EBUSY
-        case EBUSY: return DRWAV_BUSY;
-    #endif
-    #ifdef EEXIST
-        case EEXIST: return DRWAV_ALREADY_EXISTS;
-    #endif
-    #ifdef EXDEV
-        case EXDEV: return DRWAV_ERROR;
-    #endif
-    #ifdef ENODEV
-        case ENODEV: return DRWAV_DOES_NOT_EXIST;
-    #endif
-    #ifdef ENOTDIR
-        case ENOTDIR: return DRWAV_NOT_DIRECTORY;
-    #endif
-    #ifdef EISDIR
-        case EISDIR: return DRWAV_IS_DIRECTORY;
-    #endif
-    #ifdef EINVAL
-        case EINVAL: return DRWAV_INVALID_ARGS;
-    #endif
-    #ifdef ENFILE
-        case ENFILE: return DRWAV_TOO_MANY_OPEN_FILES;
-    #endif
-    #ifdef EMFILE
-        case EMFILE: return DRWAV_TOO_MANY_OPEN_FILES;
-    #endif
-    #ifdef ENOTTY
-        case ENOTTY: return DRWAV_INVALID_OPERATION;
-    #endif
-    #ifdef ETXTBSY
-        case ETXTBSY: return DRWAV_BUSY;
-    #endif
-    #ifdef EFBIG
-        case EFBIG: return DRWAV_TOO_BIG;
-    #endif
-    #ifdef ENOSPC
-        case ENOSPC: return DRWAV_NO_SPACE;
-    #endif
-    #ifdef ESPIPE
-        case ESPIPE: return DRWAV_BAD_SEEK;
-    #endif
-    #ifdef EROFS
-        case EROFS: return DRWAV_ACCESS_DENIED;
-    #endif
-    #ifdef EMLINK
-        case EMLINK: return DRWAV_TOO_MANY_LINKS;
-    #endif
-    #ifdef EPIPE
-        case EPIPE: return DRWAV_BAD_PIPE;
-    #endif
-    #ifdef EDOM
-        case EDOM: return DRWAV_OUT_OF_RANGE;
-    #endif
-    #ifdef ERANGE
-        case ERANGE: return DRWAV_OUT_OF_RANGE;
-    #endif
-    #ifdef EDEADLK
-        case EDEADLK: return DRWAV_DEADLOCK;
-    #endif
-    #ifdef ENAMETOOLONG
-        case ENAMETOOLONG: return DRWAV_PATH_TOO_LONG;
-    #endif
-    #ifdef ENOLCK
-        case ENOLCK: return DRWAV_ERROR;
-    #endif
-    #ifdef ENOSYS
-        case ENOSYS: return DRWAV_NOT_IMPLEMENTED;
-    #endif
-    #ifdef ENOTEMPTY
-        case ENOTEMPTY: return DRWAV_DIRECTORY_NOT_EMPTY;
-    #endif
-    #ifdef ELOOP
-        case ELOOP: return DRWAV_TOO_MANY_LINKS;
-    #endif
-    #ifdef ENOMSG
-        case ENOMSG: return DRWAV_NO_MESSAGE;
-    #endif
-    #ifdef EIDRM
-        case EIDRM: return DRWAV_ERROR;
-    #endif
-    #ifdef ECHRNG
-        case ECHRNG: return DRWAV_ERROR;
-    #endif
-    #ifdef EL2NSYNC
-        case EL2NSYNC: return DRWAV_ERROR;
-    #endif
-    #ifdef EL3HLT
-        case EL3HLT: return DRWAV_ERROR;
-    #endif
-    #ifdef EL3RST
-        case EL3RST: return DRWAV_ERROR;
-    #endif
-    #ifdef ELNRNG
-        case ELNRNG: return DRWAV_OUT_OF_RANGE;
-    #endif
-    #ifdef EUNATCH
-        case EUNATCH: return DRWAV_ERROR;
-    #endif
-    #ifdef ENOCSI
-        case ENOCSI: return DRWAV_ERROR;
-    #endif
-    #ifdef EL2HLT
-        case EL2HLT: return DRWAV_ERROR;
-    #endif
-    #ifdef EBADE
-        case EBADE: return DRWAV_ERROR;
-    #endif
-    #ifdef EBADR
-        case EBADR: return DRWAV_ERROR;
-    #endif
-    #ifdef EXFULL
-        case EXFULL: return DRWAV_ERROR;
-    #endif
-    #ifdef ENOANO
-        case ENOANO: return DRWAV_ERROR;
-    #endif
-    #ifdef EBADRQC
-        case EBADRQC: return DRWAV_ERROR;
-    #endif
-    #ifdef EBADSLT
-        case EBADSLT: return DRWAV_ERROR;
-    #endif
-    #ifdef EBFONT
-        case EBFONT: return DRWAV_INVALID_FILE;
-    #endif
-    #ifdef ENOSTR
-        case ENOSTR: return DRWAV_ERROR;
-    #endif
-    #ifdef ENODATA
-        case ENODATA: return DRWAV_NO_DATA_AVAILABLE;
-    #endif
-    #ifdef ETIME
-        case ETIME: return DRWAV_TIMEOUT;
-    #endif
-    #ifdef ENOSR
-        case ENOSR: return DRWAV_NO_DATA_AVAILABLE;
-    #endif
-    #ifdef ENONET
-        case ENONET: return DRWAV_NO_NETWORK;
-    #endif
-    #ifdef ENOPKG
-        case ENOPKG: return DRWAV_ERROR;
-    #endif
-    #ifdef EREMOTE
-        case EREMOTE: return DRWAV_ERROR;
-    #endif
-    #ifdef ENOLINK
-        case ENOLINK: return DRWAV_ERROR;
-    #endif
-    #ifdef EADV
-        case EADV: return DRWAV_ERROR;
-    #endif
-    #ifdef ESRMNT
-        case ESRMNT: return DRWAV_ERROR;
-    #endif
-    #ifdef ECOMM
-        case ECOMM: return DRWAV_ERROR;
-    #endif
-    #ifdef EPROTO
-        case EPROTO: return DRWAV_ERROR;
-    #endif
-    #ifdef EMULTIHOP
-        case EMULTIHOP: return DRWAV_ERROR;
-    #endif
-    #ifdef EDOTDOT
-        case EDOTDOT: return DRWAV_ERROR;
-    #endif
-    #ifdef EBADMSG
-        case EBADMSG: return DRWAV_BAD_MESSAGE;
-    #endif
-    #ifdef EOVERFLOW
-        case EOVERFLOW: return DRWAV_TOO_BIG;
-    #endif
-    #ifdef ENOTUNIQ
-        case ENOTUNIQ: return DRWAV_NOT_UNIQUE;
-    #endif
-    #ifdef EBADFD
-        case EBADFD: return DRWAV_ERROR;
-    #endif
-    #ifdef EREMCHG
-        case EREMCHG: return DRWAV_ERROR;
-    #endif
-    #ifdef ELIBACC
-        case ELIBACC: return DRWAV_ACCESS_DENIED;
-    #endif
-    #ifdef ELIBBAD
-        case ELIBBAD: return DRWAV_INVALID_FILE;
-    #endif
-    #ifdef ELIBSCN
-        case ELIBSCN: return DRWAV_INVALID_FILE;
-    #endif
-    #ifdef ELIBMAX
-        case ELIBMAX: return DRWAV_ERROR;
-    #endif
-    #ifdef ELIBEXEC
-        case ELIBEXEC: return DRWAV_ERROR;
-    #endif
-    #ifdef EILSEQ
-        case EILSEQ: return DRWAV_INVALID_DATA;
-    #endif
-    #ifdef ERESTART
-        case ERESTART: return DRWAV_ERROR;
-    #endif
-    #ifdef ESTRPIPE
-        case ESTRPIPE: return DRWAV_ERROR;
-    #endif
-    #ifdef EUSERS
-        case EUSERS: return DRWAV_ERROR;
-    #endif
-    #ifdef ENOTSOCK
-        case ENOTSOCK: return DRWAV_NOT_SOCKET;
-    #endif
-    #ifdef EDESTADDRREQ
-        case EDESTADDRREQ: return DRWAV_NO_ADDRESS;
-    #endif
-    #ifdef EMSGSIZE
-        case EMSGSIZE: return DRWAV_TOO_BIG;
-    #endif
-    #ifdef EPROTOTYPE
-        case EPROTOTYPE: return DRWAV_BAD_PROTOCOL;
-    #endif
-    #ifdef ENOPROTOOPT
-        case ENOPROTOOPT: return DRWAV_PROTOCOL_UNAVAILABLE;
-    #endif
-    #ifdef EPROTONOSUPPORT
-        case EPROTONOSUPPORT: return DRWAV_PROTOCOL_NOT_SUPPORTED;
-    #endif
-    #ifdef ESOCKTNOSUPPORT
-        case ESOCKTNOSUPPORT: return DRWAV_SOCKET_NOT_SUPPORTED;
-    #endif
-    #ifdef EOPNOTSUPP
-        case EOPNOTSUPP: return DRWAV_INVALID_OPERATION;
-    #endif
-    #ifdef EPFNOSUPPORT
-        case EPFNOSUPPORT: return DRWAV_PROTOCOL_FAMILY_NOT_SUPPORTED;
-    #endif
-    #ifdef EAFNOSUPPORT
-        case EAFNOSUPPORT: return DRWAV_ADDRESS_FAMILY_NOT_SUPPORTED;
-    #endif
-    #ifdef EADDRINUSE
-        case EADDRINUSE: return DRWAV_ALREADY_IN_USE;
-    #endif
-    #ifdef EADDRNOTAVAIL
-        case EADDRNOTAVAIL: return DRWAV_ERROR;
-    #endif
-    #ifdef ENETDOWN
-        case ENETDOWN: return DRWAV_NO_NETWORK;
-    #endif
-    #ifdef ENETUNREACH
-        case ENETUNREACH: return DRWAV_NO_NETWORK;
-    #endif
-    #ifdef ENETRESET
-        case ENETRESET: return DRWAV_NO_NETWORK;
-    #endif
-    #ifdef ECONNABORTED
-        case ECONNABORTED: return DRWAV_NO_NETWORK;
-    #endif
-    #ifdef ECONNRESET
-        case ECONNRESET: return DRWAV_CONNECTION_RESET;
-    #endif
-    #ifdef ENOBUFS
-        case ENOBUFS: return DRWAV_NO_SPACE;
-    #endif
-    #ifdef EISCONN
-        case EISCONN: return DRWAV_ALREADY_CONNECTED;
-    #endif
-    #ifdef ENOTCONN
-        case ENOTCONN: return DRWAV_NOT_CONNECTED;
-    #endif
-    #ifdef ESHUTDOWN
-        case ESHUTDOWN: return DRWAV_ERROR;
-    #endif
-    #ifdef ETOOMANYREFS
-        case ETOOMANYREFS: return DRWAV_ERROR;
-    #endif
-    #ifdef ETIMEDOUT
-        case ETIMEDOUT: return DRWAV_TIMEOUT;
-    #endif
-    #ifdef ECONNREFUSED
-        case ECONNREFUSED: return DRWAV_CONNECTION_REFUSED;
-    #endif
-    #ifdef EHOSTDOWN
-        case EHOSTDOWN: return DRWAV_NO_HOST;
-    #endif
-    #ifdef EHOSTUNREACH
-        case EHOSTUNREACH: return DRWAV_NO_HOST;
-    #endif
-    #ifdef EALREADY
-        case EALREADY: return DRWAV_IN_PROGRESS;
-    #endif
-    #ifdef EINPROGRESS
-        case EINPROGRESS: return DRWAV_IN_PROGRESS;
-    #endif
-    #ifdef ESTALE
-        case ESTALE: return DRWAV_INVALID_FILE;
-    #endif
-    #ifdef EUCLEAN
-        case EUCLEAN: return DRWAV_ERROR;
-    #endif
-    #ifdef ENOTNAM
-        case ENOTNAM: return DRWAV_ERROR;
-    #endif
-    #ifdef ENAVAIL
-        case ENAVAIL: return DRWAV_ERROR;
-    #endif
-    #ifdef EISNAM
-        case EISNAM: return DRWAV_ERROR;
-    #endif
-    #ifdef EREMOTEIO
-        case EREMOTEIO: return DRWAV_IO_ERROR;
-    #endif
-    #ifdef EDQUOT
-        case EDQUOT: return DRWAV_NO_SPACE;
-    #endif
-    #ifdef ENOMEDIUM
-        case ENOMEDIUM: return DRWAV_DOES_NOT_EXIST;
-    #endif
-    #ifdef EMEDIUMTYPE
-        case EMEDIUMTYPE: return DRWAV_ERROR;
-    #endif
-    #ifdef ECANCELED
-        case ECANCELED: return DRWAV_CANCELLED;
-    #endif
-    #ifdef ENOKEY
-        case ENOKEY: return DRWAV_ERROR;
-    #endif
-    #ifdef EKEYEXPIRED
-        case EKEYEXPIRED: return DRWAV_ERROR;
-    #endif
-    #ifdef EKEYREVOKED
-        case EKEYREVOKED: return DRWAV_ERROR;
-    #endif
-    #ifdef EKEYREJECTED
-        case EKEYREJECTED: return DRWAV_ERROR;
-    #endif
-    #ifdef EOWNERDEAD
-        case EOWNERDEAD: return DRWAV_ERROR;
-    #endif
-    #ifdef ENOTRECOVERABLE
-        case ENOTRECOVERABLE: return DRWAV_ERROR;
-    #endif
-    #ifdef ERFKILL
-        case ERFKILL: return DRWAV_ERROR;
-    #endif
-    #ifdef EHWPOISON
-        case EHWPOISON: return DRWAV_ERROR;
-    #endif
-        default: return DRWAV_ERROR;
-    }
-}
-
-static drwav_result drwav_fopen(FILE** ppFile, const char* pFilePath, const char* pOpenMode)
-{
-#if _MSC_VER && _MSC_VER >= 1400
-    errno_t err;
-#endif
-
-    if (ppFile != NULL) {
-        *ppFile = NULL;  /* Safety. */
-    }
-
-    if (pFilePath == NULL || pOpenMode == NULL || ppFile == NULL) {
-        return DRWAV_INVALID_ARGS;
-    }
-
-#if _MSC_VER && _MSC_VER >= 1400
-    err = fopen_s(ppFile, pFilePath, pOpenMode);
-    if (err != 0) {
-        return drwav_result_from_errno(err);
-    }
-#else
-#if defined(_WIN32) || defined(__APPLE__)
-    *ppFile = fopen(pFilePath, pOpenMode);
-#else
-    #if defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS == 64 && defined(_LARGEFILE64_SOURCE)
-        *ppFile = fopen64(pFilePath, pOpenMode);
-    #else
-        *ppFile = fopen(pFilePath, pOpenMode);
-    #endif
-#endif
-    if (*ppFile == NULL) {
-        drwav_result result = drwav_result_from_errno(errno);
-        if (result == DRWAV_SUCCESS) {
-            result = DRWAV_ERROR;   /* Just a safety check to make sure we never ever return success when pFile == NULL. */
-        }
-
-        return result;
-    }
-#endif
-
-    return DRWAV_SUCCESS;
-}
-
-/*
-_wfopen() isn't always available in all compilation environments.
-
-    * Windows only.
-    * MSVC seems to support it universally as far back as VC6 from what I can tell (haven't checked further back).
-    * MinGW-64 (both 32- and 64-bit) seems to support it.
-    * MinGW wraps it in !defined(__STRICT_ANSI__).
-
-This can be reviewed as compatibility issues arise. The preference is to use _wfopen_s() and _wfopen() as opposed to the wcsrtombs()
-fallback, so if you notice your compiler not detecting this properly I'm happy to look at adding support.
-*/
-#if defined(_WIN32)
-    #if defined(_MSC_VER) || defined(__MINGW64__) || !defined(__STRICT_ANSI__)
-        #define DRWAV_HAS_WFOPEN
-    #endif
-#endif
-
-static drwav_result drwav_wfopen(FILE** ppFile, const wchar_t* pFilePath, const wchar_t* pOpenMode, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    if (ppFile != NULL) {
-        *ppFile = NULL;  /* Safety. */
-    }
-
-    if (pFilePath == NULL || pOpenMode == NULL || ppFile == NULL) {
-        return DRWAV_INVALID_ARGS;
-    }
-
-#if defined(DRWAV_HAS_WFOPEN)
-    {
-        /* Use _wfopen() on Windows. */
-    #if defined(_MSC_VER) && _MSC_VER >= 1400
-        errno_t err = _wfopen_s(ppFile, pFilePath, pOpenMode);
-        if (err != 0) {
-            return drwav_result_from_errno(err);
-        }
-    #else
-        *ppFile = _wfopen(pFilePath, pOpenMode);
-        if (*ppFile == NULL) {
-            return drwav_result_from_errno(errno);
-        }
-    #endif
-        (void)pAllocationCallbacks;
-    }
-#else
-    /*
-    Use fopen() on anything other than Windows. Requires a conversion. This is annoying because fopen() is locale specific. The only real way I can
-    think of to do this is with wcsrtombs(). Note that wcstombs() is apparently not thread-safe because it uses a static global mbstate_t object for
-    maintaining state. I've checked this with -std=c89 and it works, but if somebody get's a compiler error I'll look into improving compatibility.
-    */
-    {
-        mbstate_t mbs;
-        size_t lenMB;
-        const wchar_t* pFilePathTemp = pFilePath;
-        char* pFilePathMB = NULL;
-        char pOpenModeMB[32] = {0};
-
-        /* Get the length first. */
-        DRWAV_ZERO_OBJECT(&mbs);
-        lenMB = wcsrtombs(NULL, &pFilePathTemp, 0, &mbs);
-        if (lenMB == (size_t)-1) {
-            return drwav_result_from_errno(errno);
-        }
-
-        pFilePathMB = (char*)drwav__malloc_from_callbacks(lenMB + 1, pAllocationCallbacks);
-        if (pFilePathMB == NULL) {
-            return DRWAV_OUT_OF_MEMORY;
-        }
-
-        pFilePathTemp = pFilePath;
-        DRWAV_ZERO_OBJECT(&mbs);
-        wcsrtombs(pFilePathMB, &pFilePathTemp, lenMB + 1, &mbs);
-
-        /* The open mode should always consist of ASCII characters so we should be able to do a trivial conversion. */
-        {
-            size_t i = 0;
-            for (;;) {
-                if (pOpenMode[i] == 0) {
-                    pOpenModeMB[i] = '\0';
-                    break;
-                }
-
-                pOpenModeMB[i] = (char)pOpenMode[i];
-                i += 1;
-            }
-        }
-
-        *ppFile = fopen(pFilePathMB, pOpenModeMB);
-
-        drwav__free_from_callbacks(pFilePathMB, pAllocationCallbacks);
-    }
-
-    if (*ppFile == NULL) {
-        return DRWAV_ERROR;
-    }
-#endif
-
-    return DRWAV_SUCCESS;
-}
-
-
-static size_t drwav__on_read_stdio(void* pUserData, void* pBufferOut, size_t bytesToRead)
-{
-    return fread(pBufferOut, 1, bytesToRead, (FILE*)pUserData);
-}
-
-static size_t drwav__on_write_stdio(void* pUserData, const void* pData, size_t bytesToWrite)
-{
-    return fwrite(pData, 1, bytesToWrite, (FILE*)pUserData);
-}
-
-static drwav_bool32 drwav__on_seek_stdio(void* pUserData, int offset, drwav_seek_origin origin)
-{
-    return fseek((FILE*)pUserData, offset, (origin == drwav_seek_origin_current) ? SEEK_CUR : SEEK_SET) == 0;
-}
-
-DRWAV_API drwav_bool32 drwav_init_file(drwav* pWav, const char* filename, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    return drwav_init_file_ex(pWav, filename, NULL, NULL, 0, pAllocationCallbacks);
-}
-
-
-static drwav_bool32 drwav_init_file__internal_FILE(drwav* pWav, FILE* pFile, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    drwav_bool32 result;
-
-    result = drwav_preinit(pWav, drwav__on_read_stdio, drwav__on_seek_stdio, (void*)pFile, pAllocationCallbacks);
-    if (result != DRWAV_TRUE) {
-        fclose(pFile);
-        return result;
-    }
-
-    result = drwav_init__internal(pWav, onChunk, pChunkUserData, flags);
-    if (result != DRWAV_TRUE) {
-        fclose(pFile);
-        return result;
-    }
-
-    return DRWAV_TRUE;
-}
-
-DRWAV_API drwav_bool32 drwav_init_file_ex(drwav* pWav, const char* filename, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    FILE* pFile;
-    if (drwav_fopen(&pFile, filename, "rb") != DRWAV_SUCCESS) {
-        return DRWAV_FALSE;
-    }
-
-    /* This takes ownership of the FILE* object. */
-    return drwav_init_file__internal_FILE(pWav, pFile, onChunk, pChunkUserData, flags, pAllocationCallbacks);
-}
-
-DRWAV_API drwav_bool32 drwav_init_file_w(drwav* pWav, const wchar_t* filename, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    return drwav_init_file_ex_w(pWav, filename, NULL, NULL, 0, pAllocationCallbacks);
-}
-
-DRWAV_API drwav_bool32 drwav_init_file_ex_w(drwav* pWav, const wchar_t* filename, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    FILE* pFile;
-    if (drwav_wfopen(&pFile, filename, L"rb", pAllocationCallbacks) != DRWAV_SUCCESS) {
-        return DRWAV_FALSE;
-    }
-
-    /* This takes ownership of the FILE* object. */
-    return drwav_init_file__internal_FILE(pWav, pFile, onChunk, pChunkUserData, flags, pAllocationCallbacks);
-}
-
-
-static drwav_bool32 drwav_init_file_write__internal_FILE(drwav* pWav, FILE* pFile, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    drwav_bool32 result;
-
-    result = drwav_preinit_write(pWav, pFormat, isSequential, drwav__on_write_stdio, drwav__on_seek_stdio, (void*)pFile, pAllocationCallbacks);
-    if (result != DRWAV_TRUE) {
-        fclose(pFile);
-        return result;
-    }
-
-    result = drwav_init_write__internal(pWav, pFormat, totalSampleCount);
-    if (result != DRWAV_TRUE) {
-        fclose(pFile);
-        return result;
-    }
-
-    return DRWAV_TRUE;
-}
-
-static drwav_bool32 drwav_init_file_write__internal(drwav* pWav, const char* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    FILE* pFile;
-    if (drwav_fopen(&pFile, filename, "wb") != DRWAV_SUCCESS) {
-        return DRWAV_FALSE;
-    }
-
-    /* This takes ownership of the FILE* object. */
-    return drwav_init_file_write__internal_FILE(pWav, pFile, pFormat, totalSampleCount, isSequential, pAllocationCallbacks);
-}
-
-static drwav_bool32 drwav_init_file_write_w__internal(drwav* pWav, const wchar_t* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    FILE* pFile;
-    if (drwav_wfopen(&pFile, filename, L"wb", pAllocationCallbacks) != DRWAV_SUCCESS) {
-        return DRWAV_FALSE;
-    }
-
-    /* This takes ownership of the FILE* object. */
-    return drwav_init_file_write__internal_FILE(pWav, pFile, pFormat, totalSampleCount, isSequential, pAllocationCallbacks);
-}
-
-DRWAV_API drwav_bool32 drwav_init_file_write(drwav* pWav, const char* filename, const drwav_data_format* pFormat, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    return drwav_init_file_write__internal(pWav, filename, pFormat, 0, DRWAV_FALSE, pAllocationCallbacks);
-}
-
-DRWAV_API drwav_bool32 drwav_init_file_write_sequential(drwav* pWav, const char* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    return drwav_init_file_write__internal(pWav, filename, pFormat, totalSampleCount, DRWAV_TRUE, pAllocationCallbacks);
-}
-
-DRWAV_API drwav_bool32 drwav_init_file_write_sequential_pcm_frames(drwav* pWav, const char* filename, const drwav_data_format* pFormat, drwav_uint64 totalPCMFrameCount, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    if (pFormat == NULL) {
-        return DRWAV_FALSE;
-    }
-
-    return drwav_init_file_write_sequential(pWav, filename, pFormat, totalPCMFrameCount*pFormat->channels, pAllocationCallbacks);
-}
-
-DRWAV_API drwav_bool32 drwav_init_file_write_w(drwav* pWav, const wchar_t* filename, const drwav_data_format* pFormat, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    return drwav_init_file_write_w__internal(pWav, filename, pFormat, 0, DRWAV_FALSE, pAllocationCallbacks);
-}
-
-DRWAV_API drwav_bool32 drwav_init_file_write_sequential_w(drwav* pWav, const wchar_t* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    return drwav_init_file_write_w__internal(pWav, filename, pFormat, totalSampleCount, DRWAV_TRUE, pAllocationCallbacks);
-}
-
-DRWAV_API drwav_bool32 drwav_init_file_write_sequential_pcm_frames_w(drwav* pWav, const wchar_t* filename, const drwav_data_format* pFormat, drwav_uint64 totalPCMFrameCount, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    if (pFormat == NULL) {
-        return DRWAV_FALSE;
-    }
-
-    return drwav_init_file_write_sequential_w(pWav, filename, pFormat, totalPCMFrameCount*pFormat->channels, pAllocationCallbacks);
-}
-#endif  /* DR_WAV_NO_STDIO */
-
-
-static size_t drwav__on_read_memory(void* pUserData, void* pBufferOut, size_t bytesToRead)
-{
-    drwav* pWav = (drwav*)pUserData;
-    size_t bytesRemaining;
-
-    DRWAV_ASSERT(pWav != NULL);
-    DRWAV_ASSERT(pWav->memoryStream.dataSize >= pWav->memoryStream.currentReadPos);
-
-    bytesRemaining = pWav->memoryStream.dataSize - pWav->memoryStream.currentReadPos;
-    if (bytesToRead > bytesRemaining) {
-        bytesToRead = bytesRemaining;
-    }
-
-    if (bytesToRead > 0) {
-        DRWAV_COPY_MEMORY(pBufferOut, pWav->memoryStream.data + pWav->memoryStream.currentReadPos, bytesToRead);
-        pWav->memoryStream.currentReadPos += bytesToRead;
-    }
-
-    return bytesToRead;
-}
-
-static drwav_bool32 drwav__on_seek_memory(void* pUserData, int offset, drwav_seek_origin origin)
-{
-    drwav* pWav = (drwav*)pUserData;
-    DRWAV_ASSERT(pWav != NULL);
-
-    if (origin == drwav_seek_origin_current) {
-        if (offset > 0) {
-            if (pWav->memoryStream.currentReadPos + offset > pWav->memoryStream.dataSize) {
-                return DRWAV_FALSE; /* Trying to seek too far forward. */
-            }
-        } else {
-            if (pWav->memoryStream.currentReadPos < (size_t)-offset) {
-                return DRWAV_FALSE; /* Trying to seek too far backwards. */
-            }
-        }
-
-        /* This will never underflow thanks to the clamps above. */
-        pWav->memoryStream.currentReadPos += offset;
-    } else {
-        if ((drwav_uint32)offset <= pWav->memoryStream.dataSize) {
-            pWav->memoryStream.currentReadPos = offset;
-        } else {
-            return DRWAV_FALSE; /* Trying to seek too far forward. */
-        }
-    }
-    
-    return DRWAV_TRUE;
-}
-
-static size_t drwav__on_write_memory(void* pUserData, const void* pDataIn, size_t bytesToWrite)
-{
-    drwav* pWav = (drwav*)pUserData;
-    size_t bytesRemaining;
-
-    DRWAV_ASSERT(pWav != NULL);
-    DRWAV_ASSERT(pWav->memoryStreamWrite.dataCapacity >= pWav->memoryStreamWrite.currentWritePos);
-
-    bytesRemaining = pWav->memoryStreamWrite.dataCapacity - pWav->memoryStreamWrite.currentWritePos;
-    if (bytesRemaining < bytesToWrite) {
-        /* Need to reallocate. */
-        void* pNewData;
-        size_t newDataCapacity = (pWav->memoryStreamWrite.dataCapacity == 0) ? 256 : pWav->memoryStreamWrite.dataCapacity * 2;
-
-        /* If doubling wasn't enough, just make it the minimum required size to write the data. */
-        if ((newDataCapacity - pWav->memoryStreamWrite.currentWritePos) < bytesToWrite) {
-            newDataCapacity = pWav->memoryStreamWrite.currentWritePos + bytesToWrite;
-        }
-
-        pNewData = drwav__realloc_from_callbacks(*pWav->memoryStreamWrite.ppData, newDataCapacity, pWav->memoryStreamWrite.dataCapacity, &pWav->allocationCallbacks);
-        if (pNewData == NULL) {
-            return 0;
-        }
-
-        *pWav->memoryStreamWrite.ppData = pNewData;
-        pWav->memoryStreamWrite.dataCapacity = newDataCapacity;
-    }
-
-    DRWAV_COPY_MEMORY(((drwav_uint8*)(*pWav->memoryStreamWrite.ppData)) + pWav->memoryStreamWrite.currentWritePos, pDataIn, bytesToWrite);
-
-    pWav->memoryStreamWrite.currentWritePos += bytesToWrite;
-    if (pWav->memoryStreamWrite.dataSize < pWav->memoryStreamWrite.currentWritePos) {
-        pWav->memoryStreamWrite.dataSize = pWav->memoryStreamWrite.currentWritePos;
-    }
-
-    *pWav->memoryStreamWrite.pDataSize = pWav->memoryStreamWrite.dataSize;
-
-    return bytesToWrite;
-}
-
-static drwav_bool32 drwav__on_seek_memory_write(void* pUserData, int offset, drwav_seek_origin origin)
-{
-    drwav* pWav = (drwav*)pUserData;
-    DRWAV_ASSERT(pWav != NULL);
-
-    if (origin == drwav_seek_origin_current) {
-        if (offset > 0) {
-            if (pWav->memoryStreamWrite.currentWritePos + offset > pWav->memoryStreamWrite.dataSize) {
-                offset = (int)(pWav->memoryStreamWrite.dataSize - pWav->memoryStreamWrite.currentWritePos);  /* Trying to seek too far forward. */
-            }
-        } else {
-            if (pWav->memoryStreamWrite.currentWritePos < (size_t)-offset) {
-                offset = -(int)pWav->memoryStreamWrite.currentWritePos;  /* Trying to seek too far backwards. */
-            }
-        }
-
-        /* This will never underflow thanks to the clamps above. */
-        pWav->memoryStreamWrite.currentWritePos += offset;
-    } else {
-        if ((drwav_uint32)offset <= pWav->memoryStreamWrite.dataSize) {
-            pWav->memoryStreamWrite.currentWritePos = offset;
-        } else {
-            pWav->memoryStreamWrite.currentWritePos = pWav->memoryStreamWrite.dataSize;  /* Trying to seek too far forward. */
-        }
-    }
-    
-    return DRWAV_TRUE;
-}
-
-DRWAV_API drwav_bool32 drwav_init_memory(drwav* pWav, const void* data, size_t dataSize, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    return drwav_init_memory_ex(pWav, data, dataSize, NULL, NULL, 0, pAllocationCallbacks);
-}
-
-DRWAV_API drwav_bool32 drwav_init_memory_ex(drwav* pWav, const void* data, size_t dataSize, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    if (data == NULL || dataSize == 0) {
-        return DRWAV_FALSE;
-    }
-
-    if (!drwav_preinit(pWav, drwav__on_read_memory, drwav__on_seek_memory, pWav, pAllocationCallbacks)) {
-        return DRWAV_FALSE;
-    }
-
-    pWav->memoryStream.data = (const drwav_uint8*)data;
-    pWav->memoryStream.dataSize = dataSize;
-    pWav->memoryStream.currentReadPos = 0;
-
-    return drwav_init__internal(pWav, onChunk, pChunkUserData, flags);
-}
-
-
-static drwav_bool32 drwav_init_memory_write__internal(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    if (ppData == NULL || pDataSize == NULL) {
-        return DRWAV_FALSE;
-    }
-
-    *ppData = NULL; /* Important because we're using realloc()! */
-    *pDataSize = 0;
-
-    if (!drwav_preinit_write(pWav, pFormat, isSequential, drwav__on_write_memory, drwav__on_seek_memory_write, pWav, pAllocationCallbacks)) {
-        return DRWAV_FALSE;
-    }
-
-    pWav->memoryStreamWrite.ppData = ppData;
-    pWav->memoryStreamWrite.pDataSize = pDataSize;
-    pWav->memoryStreamWrite.dataSize = 0;
-    pWav->memoryStreamWrite.dataCapacity = 0;
-    pWav->memoryStreamWrite.currentWritePos = 0;
-
-    return drwav_init_write__internal(pWav, pFormat, totalSampleCount);
-}
-
-DRWAV_API drwav_bool32 drwav_init_memory_write(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    return drwav_init_memory_write__internal(pWav, ppData, pDataSize, pFormat, 0, DRWAV_FALSE, pAllocationCallbacks);
-}
-
-DRWAV_API drwav_bool32 drwav_init_memory_write_sequential(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    return drwav_init_memory_write__internal(pWav, ppData, pDataSize, pFormat, totalSampleCount, DRWAV_TRUE, pAllocationCallbacks);
-}
-
-DRWAV_API drwav_bool32 drwav_init_memory_write_sequential_pcm_frames(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, drwav_uint64 totalPCMFrameCount, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    if (pFormat == NULL) {
-        return DRWAV_FALSE;
-    }
-
-    return drwav_init_memory_write_sequential(pWav, ppData, pDataSize, pFormat, totalPCMFrameCount*pFormat->channels, pAllocationCallbacks);
-}
-
-
-
-DRWAV_API drwav_result drwav_uninit(drwav* pWav)
-{
-    drwav_result result = DRWAV_SUCCESS;
-
-    if (pWav == NULL) {
-        return DRWAV_INVALID_ARGS;
-    }
-
-    /*
-    If the drwav object was opened in write mode we'll need to finalize a few things:
-      - Make sure the "data" chunk is aligned to 16-bits for RIFF containers, or 64 bits for W64 containers.
-      - Set the size of the "data" chunk.
-    */
-    if (pWav->onWrite != NULL) {
-        drwav_uint32 paddingSize = 0;
-
-        /* Padding. Do not adjust pWav->dataChunkDataSize - this should not include the padding. */
-        if (pWav->container == drwav_container_riff) {
-            paddingSize = drwav__chunk_padding_size_riff(pWav->dataChunkDataSize);
-        } else {
-            paddingSize = drwav__chunk_padding_size_w64(pWav->dataChunkDataSize);
-        }
-        
-        if (paddingSize > 0) {
-            drwav_uint64 paddingData = 0;
-            pWav->onWrite(pWav->pUserData, &paddingData, paddingSize);
-        }
-
-        /*
-        Chunk sizes. When using sequential mode, these will have been filled in at initialization time. We only need
-        to do this when using non-sequential mode.
-        */
-        if (pWav->onSeek && !pWav->isSequentialWrite) {
-            if (pWav->container == drwav_container_riff) {
-                /* The "RIFF" chunk size. */
-                if (pWav->onSeek(pWav->pUserData, 4, drwav_seek_origin_start)) {
-                    drwav_uint32 riffChunkSize = drwav__riff_chunk_size_riff(pWav->dataChunkDataSize);
-                    pWav->onWrite(pWav->pUserData, &riffChunkSize, 4);
-                }
-
-                /* the "data" chunk size. */
-                if (pWav->onSeek(pWav->pUserData, (int)pWav->dataChunkDataPos + 4, drwav_seek_origin_start)) {
-                    drwav_uint32 dataChunkSize = drwav__data_chunk_size_riff(pWav->dataChunkDataSize);
-                    pWav->onWrite(pWav->pUserData, &dataChunkSize, 4);
-                }
-            } else {
-                /* The "RIFF" chunk size. */
-                if (pWav->onSeek(pWav->pUserData, 16, drwav_seek_origin_start)) {
-                    drwav_uint64 riffChunkSize = drwav__riff_chunk_size_w64(pWav->dataChunkDataSize);
-                    pWav->onWrite(pWav->pUserData, &riffChunkSize, 8);
-                }
-
-                /* The "data" chunk size. */
-                if (pWav->onSeek(pWav->pUserData, (int)pWav->dataChunkDataPos + 16, drwav_seek_origin_start)) {
-                    drwav_uint64 dataChunkSize = drwav__data_chunk_size_w64(pWav->dataChunkDataSize);
-                    pWav->onWrite(pWav->pUserData, &dataChunkSize, 8);
-                }
-            }
-        }
-
-        /* Validation for sequential mode. */
-        if (pWav->isSequentialWrite) {
-            if (pWav->dataChunkDataSize != pWav->dataChunkDataSizeTargetWrite) {
-                result = DRWAV_INVALID_FILE;
-            }
-        }
-    }
-
-#ifndef DR_WAV_NO_STDIO
-    /*
-    If we opened the file with drwav_open_file() we will want to close the file handle. We can know whether or not drwav_open_file()
-    was used by looking at the onRead and onSeek callbacks.
-    */
-    if (pWav->onRead == drwav__on_read_stdio || pWav->onWrite == drwav__on_write_stdio) {
-        fclose((FILE*)pWav->pUserData);
-    }
-#endif
-
-    return result;
-}
-
-
-
-DRWAV_API size_t drwav_read_raw(drwav* pWav, size_t bytesToRead, void* pBufferOut)
-{
-    size_t bytesRead;
-
-    if (pWav == NULL || bytesToRead == 0 || pBufferOut == NULL) {
-        return 0;
-    }
-
-    if (bytesToRead > pWav->bytesRemaining) {
-        bytesToRead = (size_t)pWav->bytesRemaining;
-    }
-
-    bytesRead = pWav->onRead(pWav->pUserData, pBufferOut, bytesToRead);
-
-    pWav->bytesRemaining -= bytesRead;
-    return bytesRead;
-}
-
-
-
-DRWAV_API drwav_uint64 drwav_read_pcm_frames_le(drwav* pWav, drwav_uint64 framesToRead, void* pBufferOut)
-{
-    drwav_uint32 bytesPerFrame;
-
-    if (pWav == NULL || framesToRead == 0 || pBufferOut == NULL) {
-        return 0;
-    }
-
-    /* Cannot use this function for compressed formats. */
-    if (drwav__is_compressed_format_tag(pWav->translatedFormatTag)) {
-        return 0;
-    }
-
-    bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
-    if (bytesPerFrame == 0) {
-        return 0;
-    }
-
-    /* Don't try to read more samples than can potentially fit in the output buffer. */
-    if (framesToRead * bytesPerFrame > DRWAV_SIZE_MAX) {
-        framesToRead = DRWAV_SIZE_MAX / bytesPerFrame;
-    }
-
-    return drwav_read_raw(pWav, (size_t)(framesToRead * bytesPerFrame), pBufferOut) / bytesPerFrame;
-}
-
-DRWAV_API drwav_uint64 drwav_read_pcm_frames_be(drwav* pWav, drwav_uint64 framesToRead, void* pBufferOut)
-{
-    drwav_uint64 framesRead = drwav_read_pcm_frames_le(pWav, framesToRead, pBufferOut);
-    drwav__bswap_samples(pBufferOut, framesRead*pWav->channels, drwav_get_bytes_per_pcm_frame(pWav)/pWav->channels, pWav->translatedFormatTag);
-
-    return framesRead;
-}
-
-DRWAV_API drwav_uint64 drwav_read_pcm_frames(drwav* pWav, drwav_uint64 framesToRead, void* pBufferOut)
-{
-    if (drwav__is_little_endian()) {
-        return drwav_read_pcm_frames_le(pWav, framesToRead, pBufferOut);
-    } else {
-        return drwav_read_pcm_frames_be(pWav, framesToRead, pBufferOut);
-    }
-}
-
-
-
-DRWAV_API drwav_bool32 drwav_seek_to_first_pcm_frame(drwav* pWav)
-{
-    if (pWav->onWrite != NULL) {
-        return DRWAV_FALSE; /* No seeking in write mode. */
-    }
-
-    if (!pWav->onSeek(pWav->pUserData, (int)pWav->dataChunkDataPos, drwav_seek_origin_start)) {
-        return DRWAV_FALSE;
-    }
-
-    if (drwav__is_compressed_format_tag(pWav->translatedFormatTag)) {
-        pWav->compressed.iCurrentPCMFrame = 0;
-    }
-    
-    pWav->bytesRemaining = pWav->dataChunkDataSize;
-    return DRWAV_TRUE;
-}
-
-DRWAV_API drwav_bool32 drwav_seek_to_pcm_frame(drwav* pWav, drwav_uint64 targetFrameIndex)
-{
-    /* Seeking should be compatible with wave files > 2GB. */
-
-    if (pWav == NULL || pWav->onSeek == NULL) {
-        return DRWAV_FALSE;
-    }
-
-    /* No seeking in write mode. */
-    if (pWav->onWrite != NULL) {
-        return DRWAV_FALSE;
-    }
-
-    /* If there are no samples, just return DRWAV_TRUE without doing anything. */
-    if (pWav->totalPCMFrameCount == 0) {
-        return DRWAV_TRUE;
-    }
-
-    /* Make sure the sample is clamped. */
-    if (targetFrameIndex >= pWav->totalPCMFrameCount) {
-        targetFrameIndex  = pWav->totalPCMFrameCount - 1;
-    }
-
-    /*
-    For compressed formats we just use a slow generic seek. If we are seeking forward we just seek forward. If we are going backwards we need
-    to seek back to the start.
-    */
-    if (drwav__is_compressed_format_tag(pWav->translatedFormatTag)) {
-        /* TODO: This can be optimized. */
-        
-        /*
-        If we're seeking forward it's simple - just keep reading samples until we hit the sample we're requesting. If we're seeking backwards,
-        we first need to seek back to the start and then just do the same thing as a forward seek.
-        */
-        if (targetFrameIndex < pWav->compressed.iCurrentPCMFrame) {
-            if (!drwav_seek_to_first_pcm_frame(pWav)) {
-                return DRWAV_FALSE;
-            }
-        }
-
-        if (targetFrameIndex > pWav->compressed.iCurrentPCMFrame) {
-            drwav_uint64 offsetInFrames = targetFrameIndex - pWav->compressed.iCurrentPCMFrame;
-
-            drwav_int16 devnull[2048];
-            while (offsetInFrames > 0) {
-                drwav_uint64 framesRead = 0;
-                drwav_uint64 framesToRead = offsetInFrames;
-                if (framesToRead > drwav_countof(devnull)/pWav->channels) {
-                    framesToRead = drwav_countof(devnull)/pWav->channels;
-                }
-
-                if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) {
-                    framesRead = drwav_read_pcm_frames_s16__msadpcm(pWav, framesToRead, devnull);
-                } else if (pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) {
-                    framesRead = drwav_read_pcm_frames_s16__ima(pWav, framesToRead, devnull);
-                } else {
-                    DRWAV_ASSERT(DRWAV_FALSE);  /* If this assertion is triggered it means I've implemented a new compressed format but forgot to add a branch for it here. */
-                }
-
-                if (framesRead != framesToRead) {
-                    return DRWAV_FALSE;
-                }
-
-                offsetInFrames -= framesRead;
-            }
-        }
-    } else {
-        drwav_uint64 totalSizeInBytes;
-        drwav_uint64 currentBytePos;
-        drwav_uint64 targetBytePos;
-        drwav_uint64 offset;
-
-        totalSizeInBytes = pWav->totalPCMFrameCount * drwav_get_bytes_per_pcm_frame(pWav);
-        DRWAV_ASSERT(totalSizeInBytes >= pWav->bytesRemaining);
-
-        currentBytePos = totalSizeInBytes - pWav->bytesRemaining;
-        targetBytePos  = targetFrameIndex * drwav_get_bytes_per_pcm_frame(pWav);
-
-        if (currentBytePos < targetBytePos) {
-            /* Offset forwards. */
-            offset = (targetBytePos - currentBytePos);
-        } else {
-            /* Offset backwards. */
-            if (!drwav_seek_to_first_pcm_frame(pWav)) {
-                return DRWAV_FALSE;
-            }
-            offset = targetBytePos;
-        }
-
-        while (offset > 0) {
-            int offset32 = ((offset > INT_MAX) ? INT_MAX : (int)offset);
-            if (!pWav->onSeek(pWav->pUserData, offset32, drwav_seek_origin_current)) {
-                return DRWAV_FALSE;
-            }
-
-            pWav->bytesRemaining -= offset32;
-            offset -= offset32;
-        }
-    }
-
-    return DRWAV_TRUE;
-}
-
-
-DRWAV_API size_t drwav_write_raw(drwav* pWav, size_t bytesToWrite, const void* pData)
-{
-    size_t bytesWritten;
-
-    if (pWav == NULL || bytesToWrite == 0 || pData == NULL) {
-        return 0;
-    }
-
-    bytesWritten = pWav->onWrite(pWav->pUserData, pData, bytesToWrite);
-    pWav->dataChunkDataSize += bytesWritten;
-
-    return bytesWritten;
-}
-
-
-DRWAV_API drwav_uint64 drwav_write_pcm_frames_le(drwav* pWav, drwav_uint64 framesToWrite, const void* pData)
-{
-    drwav_uint64 bytesToWrite;
-    drwav_uint64 bytesWritten;
-    const drwav_uint8* pRunningData;
-
-    if (pWav == NULL || framesToWrite == 0 || pData == NULL) {
-        return 0;
-    }
-
-    bytesToWrite = ((framesToWrite * pWav->channels * pWav->bitsPerSample) / 8);
-    if (bytesToWrite > DRWAV_SIZE_MAX) {
-        return 0;
-    }
-
-    bytesWritten = 0;
-    pRunningData = (const drwav_uint8*)pData;
-
-    while (bytesToWrite > 0) {
-        size_t bytesJustWritten;
-        drwav_uint64 bytesToWriteThisIteration;
-
-        bytesToWriteThisIteration = bytesToWrite;
-        DRWAV_ASSERT(bytesToWriteThisIteration <= DRWAV_SIZE_MAX);  /* <-- This is checked above. */
-
-        bytesJustWritten = drwav_write_raw(pWav, (size_t)bytesToWriteThisIteration, pRunningData);
-        if (bytesJustWritten == 0) {
-            break;
-        }
-
-        bytesToWrite -= bytesJustWritten;
-        bytesWritten += bytesJustWritten;
-        pRunningData += bytesJustWritten;
-    }
-
-    return (bytesWritten * 8) / pWav->bitsPerSample / pWav->channels;
-}
-
-DRWAV_API drwav_uint64 drwav_write_pcm_frames_be(drwav* pWav, drwav_uint64 framesToWrite, const void* pData)
-{
-    drwav_uint64 bytesToWrite;
-    drwav_uint64 bytesWritten;
-    drwav_uint32 bytesPerSample;
-    const drwav_uint8* pRunningData;
-
-    if (pWav == NULL || framesToWrite == 0 || pData == NULL) {
-        return 0;
-    }
-
-    bytesToWrite = ((framesToWrite * pWav->channels * pWav->bitsPerSample) / 8);
-    if (bytesToWrite > DRWAV_SIZE_MAX) {
-        return 0;
-    }
-
-    bytesWritten = 0;
-    pRunningData = (const drwav_uint8*)pData;
-
-    bytesPerSample = drwav_get_bytes_per_pcm_frame(pWav) / pWav->channels;
-    
-    while (bytesToWrite > 0) {
-        drwav_uint8 temp[4096];
-        drwav_uint32 sampleCount;
-        size_t bytesJustWritten;
-        drwav_uint64 bytesToWriteThisIteration;
-
-        bytesToWriteThisIteration = bytesToWrite;
-        DRWAV_ASSERT(bytesToWriteThisIteration <= DRWAV_SIZE_MAX);  /* <-- This is checked above. */
-
-        /*
-        WAV files are always little-endian. We need to byte swap on big-endian architectures. Since our input buffer is read-only we need
-        to use an intermediary buffer for the conversion.
-        */
-        sampleCount = sizeof(temp)/bytesPerSample;
-
-        if (bytesToWriteThisIteration > ((drwav_uint64)sampleCount)*bytesPerSample) {
-            bytesToWriteThisIteration = ((drwav_uint64)sampleCount)*bytesPerSample;
-        }
-
-        DRWAV_COPY_MEMORY(temp, pRunningData, (size_t)bytesToWriteThisIteration);
-        drwav__bswap_samples(temp, sampleCount, bytesPerSample, pWav->translatedFormatTag);
-
-        bytesJustWritten = drwav_write_raw(pWav, (size_t)bytesToWriteThisIteration, temp);
-        if (bytesJustWritten == 0) {
-            break;
-        }
-
-        bytesToWrite -= bytesJustWritten;
-        bytesWritten += bytesJustWritten;
-        pRunningData += bytesJustWritten;
-    }
-
-    return (bytesWritten * 8) / pWav->bitsPerSample / pWav->channels;
-}
-
-DRWAV_API drwav_uint64 drwav_write_pcm_frames(drwav* pWav, drwav_uint64 framesToWrite, const void* pData)
-{
-    if (drwav__is_little_endian()) {
-        return drwav_write_pcm_frames_le(pWav, framesToWrite, pData);
-    } else {
-        return drwav_write_pcm_frames_be(pWav, framesToWrite, pData);
-    }
-}
-
-
-static drwav_uint64 drwav_read_pcm_frames_s16__msadpcm(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
-{
-    drwav_uint64 totalFramesRead = 0;
-
-    DRWAV_ASSERT(pWav != NULL);
-    DRWAV_ASSERT(framesToRead > 0);
-    DRWAV_ASSERT(pBufferOut != NULL);
-
-    /* TODO: Lots of room for optimization here. */
-
-    while (framesToRead > 0 && pWav->compressed.iCurrentPCMFrame < pWav->totalPCMFrameCount) {
-        /* If there are no cached frames we need to load a new block. */
-        if (pWav->msadpcm.cachedFrameCount == 0 && pWav->msadpcm.bytesRemainingInBlock == 0) {
-            if (pWav->channels == 1) {
-                /* Mono. */
-                drwav_uint8 header[7];
-                if (pWav->onRead(pWav->pUserData, header, sizeof(header)) != sizeof(header)) {
-                    return totalFramesRead;
-                }
-                pWav->msadpcm.bytesRemainingInBlock = pWav->fmt.blockAlign - sizeof(header);
-
-                pWav->msadpcm.predictor[0]     = header[0];
-                pWav->msadpcm.delta[0]         = drwav__bytes_to_s16(header + 1);
-                pWav->msadpcm.prevFrames[0][1] = (drwav_int32)drwav__bytes_to_s16(header + 3);
-                pWav->msadpcm.prevFrames[0][0] = (drwav_int32)drwav__bytes_to_s16(header + 5);
-                pWav->msadpcm.cachedFrames[2]  = pWav->msadpcm.prevFrames[0][0];
-                pWav->msadpcm.cachedFrames[3]  = pWav->msadpcm.prevFrames[0][1];
-                pWav->msadpcm.cachedFrameCount = 2;
-            } else {
-                /* Stereo. */
-                drwav_uint8 header[14];
-                if (pWav->onRead(pWav->pUserData, header, sizeof(header)) != sizeof(header)) {
-                    return totalFramesRead;
-                }
-                pWav->msadpcm.bytesRemainingInBlock = pWav->fmt.blockAlign - sizeof(header);
-
-                pWav->msadpcm.predictor[0] = header[0];
-                pWav->msadpcm.predictor[1] = header[1];
-                pWav->msadpcm.delta[0] = drwav__bytes_to_s16(header + 2);
-                pWav->msadpcm.delta[1] = drwav__bytes_to_s16(header + 4);
-                pWav->msadpcm.prevFrames[0][1] = (drwav_int32)drwav__bytes_to_s16(header + 6);
-                pWav->msadpcm.prevFrames[1][1] = (drwav_int32)drwav__bytes_to_s16(header + 8);
-                pWav->msadpcm.prevFrames[0][0] = (drwav_int32)drwav__bytes_to_s16(header + 10);
-                pWav->msadpcm.prevFrames[1][0] = (drwav_int32)drwav__bytes_to_s16(header + 12);
-
-                pWav->msadpcm.cachedFrames[0] = pWav->msadpcm.prevFrames[0][0];
-                pWav->msadpcm.cachedFrames[1] = pWav->msadpcm.prevFrames[1][0];
-                pWav->msadpcm.cachedFrames[2] = pWav->msadpcm.prevFrames[0][1];
-                pWav->msadpcm.cachedFrames[3] = pWav->msadpcm.prevFrames[1][1];
-                pWav->msadpcm.cachedFrameCount = 2;
-            }
-        }
-
-        /* Output anything that's cached. */
-        while (framesToRead > 0 && pWav->msadpcm.cachedFrameCount > 0 && pWav->compressed.iCurrentPCMFrame < pWav->totalPCMFrameCount) {
-            drwav_uint32 iSample = 0;
-            for (iSample = 0; iSample < pWav->channels; iSample += 1) {
-                pBufferOut[iSample] = (drwav_int16)pWav->msadpcm.cachedFrames[(drwav_countof(pWav->msadpcm.cachedFrames) - (pWav->msadpcm.cachedFrameCount*pWav->channels)) + iSample];
-            }
-
-            pBufferOut      += pWav->channels;
-            framesToRead    -= 1;
-            totalFramesRead += 1;
-            pWav->compressed.iCurrentPCMFrame += 1;
-            pWav->msadpcm.cachedFrameCount -= 1;
-        }
-
-        if (framesToRead == 0) {
-            return totalFramesRead;
-        }
-
-
-        /*
-        If there's nothing left in the cache, just go ahead and load more. If there's nothing left to load in the current block we just continue to the next
-        loop iteration which will trigger the loading of a new block.
-        */
-        if (pWav->msadpcm.cachedFrameCount == 0) {
-            if (pWav->msadpcm.bytesRemainingInBlock == 0) {
-                continue;
-            } else {
-                static drwav_int32 adaptationTable[] = { 
-                    230, 230, 230, 230, 307, 409, 512, 614, 
-                    768, 614, 512, 409, 307, 230, 230, 230 
-                };
-                static drwav_int32 coeff1Table[] = { 256, 512, 0, 192, 240, 460,  392 };
-                static drwav_int32 coeff2Table[] = { 0,  -256, 0, 64,  0,  -208, -232 };
-
-                drwav_uint8 nibbles;
-                drwav_int32 nibble0;
-                drwav_int32 nibble1;
-
-                if (pWav->onRead(pWav->pUserData, &nibbles, 1) != 1) {
-                    return totalFramesRead;
-                }
-                pWav->msadpcm.bytesRemainingInBlock -= 1;
-
-                /* TODO: Optimize away these if statements. */
-                nibble0 = ((nibbles & 0xF0) >> 4); if ((nibbles & 0x80)) { nibble0 |= 0xFFFFFFF0UL; }
-                nibble1 = ((nibbles & 0x0F) >> 0); if ((nibbles & 0x08)) { nibble1 |= 0xFFFFFFF0UL; }
-
-                if (pWav->channels == 1) {
-                    /* Mono. */
-                    drwav_int32 newSample0;
-                    drwav_int32 newSample1;
-
-                    newSample0  = ((pWav->msadpcm.prevFrames[0][1] * coeff1Table[pWav->msadpcm.predictor[0]]) + (pWav->msadpcm.prevFrames[0][0] * coeff2Table[pWav->msadpcm.predictor[0]])) >> 8;
-                    newSample0 += nibble0 * pWav->msadpcm.delta[0];
-                    newSample0  = drwav_clamp(newSample0, -32768, 32767);
-
-                    pWav->msadpcm.delta[0] = (adaptationTable[((nibbles & 0xF0) >> 4)] * pWav->msadpcm.delta[0]) >> 8;
-                    if (pWav->msadpcm.delta[0] < 16) {
-                        pWav->msadpcm.delta[0] = 16;
-                    }
-
-                    pWav->msadpcm.prevFrames[0][0] = pWav->msadpcm.prevFrames[0][1];
-                    pWav->msadpcm.prevFrames[0][1] = newSample0;
-
-
-                    newSample1  = ((pWav->msadpcm.prevFrames[0][1] * coeff1Table[pWav->msadpcm.predictor[0]]) + (pWav->msadpcm.prevFrames[0][0] * coeff2Table[pWav->msadpcm.predictor[0]])) >> 8;
-                    newSample1 += nibble1 * pWav->msadpcm.delta[0];
-                    newSample1  = drwav_clamp(newSample1, -32768, 32767);
-
-                    pWav->msadpcm.delta[0] = (adaptationTable[((nibbles & 0x0F) >> 0)] * pWav->msadpcm.delta[0]) >> 8;
-                    if (pWav->msadpcm.delta[0] < 16) {
-                        pWav->msadpcm.delta[0] = 16;
-                    }
-
-                    pWav->msadpcm.prevFrames[0][0] = pWav->msadpcm.prevFrames[0][1];
-                    pWav->msadpcm.prevFrames[0][1] = newSample1;
-
-
-                    pWav->msadpcm.cachedFrames[2] = newSample0;
-                    pWav->msadpcm.cachedFrames[3] = newSample1;
-                    pWav->msadpcm.cachedFrameCount = 2;
-                } else {
-                    /* Stereo. */
-                    drwav_int32 newSample0;
-                    drwav_int32 newSample1;
-
-                    /* Left. */
-                    newSample0  = ((pWav->msadpcm.prevFrames[0][1] * coeff1Table[pWav->msadpcm.predictor[0]]) + (pWav->msadpcm.prevFrames[0][0] * coeff2Table[pWav->msadpcm.predictor[0]])) >> 8;
-                    newSample0 += nibble0 * pWav->msadpcm.delta[0];
-                    newSample0  = drwav_clamp(newSample0, -32768, 32767);
-
-                    pWav->msadpcm.delta[0] = (adaptationTable[((nibbles & 0xF0) >> 4)] * pWav->msadpcm.delta[0]) >> 8;
-                    if (pWav->msadpcm.delta[0] < 16) {
-                        pWav->msadpcm.delta[0] = 16;
-                    }
-
-                    pWav->msadpcm.prevFrames[0][0] = pWav->msadpcm.prevFrames[0][1];
-                    pWav->msadpcm.prevFrames[0][1] = newSample0;
-
-
-                    /* Right. */
-                    newSample1  = ((pWav->msadpcm.prevFrames[1][1] * coeff1Table[pWav->msadpcm.predictor[1]]) + (pWav->msadpcm.prevFrames[1][0] * coeff2Table[pWav->msadpcm.predictor[1]])) >> 8;
-                    newSample1 += nibble1 * pWav->msadpcm.delta[1];
-                    newSample1  = drwav_clamp(newSample1, -32768, 32767);
-
-                    pWav->msadpcm.delta[1] = (adaptationTable[((nibbles & 0x0F) >> 0)] * pWav->msadpcm.delta[1]) >> 8;
-                    if (pWav->msadpcm.delta[1] < 16) {
-                        pWav->msadpcm.delta[1] = 16;
-                    }
-
-                    pWav->msadpcm.prevFrames[1][0] = pWav->msadpcm.prevFrames[1][1];
-                    pWav->msadpcm.prevFrames[1][1] = newSample1;
-
-                    pWav->msadpcm.cachedFrames[2] = newSample0;
-                    pWav->msadpcm.cachedFrames[3] = newSample1;
-                    pWav->msadpcm.cachedFrameCount = 1;
-                }
-            }
-        }
-    }
-
-    return totalFramesRead;
-}
-
-
-static drwav_uint64 drwav_read_pcm_frames_s16__ima(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
-{
-    drwav_uint64 totalFramesRead = 0;
-
-    DRWAV_ASSERT(pWav != NULL);
-    DRWAV_ASSERT(framesToRead > 0);
-    DRWAV_ASSERT(pBufferOut != NULL);
-
-    /* TODO: Lots of room for optimization here. */
-
-    while (framesToRead > 0 && pWav->compressed.iCurrentPCMFrame < pWav->totalPCMFrameCount) {
-        /* If there are no cached samples we need to load a new block. */
-        if (pWav->ima.cachedFrameCount == 0 && pWav->ima.bytesRemainingInBlock == 0) {
-            if (pWav->channels == 1) {
-                /* Mono. */
-                drwav_uint8 header[4];
-                if (pWav->onRead(pWav->pUserData, header, sizeof(header)) != sizeof(header)) {
-                    return totalFramesRead;
-                }
-                pWav->ima.bytesRemainingInBlock = pWav->fmt.blockAlign - sizeof(header);
-
-                pWav->ima.predictor[0] = drwav__bytes_to_s16(header + 0);
-                pWav->ima.stepIndex[0] = header[2];
-                pWav->ima.cachedFrames[drwav_countof(pWav->ima.cachedFrames) - 1] = pWav->ima.predictor[0];
-                pWav->ima.cachedFrameCount = 1;
-            } else {
-                /* Stereo. */
-                drwav_uint8 header[8];
-                if (pWav->onRead(pWav->pUserData, header, sizeof(header)) != sizeof(header)) {
-                    return totalFramesRead;
-                }
-                pWav->ima.bytesRemainingInBlock = pWav->fmt.blockAlign - sizeof(header);
-
-                pWav->ima.predictor[0] = drwav__bytes_to_s16(header + 0);
-                pWav->ima.stepIndex[0] = header[2];
-                pWav->ima.predictor[1] = drwav__bytes_to_s16(header + 4);
-                pWav->ima.stepIndex[1] = header[6];
-
-                pWav->ima.cachedFrames[drwav_countof(pWav->ima.cachedFrames) - 2] = pWav->ima.predictor[0];
-                pWav->ima.cachedFrames[drwav_countof(pWav->ima.cachedFrames) - 1] = pWav->ima.predictor[1];
-                pWav->ima.cachedFrameCount = 1;
-            }
-        }
-
-        /* Output anything that's cached. */
-        while (framesToRead > 0 && pWav->ima.cachedFrameCount > 0 && pWav->compressed.iCurrentPCMFrame < pWav->totalPCMFrameCount) {
-            drwav_uint32 iSample;
-            for (iSample = 0; iSample < pWav->channels; iSample += 1) {
-                pBufferOut[iSample] = (drwav_int16)pWav->ima.cachedFrames[(drwav_countof(pWav->ima.cachedFrames) - (pWav->ima.cachedFrameCount*pWav->channels)) + iSample];
-            }
-
-            pBufferOut      += pWav->channels;
-            framesToRead    -= 1;
-            totalFramesRead += 1;
-            pWav->compressed.iCurrentPCMFrame += 1;
-            pWav->ima.cachedFrameCount -= 1;
-        }
-
-        if (framesToRead == 0) {
-            return totalFramesRead;
-        }
-
-        /*
-        If there's nothing left in the cache, just go ahead and load more. If there's nothing left to load in the current block we just continue to the next
-        loop iteration which will trigger the loading of a new block.
-        */
-        if (pWav->ima.cachedFrameCount == 0) {
-            if (pWav->ima.bytesRemainingInBlock == 0) {
-                continue;
-            } else {
-                static drwav_int32 indexTable[16] = {
-                    -1, -1, -1, -1, 2, 4, 6, 8,
-                    -1, -1, -1, -1, 2, 4, 6, 8
-                };
-
-                static drwav_int32 stepTable[89] = {
-                    7,     8,     9,     10,    11,    12,    13,    14,    16,    17, 
-                    19,    21,    23,    25,    28,    31,    34,    37,    41,    45, 
-                    50,    55,    60,    66,    73,    80,    88,    97,    107,   118, 
-                    130,   143,   157,   173,   190,   209,   230,   253,   279,   307,
-                    337,   371,   408,   449,   494,   544,   598,   658,   724,   796,
-                    876,   963,   1060,  1166,  1282,  1411,  1552,  1707,  1878,  2066, 
-                    2272,  2499,  2749,  3024,  3327,  3660,  4026,  4428,  4871,  5358,
-                    5894,  6484,  7132,  7845,  8630,  9493,  10442, 11487, 12635, 13899, 
-                    15289, 16818, 18500, 20350, 22385, 24623, 27086, 29794, 32767 
-                };
-
-                drwav_uint32 iChannel;
-
-                /*
-                From what I can tell with stereo streams, it looks like every 4 bytes (8 samples) is for one channel. So it goes 4 bytes for the
-                left channel, 4 bytes for the right channel.
-                */
-                pWav->ima.cachedFrameCount = 8;
-                for (iChannel = 0; iChannel < pWav->channels; ++iChannel) {
-                    drwav_uint32 iByte;
-                    drwav_uint8 nibbles[4];
-                    if (pWav->onRead(pWav->pUserData, &nibbles, 4) != 4) {
-                        pWav->ima.cachedFrameCount = 0;
-                        return totalFramesRead;
-                    }
-                    pWav->ima.bytesRemainingInBlock -= 4;
-
-                    for (iByte = 0; iByte < 4; ++iByte) {
-                        drwav_uint8 nibble0 = ((nibbles[iByte] & 0x0F) >> 0);
-                        drwav_uint8 nibble1 = ((nibbles[iByte] & 0xF0) >> 4);
-
-                        drwav_int32 step      = stepTable[pWav->ima.stepIndex[iChannel]];
-                        drwav_int32 predictor = pWav->ima.predictor[iChannel];
-
-                        drwav_int32      diff  = step >> 3;
-                        if (nibble0 & 1) diff += step >> 2;
-                        if (nibble0 & 2) diff += step >> 1;
-                        if (nibble0 & 4) diff += step;
-                        if (nibble0 & 8) diff  = -diff;
-
-                        predictor = drwav_clamp(predictor + diff, -32768, 32767);
-                        pWav->ima.predictor[iChannel] = predictor;
-                        pWav->ima.stepIndex[iChannel] = drwav_clamp(pWav->ima.stepIndex[iChannel] + indexTable[nibble0], 0, (drwav_int32)drwav_countof(stepTable)-1);
-                        pWav->ima.cachedFrames[(drwav_countof(pWav->ima.cachedFrames) - (pWav->ima.cachedFrameCount*pWav->channels)) + (iByte*2+0)*pWav->channels + iChannel] = predictor;
-
-
-                        step      = stepTable[pWav->ima.stepIndex[iChannel]];
-                        predictor = pWav->ima.predictor[iChannel];
-
-                                         diff  = step >> 3;
-                        if (nibble1 & 1) diff += step >> 2;
-                        if (nibble1 & 2) diff += step >> 1;
-                        if (nibble1 & 4) diff += step;
-                        if (nibble1 & 8) diff  = -diff;
-
-                        predictor = drwav_clamp(predictor + diff, -32768, 32767);
-                        pWav->ima.predictor[iChannel] = predictor;
-                        pWav->ima.stepIndex[iChannel] = drwav_clamp(pWav->ima.stepIndex[iChannel] + indexTable[nibble1], 0, (drwav_int32)drwav_countof(stepTable)-1);
-                        pWav->ima.cachedFrames[(drwav_countof(pWav->ima.cachedFrames) - (pWav->ima.cachedFrameCount*pWav->channels)) + (iByte*2+1)*pWav->channels + iChannel] = predictor;
-                    }
-                }
-            }
-        }
-    }
-
-    return totalFramesRead;
-}
-
-
-#ifndef DR_WAV_NO_CONVERSION_API
-static unsigned short g_drwavAlawTable[256] = {
-    0xEA80, 0xEB80, 0xE880, 0xE980, 0xEE80, 0xEF80, 0xEC80, 0xED80, 0xE280, 0xE380, 0xE080, 0xE180, 0xE680, 0xE780, 0xE480, 0xE580, 
-    0xF540, 0xF5C0, 0xF440, 0xF4C0, 0xF740, 0xF7C0, 0xF640, 0xF6C0, 0xF140, 0xF1C0, 0xF040, 0xF0C0, 0xF340, 0xF3C0, 0xF240, 0xF2C0, 
-    0xAA00, 0xAE00, 0xA200, 0xA600, 0xBA00, 0xBE00, 0xB200, 0xB600, 0x8A00, 0x8E00, 0x8200, 0x8600, 0x9A00, 0x9E00, 0x9200, 0x9600, 
-    0xD500, 0xD700, 0xD100, 0xD300, 0xDD00, 0xDF00, 0xD900, 0xDB00, 0xC500, 0xC700, 0xC100, 0xC300, 0xCD00, 0xCF00, 0xC900, 0xCB00, 
-    0xFEA8, 0xFEB8, 0xFE88, 0xFE98, 0xFEE8, 0xFEF8, 0xFEC8, 0xFED8, 0xFE28, 0xFE38, 0xFE08, 0xFE18, 0xFE68, 0xFE78, 0xFE48, 0xFE58, 
-    0xFFA8, 0xFFB8, 0xFF88, 0xFF98, 0xFFE8, 0xFFF8, 0xFFC8, 0xFFD8, 0xFF28, 0xFF38, 0xFF08, 0xFF18, 0xFF68, 0xFF78, 0xFF48, 0xFF58, 
-    0xFAA0, 0xFAE0, 0xFA20, 0xFA60, 0xFBA0, 0xFBE0, 0xFB20, 0xFB60, 0xF8A0, 0xF8E0, 0xF820, 0xF860, 0xF9A0, 0xF9E0, 0xF920, 0xF960, 
-    0xFD50, 0xFD70, 0xFD10, 0xFD30, 0xFDD0, 0xFDF0, 0xFD90, 0xFDB0, 0xFC50, 0xFC70, 0xFC10, 0xFC30, 0xFCD0, 0xFCF0, 0xFC90, 0xFCB0, 
-    0x1580, 0x1480, 0x1780, 0x1680, 0x1180, 0x1080, 0x1380, 0x1280, 0x1D80, 0x1C80, 0x1F80, 0x1E80, 0x1980, 0x1880, 0x1B80, 0x1A80, 
-    0x0AC0, 0x0A40, 0x0BC0, 0x0B40, 0x08C0, 0x0840, 0x09C0, 0x0940, 0x0EC0, 0x0E40, 0x0FC0, 0x0F40, 0x0CC0, 0x0C40, 0x0DC0, 0x0D40, 
-    0x5600, 0x5200, 0x5E00, 0x5A00, 0x4600, 0x4200, 0x4E00, 0x4A00, 0x7600, 0x7200, 0x7E00, 0x7A00, 0x6600, 0x6200, 0x6E00, 0x6A00, 
-    0x2B00, 0x2900, 0x2F00, 0x2D00, 0x2300, 0x2100, 0x2700, 0x2500, 0x3B00, 0x3900, 0x3F00, 0x3D00, 0x3300, 0x3100, 0x3700, 0x3500, 
-    0x0158, 0x0148, 0x0178, 0x0168, 0x0118, 0x0108, 0x0138, 0x0128, 0x01D8, 0x01C8, 0x01F8, 0x01E8, 0x0198, 0x0188, 0x01B8, 0x01A8, 
-    0x0058, 0x0048, 0x0078, 0x0068, 0x0018, 0x0008, 0x0038, 0x0028, 0x00D8, 0x00C8, 0x00F8, 0x00E8, 0x0098, 0x0088, 0x00B8, 0x00A8, 
-    0x0560, 0x0520, 0x05E0, 0x05A0, 0x0460, 0x0420, 0x04E0, 0x04A0, 0x0760, 0x0720, 0x07E0, 0x07A0, 0x0660, 0x0620, 0x06E0, 0x06A0, 
-    0x02B0, 0x0290, 0x02F0, 0x02D0, 0x0230, 0x0210, 0x0270, 0x0250, 0x03B0, 0x0390, 0x03F0, 0x03D0, 0x0330, 0x0310, 0x0370, 0x0350
-};
-
-static unsigned short g_drwavMulawTable[256] = {
-    0x8284, 0x8684, 0x8A84, 0x8E84, 0x9284, 0x9684, 0x9A84, 0x9E84, 0xA284, 0xA684, 0xAA84, 0xAE84, 0xB284, 0xB684, 0xBA84, 0xBE84, 
-    0xC184, 0xC384, 0xC584, 0xC784, 0xC984, 0xCB84, 0xCD84, 0xCF84, 0xD184, 0xD384, 0xD584, 0xD784, 0xD984, 0xDB84, 0xDD84, 0xDF84, 
-    0xE104, 0xE204, 0xE304, 0xE404, 0xE504, 0xE604, 0xE704, 0xE804, 0xE904, 0xEA04, 0xEB04, 0xEC04, 0xED04, 0xEE04, 0xEF04, 0xF004, 
-    0xF0C4, 0xF144, 0xF1C4, 0xF244, 0xF2C4, 0xF344, 0xF3C4, 0xF444, 0xF4C4, 0xF544, 0xF5C4, 0xF644, 0xF6C4, 0xF744, 0xF7C4, 0xF844, 
-    0xF8A4, 0xF8E4, 0xF924, 0xF964, 0xF9A4, 0xF9E4, 0xFA24, 0xFA64, 0xFAA4, 0xFAE4, 0xFB24, 0xFB64, 0xFBA4, 0xFBE4, 0xFC24, 0xFC64, 
-    0xFC94, 0xFCB4, 0xFCD4, 0xFCF4, 0xFD14, 0xFD34, 0xFD54, 0xFD74, 0xFD94, 0xFDB4, 0xFDD4, 0xFDF4, 0xFE14, 0xFE34, 0xFE54, 0xFE74, 
-    0xFE8C, 0xFE9C, 0xFEAC, 0xFEBC, 0xFECC, 0xFEDC, 0xFEEC, 0xFEFC, 0xFF0C, 0xFF1C, 0xFF2C, 0xFF3C, 0xFF4C, 0xFF5C, 0xFF6C, 0xFF7C, 
-    0xFF88, 0xFF90, 0xFF98, 0xFFA0, 0xFFA8, 0xFFB0, 0xFFB8, 0xFFC0, 0xFFC8, 0xFFD0, 0xFFD8, 0xFFE0, 0xFFE8, 0xFFF0, 0xFFF8, 0x0000, 
-    0x7D7C, 0x797C, 0x757C, 0x717C, 0x6D7C, 0x697C, 0x657C, 0x617C, 0x5D7C, 0x597C, 0x557C, 0x517C, 0x4D7C, 0x497C, 0x457C, 0x417C, 
-    0x3E7C, 0x3C7C, 0x3A7C, 0x387C, 0x367C, 0x347C, 0x327C, 0x307C, 0x2E7C, 0x2C7C, 0x2A7C, 0x287C, 0x267C, 0x247C, 0x227C, 0x207C, 
-    0x1EFC, 0x1DFC, 0x1CFC, 0x1BFC, 0x1AFC, 0x19FC, 0x18FC, 0x17FC, 0x16FC, 0x15FC, 0x14FC, 0x13FC, 0x12FC, 0x11FC, 0x10FC, 0x0FFC, 
-    0x0F3C, 0x0EBC, 0x0E3C, 0x0DBC, 0x0D3C, 0x0CBC, 0x0C3C, 0x0BBC, 0x0B3C, 0x0ABC, 0x0A3C, 0x09BC, 0x093C, 0x08BC, 0x083C, 0x07BC, 
-    0x075C, 0x071C, 0x06DC, 0x069C, 0x065C, 0x061C, 0x05DC, 0x059C, 0x055C, 0x051C, 0x04DC, 0x049C, 0x045C, 0x041C, 0x03DC, 0x039C, 
-    0x036C, 0x034C, 0x032C, 0x030C, 0x02EC, 0x02CC, 0x02AC, 0x028C, 0x026C, 0x024C, 0x022C, 0x020C, 0x01EC, 0x01CC, 0x01AC, 0x018C, 
-    0x0174, 0x0164, 0x0154, 0x0144, 0x0134, 0x0124, 0x0114, 0x0104, 0x00F4, 0x00E4, 0x00D4, 0x00C4, 0x00B4, 0x00A4, 0x0094, 0x0084, 
-    0x0078, 0x0070, 0x0068, 0x0060, 0x0058, 0x0050, 0x0048, 0x0040, 0x0038, 0x0030, 0x0028, 0x0020, 0x0018, 0x0010, 0x0008, 0x0000
-};
-
-static DRWAV_INLINE drwav_int16 drwav__alaw_to_s16(drwav_uint8 sampleIn)
-{
-    return (short)g_drwavAlawTable[sampleIn];
-}
-
-static DRWAV_INLINE drwav_int16 drwav__mulaw_to_s16(drwav_uint8 sampleIn)
-{
-    return (short)g_drwavMulawTable[sampleIn];
-}
-
-
-
-static void drwav__pcm_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t totalSampleCount, unsigned int bytesPerSample)
-{
-    unsigned int i;
-
-    /* Special case for 8-bit sample data because it's treated as unsigned. */
-    if (bytesPerSample == 1) {
-        drwav_u8_to_s16(pOut, pIn, totalSampleCount);
-        return;
-    }
-
-
-    /* Slightly more optimal implementation for common formats. */
-    if (bytesPerSample == 2) {
-        for (i = 0; i < totalSampleCount; ++i) {
-           *pOut++ = ((const drwav_int16*)pIn)[i];
-        }
-        return;
-    }
-    if (bytesPerSample == 3) {
-        drwav_s24_to_s16(pOut, pIn, totalSampleCount);
-        return;
-    }
-    if (bytesPerSample == 4) {
-        drwav_s32_to_s16(pOut, (const drwav_int32*)pIn, totalSampleCount);
-        return;
-    }
-
-
-    /* Anything more than 64 bits per sample is not supported. */
-    if (bytesPerSample > 8) {
-        DRWAV_ZERO_MEMORY(pOut, totalSampleCount * sizeof(*pOut));
-        return;
-    }
-
-
-    /* Generic, slow converter. */
-    for (i = 0; i < totalSampleCount; ++i) {
-        drwav_uint64 sample = 0;
-        unsigned int shift  = (8 - bytesPerSample) * 8;
-
-        unsigned int j;
-        for (j = 0; j < bytesPerSample; j += 1) {
-            DRWAV_ASSERT(j < 8);
-            sample |= (drwav_uint64)(pIn[j]) << shift;
-            shift  += 8;
-        }
-
-        pIn += j;
-        *pOut++ = (drwav_int16)((drwav_int64)sample >> 48);
-    }
-}
-
-static void drwav__ieee_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t totalSampleCount, unsigned int bytesPerSample)
-{
-    if (bytesPerSample == 4) {
-        drwav_f32_to_s16(pOut, (const float*)pIn, totalSampleCount);
-        return;
-    } else if (bytesPerSample == 8) {
-        drwav_f64_to_s16(pOut, (const double*)pIn, totalSampleCount);
-        return;
-    } else {
-        /* Only supporting 32- and 64-bit float. Output silence in all other cases. Contributions welcome for 16-bit float. */
-        DRWAV_ZERO_MEMORY(pOut, totalSampleCount * sizeof(*pOut));
-        return;
-    }
-}
-
-static drwav_uint64 drwav_read_pcm_frames_s16__pcm(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
-{
-    drwav_uint32 bytesPerFrame;
-    drwav_uint64 totalFramesRead;
-    drwav_uint8 sampleData[4096];
-
-    /* Fast path. */
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_PCM && pWav->bitsPerSample == 16) {
-        return drwav_read_pcm_frames(pWav, framesToRead, pBufferOut);
-    }
-    
-    bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
-    if (bytesPerFrame == 0) {
-        return 0;
-    }
-
-    totalFramesRead = 0;
-    
-    while (framesToRead > 0) {
-        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
-        if (framesRead == 0) {
-            break;
-        }
-
-        drwav__pcm_to_s16(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels), bytesPerFrame/pWav->channels);
-
-        pBufferOut      += framesRead*pWav->channels;
-        framesToRead    -= framesRead;
-        totalFramesRead += framesRead;
-    }
-
-    return totalFramesRead;
-}
-
-static drwav_uint64 drwav_read_pcm_frames_s16__ieee(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
-{
-    drwav_uint64 totalFramesRead;
-    drwav_uint8 sampleData[4096];
-
-    drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
-    if (bytesPerFrame == 0) {
-        return 0;
-    }
-
-    totalFramesRead = 0;
-    
-    while (framesToRead > 0) {
-        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
-        if (framesRead == 0) {
-            break;
-        }
-
-        drwav__ieee_to_s16(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels), bytesPerFrame/pWav->channels);
-
-        pBufferOut      += framesRead*pWav->channels;
-        framesToRead    -= framesRead;
-        totalFramesRead += framesRead;
-    }
-
-    return totalFramesRead;
-}
-
-static drwav_uint64 drwav_read_pcm_frames_s16__alaw(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
-{
-    drwav_uint64 totalFramesRead;
-    drwav_uint8 sampleData[4096];
-
-    drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
-    if (bytesPerFrame == 0) {
-        return 0;
-    }
-
-    totalFramesRead = 0;
-    
-    while (framesToRead > 0) {
-        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
-        if (framesRead == 0) {
-            break;
-        }
-
-        drwav_alaw_to_s16(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels));
-
-        pBufferOut      += framesRead*pWav->channels;
-        framesToRead    -= framesRead;
-        totalFramesRead += framesRead;
-    }
-
-    return totalFramesRead;
-}
-
-static drwav_uint64 drwav_read_pcm_frames_s16__mulaw(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
-{
-    drwav_uint64 totalFramesRead;
-    drwav_uint8 sampleData[4096];
-
-    drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
-    if (bytesPerFrame == 0) {
-        return 0;
-    }
-
-    totalFramesRead = 0;
-
-    while (framesToRead > 0) {
-        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
-        if (framesRead == 0) {
-            break;
-        }
-
-        drwav_mulaw_to_s16(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels));
-
-        pBufferOut      += framesRead*pWav->channels;
-        framesToRead    -= framesRead;
-        totalFramesRead += framesRead;
-    }
-
-    return totalFramesRead;
-}
-
-DRWAV_API drwav_uint64 drwav_read_pcm_frames_s16(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
-{
-    if (pWav == NULL || framesToRead == 0 || pBufferOut == NULL) {
-        return 0;
-    }
-
-    /* Don't try to read more samples than can potentially fit in the output buffer. */
-    if (framesToRead * pWav->channels * sizeof(drwav_int16) > DRWAV_SIZE_MAX) {
-        framesToRead = DRWAV_SIZE_MAX / sizeof(drwav_int16) / pWav->channels;
-    }
-
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_PCM) {
-        return drwav_read_pcm_frames_s16__pcm(pWav, framesToRead, pBufferOut);
-    }
-
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_IEEE_FLOAT) {
-        return drwav_read_pcm_frames_s16__ieee(pWav, framesToRead, pBufferOut);
-    }
-
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ALAW) {
-        return drwav_read_pcm_frames_s16__alaw(pWav, framesToRead, pBufferOut);
-    }
-
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_MULAW) {
-        return drwav_read_pcm_frames_s16__mulaw(pWav, framesToRead, pBufferOut);
-    }
-
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) {
-        return drwav_read_pcm_frames_s16__msadpcm(pWav, framesToRead, pBufferOut);
-    }
-
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) {
-        return drwav_read_pcm_frames_s16__ima(pWav, framesToRead, pBufferOut);
-    }
-
-    return 0;
-}
-
-DRWAV_API drwav_uint64 drwav_read_pcm_frames_s16le(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
-{
-    drwav_uint64 framesRead = drwav_read_pcm_frames_s16(pWav, framesToRead, pBufferOut);
-    if (!drwav__is_little_endian()) {
-        drwav__bswap_samples_s16(pBufferOut, framesRead*pWav->channels);
-    }
-
-    return framesRead;
-}
-
-DRWAV_API drwav_uint64 drwav_read_pcm_frames_s16be(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
-{
-    drwav_uint64 framesRead = drwav_read_pcm_frames_s16(pWav, framesToRead, pBufferOut);
-    if (drwav__is_little_endian()) {
-        drwav__bswap_samples_s16(pBufferOut, framesRead*pWav->channels);
-    }
-
-    return framesRead;
-}
-
-
-DRWAV_API void drwav_u8_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount)
-{
-    int r;
-    size_t i;
-    for (i = 0; i < sampleCount; ++i) {
-        int x = pIn[i];
-        r = x << 8;
-        r = r - 32768;
-        pOut[i] = (short)r;
-    }
-}
-
-DRWAV_API void drwav_s24_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount)
-{
-    int r;
-    size_t i;
-    for (i = 0; i < sampleCount; ++i) {
-        int x = ((int)(((unsigned int)(((const drwav_uint8*)pIn)[i*3+0]) << 8) | ((unsigned int)(((const drwav_uint8*)pIn)[i*3+1]) << 16) | ((unsigned int)(((const drwav_uint8*)pIn)[i*3+2])) << 24)) >> 8;
-        r = x >> 8;
-        pOut[i] = (short)r;
-    }
-}
-
-DRWAV_API void drwav_s32_to_s16(drwav_int16* pOut, const drwav_int32* pIn, size_t sampleCount)
-{
-    int r;
-    size_t i;
-    for (i = 0; i < sampleCount; ++i) {
-        int x = pIn[i];
-        r = x >> 16;
-        pOut[i] = (short)r;
-    }
-}
-
-DRWAV_API void drwav_f32_to_s16(drwav_int16* pOut, const float* pIn, size_t sampleCount)
-{
-    int r;
-    size_t i;
-    for (i = 0; i < sampleCount; ++i) {
-        float x = pIn[i];
-        float c;
-        c = ((x < -1) ? -1 : ((x > 1) ? 1 : x));
-        c = c + 1;
-        r = (int)(c * 32767.5f);
-        r = r - 32768;
-        pOut[i] = (short)r;
-    }
-}
-
-DRWAV_API void drwav_f64_to_s16(drwav_int16* pOut, const double* pIn, size_t sampleCount)
-{
-    int r;
-    size_t i;
-    for (i = 0; i < sampleCount; ++i) {
-        double x = pIn[i];
-        double c;
-        c = ((x < -1) ? -1 : ((x > 1) ? 1 : x));
-        c = c + 1;
-        r = (int)(c * 32767.5);
-        r = r - 32768;
-        pOut[i] = (short)r;
-    }
-}
-
-DRWAV_API void drwav_alaw_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount)
-{
-    size_t i;
-    for (i = 0; i < sampleCount; ++i) {
-        pOut[i] = drwav__alaw_to_s16(pIn[i]);
-    }
-}
-
-DRWAV_API void drwav_mulaw_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount)
-{
-    size_t i;
-    for (i = 0; i < sampleCount; ++i) {
-        pOut[i] = drwav__mulaw_to_s16(pIn[i]);
-    }
-}
-
-
-
-static void drwav__pcm_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount, unsigned int bytesPerSample)
-{
-    unsigned int i;
-
-    /* Special case for 8-bit sample data because it's treated as unsigned. */
-    if (bytesPerSample == 1) {
-        drwav_u8_to_f32(pOut, pIn, sampleCount);
-        return;
-    }
-
-    /* Slightly more optimal implementation for common formats. */
-    if (bytesPerSample == 2) {
-        drwav_s16_to_f32(pOut, (const drwav_int16*)pIn, sampleCount);
-        return;
-    }
-    if (bytesPerSample == 3) {
-        drwav_s24_to_f32(pOut, pIn, sampleCount);
-        return;
-    }
-    if (bytesPerSample == 4) {
-        drwav_s32_to_f32(pOut, (const drwav_int32*)pIn, sampleCount);
-        return;
-    }
-
-
-    /* Anything more than 64 bits per sample is not supported. */
-    if (bytesPerSample > 8) {
-        DRWAV_ZERO_MEMORY(pOut, sampleCount * sizeof(*pOut));
-        return;
-    }
-
-
-    /* Generic, slow converter. */
-    for (i = 0; i < sampleCount; ++i) {
-        drwav_uint64 sample = 0;
-        unsigned int shift  = (8 - bytesPerSample) * 8;
-
-        unsigned int j;
-        for (j = 0; j < bytesPerSample; j += 1) {
-            DRWAV_ASSERT(j < 8);
-            sample |= (drwav_uint64)(pIn[j]) << shift;
-            shift  += 8;
-        }
-
-        pIn += j;
-        *pOut++ = (float)((drwav_int64)sample / 9223372036854775807.0);
-    }
-}
-
-static void drwav__ieee_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount, unsigned int bytesPerSample)
-{
-    if (bytesPerSample == 4) {
-        unsigned int i;
-        for (i = 0; i < sampleCount; ++i) {
-            *pOut++ = ((const float*)pIn)[i];
-        }
-        return;
-    } else if (bytesPerSample == 8) {
-        drwav_f64_to_f32(pOut, (const double*)pIn, sampleCount);
-        return;
-    } else {
-        /* Only supporting 32- and 64-bit float. Output silence in all other cases. Contributions welcome for 16-bit float. */
-        DRWAV_ZERO_MEMORY(pOut, sampleCount * sizeof(*pOut));
-        return;
-    }
-}
-
-
-static drwav_uint64 drwav_read_pcm_frames_f32__pcm(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut)
-{
-    drwav_uint64 totalFramesRead;
-    drwav_uint8 sampleData[4096];
-
-    drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
-    if (bytesPerFrame == 0) {
-        return 0;
-    }
-
-    totalFramesRead = 0;
-
-    while (framesToRead > 0) {
-        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
-        if (framesRead == 0) {
-            break;
-        }
-
-        drwav__pcm_to_f32(pBufferOut, sampleData, (size_t)framesRead*pWav->channels, bytesPerFrame/pWav->channels);
-
-        pBufferOut      += framesRead*pWav->channels;
-        framesToRead    -= framesRead;
-        totalFramesRead += framesRead;
-    }
-
-    return totalFramesRead;
-}
-
-static drwav_uint64 drwav_read_pcm_frames_f32__msadpcm(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut)
-{
-    /*
-    We're just going to borrow the implementation from the drwav_read_s16() since ADPCM is a little bit more complicated than other formats and I don't
-    want to duplicate that code.
-    */
-    drwav_uint64 totalFramesRead = 0;
-    drwav_int16 samples16[2048];
-    while (framesToRead > 0) {
-        drwav_uint64 framesRead = drwav_read_pcm_frames_s16(pWav, drwav_min(framesToRead, drwav_countof(samples16)/pWav->channels), samples16);
-        if (framesRead == 0) {
-            break;
-        }
-
-        drwav_s16_to_f32(pBufferOut, samples16, (size_t)(framesRead*pWav->channels));   /* <-- Safe cast because we're clamping to 2048. */
-
-        pBufferOut      += framesRead*pWav->channels;
-        framesToRead    -= framesRead;
-        totalFramesRead += framesRead;
-    }
-
-    return totalFramesRead;
-}
-
-static drwav_uint64 drwav_read_pcm_frames_f32__ima(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut)
-{
-    /*
-    We're just going to borrow the implementation from the drwav_read_s16() since IMA-ADPCM is a little bit more complicated than other formats and I don't
-    want to duplicate that code.
-    */
-    drwav_uint64 totalFramesRead = 0;
-    drwav_int16 samples16[2048];
-    while (framesToRead > 0) {
-        drwav_uint64 framesRead = drwav_read_pcm_frames_s16(pWav, drwav_min(framesToRead, drwav_countof(samples16)/pWav->channels), samples16);
-        if (framesRead == 0) {
-            break;
-        }
-
-        drwav_s16_to_f32(pBufferOut, samples16, (size_t)(framesRead*pWav->channels));   /* <-- Safe cast because we're clamping to 2048. */
-
-        pBufferOut      += framesRead*pWav->channels;
-        framesToRead    -= framesRead;
-        totalFramesRead += framesRead;
-    }
-
-    return totalFramesRead;
-}
-
-static drwav_uint64 drwav_read_pcm_frames_f32__ieee(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut)
-{
-    drwav_uint64 totalFramesRead;
-    drwav_uint8 sampleData[4096];
-    drwav_uint32 bytesPerFrame;
-
-    /* Fast path. */
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_IEEE_FLOAT && pWav->bitsPerSample == 32) {
-        return drwav_read_pcm_frames(pWav, framesToRead, pBufferOut);
-    }
-    
-    bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
-    if (bytesPerFrame == 0) {
-        return 0;
-    }
-
-    totalFramesRead = 0;
-
-    while (framesToRead > 0) {
-        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
-        if (framesRead == 0) {
-            break;
-        }
-
-        drwav__ieee_to_f32(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels), bytesPerFrame/pWav->channels);
-
-        pBufferOut      += framesRead*pWav->channels;
-        framesToRead    -= framesRead;
-        totalFramesRead += framesRead;
-    }
-
-    return totalFramesRead;
-}
-
-static drwav_uint64 drwav_read_pcm_frames_f32__alaw(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut)
-{
-    drwav_uint64 totalFramesRead;
-    drwav_uint8 sampleData[4096];
-    drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
-    if (bytesPerFrame == 0) {
-        return 0;
-    }
-
-    totalFramesRead = 0;
-
-    while (framesToRead > 0) {
-        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
-        if (framesRead == 0) {
-            break;
-        }
-
-        drwav_alaw_to_f32(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels));
-
-        pBufferOut      += framesRead*pWav->channels;
-        framesToRead    -= framesRead;
-        totalFramesRead += framesRead;
-    }
-
-    return totalFramesRead;
-}
-
-static drwav_uint64 drwav_read_pcm_frames_f32__mulaw(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut)
-{
-    drwav_uint64 totalFramesRead;
-    drwav_uint8 sampleData[4096];
-
-    drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
-    if (bytesPerFrame == 0) {
-        return 0;
-    }
-
-    totalFramesRead = 0;
-
-    while (framesToRead > 0) {
-        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
-        if (framesRead == 0) {
-            break;
-        }
-
-        drwav_mulaw_to_f32(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels));
-
-        pBufferOut      += framesRead*pWav->channels;
-        framesToRead    -= framesRead;
-        totalFramesRead += framesRead;
-    }
-
-    return totalFramesRead;
-}
-
-DRWAV_API drwav_uint64 drwav_read_pcm_frames_f32(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut)
-{
-    if (pWav == NULL || framesToRead == 0 || pBufferOut == NULL) {
-        return 0;
-    }
-
-    /* Don't try to read more samples than can potentially fit in the output buffer. */
-    if (framesToRead * pWav->channels * sizeof(float) > DRWAV_SIZE_MAX) {
-        framesToRead = DRWAV_SIZE_MAX / sizeof(float) / pWav->channels;
-    }
-
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_PCM) {
-        return drwav_read_pcm_frames_f32__pcm(pWav, framesToRead, pBufferOut);
-    }
-
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) {
-        return drwav_read_pcm_frames_f32__msadpcm(pWav, framesToRead, pBufferOut);
-    }
-
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_IEEE_FLOAT) {
-        return drwav_read_pcm_frames_f32__ieee(pWav, framesToRead, pBufferOut);
-    }
-
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ALAW) {
-        return drwav_read_pcm_frames_f32__alaw(pWav, framesToRead, pBufferOut);
-    }
-
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_MULAW) {
-        return drwav_read_pcm_frames_f32__mulaw(pWav, framesToRead, pBufferOut);
-    }
-
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) {
-        return drwav_read_pcm_frames_f32__ima(pWav, framesToRead, pBufferOut);
-    }
-
-    return 0;
-}
-
-DRWAV_API drwav_uint64 drwav_read_pcm_frames_f32le(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut)
-{
-    drwav_uint64 framesRead = drwav_read_pcm_frames_f32(pWav, framesToRead, pBufferOut);
-    if (!drwav__is_little_endian()) {
-        drwav__bswap_samples_f32(pBufferOut, framesRead*pWav->channels);
-    }
-
-    return framesRead;
-}
-
-DRWAV_API drwav_uint64 drwav_read_pcm_frames_f32be(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut)
-{
-    drwav_uint64 framesRead = drwav_read_pcm_frames_f32(pWav, framesToRead, pBufferOut);
-    if (drwav__is_little_endian()) {
-        drwav__bswap_samples_f32(pBufferOut, framesRead*pWav->channels);
-    }
-
-    return framesRead;
-}
-
-
-DRWAV_API void drwav_u8_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount)
-{
-    size_t i;
-
-    if (pOut == NULL || pIn == NULL) {
-        return;
-    }
-
-#ifdef DR_WAV_LIBSNDFILE_COMPAT
-    /*
-    It appears libsndfile uses slightly different logic for the u8 -> f32 conversion to dr_wav, which in my opinion is incorrect. It appears
-    libsndfile performs the conversion something like "f32 = (u8 / 256) * 2 - 1", however I think it should be "f32 = (u8 / 255) * 2 - 1" (note
-    the divisor of 256 vs 255). I use libsndfile as a benchmark for testing, so I'm therefore leaving this block here just for my automated
-    correctness testing. This is disabled by default.
-    */
-    for (i = 0; i < sampleCount; ++i) {
-        *pOut++ = (pIn[i] / 256.0f) * 2 - 1;
-    }
-#else
-    for (i = 0; i < sampleCount; ++i) {
-        float x = pIn[i];
-        x = x * 0.00784313725490196078f;    /* 0..255 to 0..2 */
-        x = x - 1;                          /* 0..2 to -1..1 */
-
-        *pOut++ = x;
-    }
-#endif
-}
-
-DRWAV_API void drwav_s16_to_f32(float* pOut, const drwav_int16* pIn, size_t sampleCount)
-{
-    size_t i;
-
-    if (pOut == NULL || pIn == NULL) {
-        return;
-    }
-
-    for (i = 0; i < sampleCount; ++i) {
-        *pOut++ = pIn[i] * 0.000030517578125f;
-    }
-}
-
-DRWAV_API void drwav_s24_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount)
-{
-    size_t i;
-
-    if (pOut == NULL || pIn == NULL) {
-        return;
-    }
-
-    for (i = 0; i < sampleCount; ++i) {
-        double x = (double)(((drwav_int32)(((drwav_uint32)(pIn[i*3+0]) << 8) | ((drwav_uint32)(pIn[i*3+1]) << 16) | ((drwav_uint32)(pIn[i*3+2])) << 24)) >> 8);
-        *pOut++ = (float)(x * 0.00000011920928955078125);
-    }
-}
-
-DRWAV_API void drwav_s32_to_f32(float* pOut, const drwav_int32* pIn, size_t sampleCount)
-{
-    size_t i;
-    if (pOut == NULL || pIn == NULL) {
-        return;
-    }
-
-    for (i = 0; i < sampleCount; ++i) {
-        *pOut++ = (float)(pIn[i] / 2147483648.0);
-    }
-}
-
-DRWAV_API void drwav_f64_to_f32(float* pOut, const double* pIn, size_t sampleCount)
-{
-    size_t i;
-
-    if (pOut == NULL || pIn == NULL) {
-        return;
-    }
-
-    for (i = 0; i < sampleCount; ++i) {
-        *pOut++ = (float)pIn[i];
-    }
-}
-
-DRWAV_API void drwav_alaw_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount)
-{
-    size_t i;
-
-    if (pOut == NULL || pIn == NULL) {
-        return;
-    }
-
-    for (i = 0; i < sampleCount; ++i) {
-        *pOut++ = drwav__alaw_to_s16(pIn[i]) / 32768.0f;
-    }
-}
-
-DRWAV_API void drwav_mulaw_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount)
-{
-    size_t i;
-
-    if (pOut == NULL || pIn == NULL) {
-        return;
-    }
-
-    for (i = 0; i < sampleCount; ++i) {
-        *pOut++ = drwav__mulaw_to_s16(pIn[i]) / 32768.0f;
-    }
-}
-
-
-
-static void drwav__pcm_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t totalSampleCount, unsigned int bytesPerSample)
-{
-    unsigned int i;
-
-    /* Special case for 8-bit sample data because it's treated as unsigned. */
-    if (bytesPerSample == 1) {
-        drwav_u8_to_s32(pOut, pIn, totalSampleCount);
-        return;
-    }
-
-    /* Slightly more optimal implementation for common formats. */
-    if (bytesPerSample == 2) {
-        drwav_s16_to_s32(pOut, (const drwav_int16*)pIn, totalSampleCount);
-        return;
-    }
-    if (bytesPerSample == 3) {
-        drwav_s24_to_s32(pOut, pIn, totalSampleCount);
-        return;
-    }
-    if (bytesPerSample == 4) {
-        for (i = 0; i < totalSampleCount; ++i) {
-           *pOut++ = ((const drwav_int32*)pIn)[i];
-        }
-        return;
-    }
-
-
-    /* Anything more than 64 bits per sample is not supported. */
-    if (bytesPerSample > 8) {
-        DRWAV_ZERO_MEMORY(pOut, totalSampleCount * sizeof(*pOut));
-        return;
-    }
-
-
-    /* Generic, slow converter. */
-    for (i = 0; i < totalSampleCount; ++i) {
-        drwav_uint64 sample = 0;
-        unsigned int shift  = (8 - bytesPerSample) * 8;
-
-        unsigned int j;
-        for (j = 0; j < bytesPerSample; j += 1) {
-            DRWAV_ASSERT(j < 8);
-            sample |= (drwav_uint64)(pIn[j]) << shift;
-            shift  += 8;
-        }
-
-        pIn += j;
-        *pOut++ = (drwav_int32)((drwav_int64)sample >> 32);
-    }
-}
-
-static void drwav__ieee_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t totalSampleCount, unsigned int bytesPerSample)
-{
-    if (bytesPerSample == 4) {
-        drwav_f32_to_s32(pOut, (const float*)pIn, totalSampleCount);
-        return;
-    } else if (bytesPerSample == 8) {
-        drwav_f64_to_s32(pOut, (const double*)pIn, totalSampleCount);
-        return;
-    } else {
-        /* Only supporting 32- and 64-bit float. Output silence in all other cases. Contributions welcome for 16-bit float. */
-        DRWAV_ZERO_MEMORY(pOut, totalSampleCount * sizeof(*pOut));
-        return;
-    }
-}
-
-
-static drwav_uint64 drwav_read_pcm_frames_s32__pcm(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
-{
-    drwav_uint64 totalFramesRead;
-    drwav_uint8 sampleData[4096];
-    drwav_uint32 bytesPerFrame;
-
-    /* Fast path. */
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_PCM && pWav->bitsPerSample == 32) {
-        return drwav_read_pcm_frames(pWav, framesToRead, pBufferOut);
-    }
-    
-    bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
-    if (bytesPerFrame == 0) {
-        return 0;
-    }
-
-    totalFramesRead = 0;
-
-    while (framesToRead > 0) {
-        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
-        if (framesRead == 0) {
-            break;
-        }
-
-        drwav__pcm_to_s32(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels), bytesPerFrame/pWav->channels);
-
-        pBufferOut      += framesRead*pWav->channels;
-        framesToRead    -= framesRead;
-        totalFramesRead += framesRead;
-    }
-
-    return totalFramesRead;
-}
-
-static drwav_uint64 drwav_read_pcm_frames_s32__msadpcm(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
-{
-    /*
-    We're just going to borrow the implementation from the drwav_read_s16() since ADPCM is a little bit more complicated than other formats and I don't
-    want to duplicate that code.
-    */
-    drwav_uint64 totalFramesRead = 0;
-    drwav_int16 samples16[2048];
-    while (framesToRead > 0) {
-        drwav_uint64 framesRead = drwav_read_pcm_frames_s16(pWav, drwav_min(framesToRead, drwav_countof(samples16)/pWav->channels), samples16);
-        if (framesRead == 0) {
-            break;
-        }
-
-        drwav_s16_to_s32(pBufferOut, samples16, (size_t)(framesRead*pWav->channels));   /* <-- Safe cast because we're clamping to 2048. */
-
-        pBufferOut      += framesRead*pWav->channels;
-        framesToRead    -= framesRead;
-        totalFramesRead += framesRead;
-    }
-
-    return totalFramesRead;
-}
-
-static drwav_uint64 drwav_read_pcm_frames_s32__ima(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
-{
-    /*
-    We're just going to borrow the implementation from the drwav_read_s16() since IMA-ADPCM is a little bit more complicated than other formats and I don't
-    want to duplicate that code.
-    */
-    drwav_uint64 totalFramesRead = 0;
-    drwav_int16 samples16[2048];
-    while (framesToRead > 0) {
-        drwav_uint64 framesRead = drwav_read_pcm_frames_s16(pWav, drwav_min(framesToRead, drwav_countof(samples16)/pWav->channels), samples16);
-        if (framesRead == 0) {
-            break;
-        }
-
-        drwav_s16_to_s32(pBufferOut, samples16, (size_t)(framesRead*pWav->channels));   /* <-- Safe cast because we're clamping to 2048. */
-
-        pBufferOut      += framesRead*pWav->channels;
-        framesToRead    -= framesRead;
-        totalFramesRead += framesRead;
-    }
-
-    return totalFramesRead;
-}
-
-static drwav_uint64 drwav_read_pcm_frames_s32__ieee(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
-{
-    drwav_uint64 totalFramesRead;
-    drwav_uint8 sampleData[4096];
-
-    drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
-    if (bytesPerFrame == 0) {
-        return 0;
-    }
-
-    totalFramesRead = 0;
-
-    while (framesToRead > 0) {
-        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
-        if (framesRead == 0) {
-            break;
-        }
-
-        drwav__ieee_to_s32(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels), bytesPerFrame/pWav->channels);
-
-        pBufferOut      += framesRead*pWav->channels;
-        framesToRead    -= framesRead;
-        totalFramesRead += framesRead;
-    }
-
-    return totalFramesRead;
-}
-
-static drwav_uint64 drwav_read_pcm_frames_s32__alaw(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
-{
-    drwav_uint64 totalFramesRead;
-    drwav_uint8 sampleData[4096];
-
-    drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
-    if (bytesPerFrame == 0) {
-        return 0;
-    }
-
-    totalFramesRead = 0;
-
-    while (framesToRead > 0) {
-        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
-        if (framesRead == 0) {
-            break;
-        }
-
-        drwav_alaw_to_s32(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels));
-
-        pBufferOut      += framesRead*pWav->channels;
-        framesToRead    -= framesRead;
-        totalFramesRead += framesRead;
-    }
-
-    return totalFramesRead;
-}
-
-static drwav_uint64 drwav_read_pcm_frames_s32__mulaw(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
-{
-    drwav_uint64 totalFramesRead;
-    drwav_uint8 sampleData[4096];
-
-    drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
-    if (bytesPerFrame == 0) {
-        return 0;
-    }
-
-    totalFramesRead = 0;
-
-    while (framesToRead > 0) {
-        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
-        if (framesRead == 0) {
-            break;
-        }
-
-        drwav_mulaw_to_s32(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels));
-
-        pBufferOut      += framesRead*pWav->channels;
-        framesToRead    -= framesRead;
-        totalFramesRead += framesRead;
-    }
-
-    return totalFramesRead;
-}
-
-DRWAV_API drwav_uint64 drwav_read_pcm_frames_s32(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
-{
-    if (pWav == NULL || framesToRead == 0 || pBufferOut == NULL) {
-        return 0;
-    }
-
-    /* Don't try to read more samples than can potentially fit in the output buffer. */
-    if (framesToRead * pWav->channels * sizeof(drwav_int32) > DRWAV_SIZE_MAX) {
-        framesToRead = DRWAV_SIZE_MAX / sizeof(drwav_int32) / pWav->channels;
-    }
-
-
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_PCM) {
-        return drwav_read_pcm_frames_s32__pcm(pWav, framesToRead, pBufferOut);
-    }
-
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) {
-        return drwav_read_pcm_frames_s32__msadpcm(pWav, framesToRead, pBufferOut);
-    }
-
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_IEEE_FLOAT) {
-        return drwav_read_pcm_frames_s32__ieee(pWav, framesToRead, pBufferOut);
-    }
-
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ALAW) {
-        return drwav_read_pcm_frames_s32__alaw(pWav, framesToRead, pBufferOut);
-    }
-
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_MULAW) {
-        return drwav_read_pcm_frames_s32__mulaw(pWav, framesToRead, pBufferOut);
-    }
-
-    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) {
-        return drwav_read_pcm_frames_s32__ima(pWav, framesToRead, pBufferOut);
-    }
-
-    return 0;
-}
-
-DRWAV_API drwav_uint64 drwav_read_pcm_frames_s32le(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
-{
-    drwav_uint64 framesRead = drwav_read_pcm_frames_s32(pWav, framesToRead, pBufferOut);
-    if (!drwav__is_little_endian()) {
-        drwav__bswap_samples_s32(pBufferOut, framesRead*pWav->channels);
-    }
-
-    return framesRead;
-}
-
-DRWAV_API drwav_uint64 drwav_read_pcm_frames_s32be(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
-{
-    drwav_uint64 framesRead = drwav_read_pcm_frames_s32(pWav, framesToRead, pBufferOut);
-    if (drwav__is_little_endian()) {
-        drwav__bswap_samples_s32(pBufferOut, framesRead*pWav->channels);
-    }
-
-    return framesRead;
-}
-
-
-DRWAV_API void drwav_u8_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount)
-{
-    size_t i;
-
-    if (pOut == NULL || pIn == NULL) {
-        return;
-    }
-
-    for (i = 0; i < sampleCount; ++i) {
-        *pOut++ = ((int)pIn[i] - 128) << 24;
-    }
-}
-
-DRWAV_API void drwav_s16_to_s32(drwav_int32* pOut, const drwav_int16* pIn, size_t sampleCount)
-{
-    size_t i;
-
-    if (pOut == NULL || pIn == NULL) {
-        return;
-    }
-
-    for (i = 0; i < sampleCount; ++i) {
-        *pOut++ = pIn[i] << 16;
-    }
-}
-
-DRWAV_API void drwav_s24_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount)
-{
-    size_t i;
-
-    if (pOut == NULL || pIn == NULL) {
-        return;
-    }
-
-    for (i = 0; i < sampleCount; ++i) {
-        unsigned int s0 = pIn[i*3 + 0];
-        unsigned int s1 = pIn[i*3 + 1];
-        unsigned int s2 = pIn[i*3 + 2];
-
-        drwav_int32 sample32 = (drwav_int32)((s0 << 8) | (s1 << 16) | (s2 << 24));
-        *pOut++ = sample32;
-    }
-}
-
-DRWAV_API void drwav_f32_to_s32(drwav_int32* pOut, const float* pIn, size_t sampleCount)
-{
-    size_t i;
-
-    if (pOut == NULL || pIn == NULL) {
-        return;
-    }
-
-    for (i = 0; i < sampleCount; ++i) {
-        *pOut++ = (drwav_int32)(2147483648.0 * pIn[i]);
-    }
-}
-
-DRWAV_API void drwav_f64_to_s32(drwav_int32* pOut, const double* pIn, size_t sampleCount)
-{
-    size_t i;
-
-    if (pOut == NULL || pIn == NULL) {
-        return;
-    }
-
-    for (i = 0; i < sampleCount; ++i) {
-        *pOut++ = (drwav_int32)(2147483648.0 * pIn[i]);
-    }
-}
-
-DRWAV_API void drwav_alaw_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount)
-{
-    size_t i;
-
-    if (pOut == NULL || pIn == NULL) {
-        return;
-    }
-
-    for (i = 0; i < sampleCount; ++i) {
-        *pOut++ = ((drwav_int32)drwav__alaw_to_s16(pIn[i])) << 16;
-    }
-}
-
-DRWAV_API void drwav_mulaw_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount)
-{
-    size_t i;
-
-    if (pOut == NULL || pIn == NULL) {
-        return;
-    }
-
-    for (i= 0; i < sampleCount; ++i) {
-        *pOut++ = ((drwav_int32)drwav__mulaw_to_s16(pIn[i])) << 16;
-    }
-}
-
-
-
-static drwav_int16* drwav__read_pcm_frames_and_close_s16(drwav* pWav, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalFrameCount)
-{
-    drwav_uint64 sampleDataSize;
-    drwav_int16* pSampleData;
-    drwav_uint64 framesRead;
-
-    DRWAV_ASSERT(pWav != NULL);
-
-    sampleDataSize = pWav->totalPCMFrameCount * pWav->channels * sizeof(drwav_int16);
-    if (sampleDataSize > DRWAV_SIZE_MAX) {
-        drwav_uninit(pWav);
-        return NULL;    /* File's too big. */
-    }
-
-    pSampleData = (drwav_int16*)drwav__malloc_from_callbacks((size_t)sampleDataSize, &pWav->allocationCallbacks); /* <-- Safe cast due to the check above. */
-    if (pSampleData == NULL) {
-        drwav_uninit(pWav);
-        return NULL;    /* Failed to allocate memory. */
-    }
-
-    framesRead = drwav_read_pcm_frames_s16(pWav, (size_t)pWav->totalPCMFrameCount, pSampleData);
-    if (framesRead != pWav->totalPCMFrameCount) {
-        drwav__free_from_callbacks(pSampleData, &pWav->allocationCallbacks);
-        drwav_uninit(pWav);
-        return NULL;    /* There was an error reading the samples. */
-    }
-
-    drwav_uninit(pWav);
-
-    if (sampleRate) {
-        *sampleRate = pWav->sampleRate;
-    }
-    if (channels) {
-        *channels = pWav->channels;
-    }
-    if (totalFrameCount) {
-        *totalFrameCount = pWav->totalPCMFrameCount;
-    }
-
-    return pSampleData;
-}
-
-static float* drwav__read_pcm_frames_and_close_f32(drwav* pWav, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalFrameCount)
-{
-    drwav_uint64 sampleDataSize;
-    float* pSampleData;
-    drwav_uint64 framesRead;
-
-    DRWAV_ASSERT(pWav != NULL);
-
-    sampleDataSize = pWav->totalPCMFrameCount * pWav->channels * sizeof(float);
-    if (sampleDataSize > DRWAV_SIZE_MAX) {
-        drwav_uninit(pWav);
-        return NULL;    /* File's too big. */
-    }
-
-    pSampleData = (float*)drwav__malloc_from_callbacks((size_t)sampleDataSize, &pWav->allocationCallbacks); /* <-- Safe cast due to the check above. */
-    if (pSampleData == NULL) {
-        drwav_uninit(pWav);
-        return NULL;    /* Failed to allocate memory. */
-    }
-
-    framesRead = drwav_read_pcm_frames_f32(pWav, (size_t)pWav->totalPCMFrameCount, pSampleData);
-    if (framesRead != pWav->totalPCMFrameCount) {
-        drwav__free_from_callbacks(pSampleData, &pWav->allocationCallbacks);
-        drwav_uninit(pWav);
-        return NULL;    /* There was an error reading the samples. */
-    }
-
-    drwav_uninit(pWav);
-
-    if (sampleRate) {
-        *sampleRate = pWav->sampleRate;
-    }
-    if (channels) {
-        *channels = pWav->channels;
-    }
-    if (totalFrameCount) {
-        *totalFrameCount = pWav->totalPCMFrameCount;
-    }
-
-    return pSampleData;
-}
-
-static drwav_int32* drwav__read_pcm_frames_and_close_s32(drwav* pWav, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalFrameCount)
-{
-    drwav_uint64 sampleDataSize;
-    drwav_int32* pSampleData;
-    drwav_uint64 framesRead;
-
-    DRWAV_ASSERT(pWav != NULL);
-
-    sampleDataSize = pWav->totalPCMFrameCount * pWav->channels * sizeof(drwav_int32);
-    if (sampleDataSize > DRWAV_SIZE_MAX) {
-        drwav_uninit(pWav);
-        return NULL;    /* File's too big. */
-    }
-
-    pSampleData = (drwav_int32*)drwav__malloc_from_callbacks((size_t)sampleDataSize, &pWav->allocationCallbacks); /* <-- Safe cast due to the check above. */
-    if (pSampleData == NULL) {
-        drwav_uninit(pWav);
-        return NULL;    /* Failed to allocate memory. */
-    }
-
-    framesRead = drwav_read_pcm_frames_s32(pWav, (size_t)pWav->totalPCMFrameCount, pSampleData);
-    if (framesRead != pWav->totalPCMFrameCount) {
-        drwav__free_from_callbacks(pSampleData, &pWav->allocationCallbacks);
-        drwav_uninit(pWav);
-        return NULL;    /* There was an error reading the samples. */
-    }
-
-    drwav_uninit(pWav);
-
-    if (sampleRate) {
-        *sampleRate = pWav->sampleRate;
-    }
-    if (channels) {
-        *channels = pWav->channels;
-    }
-    if (totalFrameCount) {
-        *totalFrameCount = pWav->totalPCMFrameCount;
-    }
-
-    return pSampleData;
-}
-
-
-
-DRWAV_API drwav_int16* drwav_open_and_read_pcm_frames_s16(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    drwav wav;
-
-    if (channelsOut) {
-        *channelsOut = 0;
-    }
-    if (sampleRateOut) {
-        *sampleRateOut = 0;
-    }
-    if (totalFrameCountOut) {
-        *totalFrameCountOut = 0;
-    }
-
-    if (!drwav_init(&wav, onRead, onSeek, pUserData, pAllocationCallbacks)) {
-        return NULL;
-    }
-
-    return drwav__read_pcm_frames_and_close_s16(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
-}
-
-DRWAV_API float* drwav_open_and_read_pcm_frames_f32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    drwav wav;
-
-    if (channelsOut) {
-        *channelsOut = 0;
-    }
-    if (sampleRateOut) {
-        *sampleRateOut = 0;
-    }
-    if (totalFrameCountOut) {
-        *totalFrameCountOut = 0;
-    }
-
-    if (!drwav_init(&wav, onRead, onSeek, pUserData, pAllocationCallbacks)) {
-        return NULL;
-    }
-
-    return drwav__read_pcm_frames_and_close_f32(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
-}
-
-DRWAV_API drwav_int32* drwav_open_and_read_pcm_frames_s32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    drwav wav;
-
-    if (channelsOut) {
-        *channelsOut = 0;
-    }
-    if (sampleRateOut) {
-        *sampleRateOut = 0;
-    }
-    if (totalFrameCountOut) {
-        *totalFrameCountOut = 0;
-    }
-
-    if (!drwav_init(&wav, onRead, onSeek, pUserData, pAllocationCallbacks)) {
-        return NULL;
-    }
-
-    return drwav__read_pcm_frames_and_close_s32(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
-}
-
-#ifndef DR_WAV_NO_STDIO
-DRWAV_API drwav_int16* drwav_open_file_and_read_pcm_frames_s16(const char* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    drwav wav;
-
-    if (channelsOut) {
-        *channelsOut = 0;
-    }
-    if (sampleRateOut) {
-        *sampleRateOut = 0;
-    }
-    if (totalFrameCountOut) {
-        *totalFrameCountOut = 0;
-    }
-
-    if (!drwav_init_file(&wav, filename, pAllocationCallbacks)) {
-        return NULL;
-    }
-
-    return drwav__read_pcm_frames_and_close_s16(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
-}
-
-DRWAV_API float* drwav_open_file_and_read_pcm_frames_f32(const char* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    drwav wav;
-
-    if (channelsOut) {
-        *channelsOut = 0;
-    }
-    if (sampleRateOut) {
-        *sampleRateOut = 0;
-    }
-    if (totalFrameCountOut) {
-        *totalFrameCountOut = 0;
-    }
-
-    if (!drwav_init_file(&wav, filename, pAllocationCallbacks)) {
-        return NULL;
-    }
-
-    return drwav__read_pcm_frames_and_close_f32(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
-}
-
-DRWAV_API drwav_int32* drwav_open_file_and_read_pcm_frames_s32(const char* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    drwav wav;
-
-    if (channelsOut) {
-        *channelsOut = 0;
-    }
-    if (sampleRateOut) {
-        *sampleRateOut = 0;
-    }
-    if (totalFrameCountOut) {
-        *totalFrameCountOut = 0;
-    }
-
-    if (!drwav_init_file(&wav, filename, pAllocationCallbacks)) {
-        return NULL;
-    }
-
-    return drwav__read_pcm_frames_and_close_s32(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
-}
-
-
-DRWAV_API drwav_int16* drwav_open_file_and_read_pcm_frames_s16_w(const wchar_t* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    drwav wav;
-
-    if (sampleRateOut) {
-        *sampleRateOut = 0;
-    }
-    if (channelsOut) {
-        *channelsOut = 0;
-    }
-    if (totalFrameCountOut) {
-        *totalFrameCountOut = 0;
-    }
-
-    if (!drwav_init_file_w(&wav, filename, pAllocationCallbacks)) {
-        return NULL;
-    }
-
-    return drwav__read_pcm_frames_and_close_s16(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
-}
-
-DRWAV_API float* drwav_open_file_and_read_pcm_frames_f32_w(const wchar_t* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    drwav wav;
-
-    if (sampleRateOut) {
-        *sampleRateOut = 0;
-    }
-    if (channelsOut) {
-        *channelsOut = 0;
-    }
-    if (totalFrameCountOut) {
-        *totalFrameCountOut = 0;
-    }
-
-    if (!drwav_init_file_w(&wav, filename, pAllocationCallbacks)) {
-        return NULL;
-    }
-
-    return drwav__read_pcm_frames_and_close_f32(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
-}
-
-DRWAV_API drwav_int32* drwav_open_file_and_read_pcm_frames_s32_w(const wchar_t* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    drwav wav;
-
-    if (sampleRateOut) {
-        *sampleRateOut = 0;
-    }
-    if (channelsOut) {
-        *channelsOut = 0;
-    }
-    if (totalFrameCountOut) {
-        *totalFrameCountOut = 0;
-    }
-
-    if (!drwav_init_file_w(&wav, filename, pAllocationCallbacks)) {
-        return NULL;
-    }
-
-    return drwav__read_pcm_frames_and_close_s32(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
-}
-#endif
-
-DRWAV_API drwav_int16* drwav_open_memory_and_read_pcm_frames_s16(const void* data, size_t dataSize, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    drwav wav;
-
-    if (channelsOut) {
-        *channelsOut = 0;
-    }
-    if (sampleRateOut) {
-        *sampleRateOut = 0;
-    }
-    if (totalFrameCountOut) {
-        *totalFrameCountOut = 0;
-    }
-
-    if (!drwav_init_memory(&wav, data, dataSize, pAllocationCallbacks)) {
-        return NULL;
-    }
-
-    return drwav__read_pcm_frames_and_close_s16(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
-}
-
-DRWAV_API float* drwav_open_memory_and_read_pcm_frames_f32(const void* data, size_t dataSize, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    drwav wav;
-
-    if (channelsOut) {
-        *channelsOut = 0;
-    }
-    if (sampleRateOut) {
-        *sampleRateOut = 0;
-    }
-    if (totalFrameCountOut) {
-        *totalFrameCountOut = 0;
-    }
-
-    if (!drwav_init_memory(&wav, data, dataSize, pAllocationCallbacks)) {
-        return NULL;
-    }
-
-    return drwav__read_pcm_frames_and_close_f32(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
-}
-
-DRWAV_API drwav_int32* drwav_open_memory_and_read_pcm_frames_s32(const void* data, size_t dataSize, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    drwav wav;
-
-    if (channelsOut) {
-        *channelsOut = 0;
-    }
-    if (sampleRateOut) {
-        *sampleRateOut = 0;
-    }
-    if (totalFrameCountOut) {
-        *totalFrameCountOut = 0;
-    }
-
-    if (!drwav_init_memory(&wav, data, dataSize, pAllocationCallbacks)) {
-        return NULL;
-    }
-
-    return drwav__read_pcm_frames_and_close_s32(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
-}
-#endif  /* DR_WAV_NO_CONVERSION_API */
-
-
-DRWAV_API void drwav_free(void* p, const drwav_allocation_callbacks* pAllocationCallbacks)
-{
-    if (pAllocationCallbacks != NULL) {
-        drwav__free_from_callbacks(p, pAllocationCallbacks);
-    } else {
-        drwav__free_default(p, NULL);
-    }
-}
-
-DRWAV_API drwav_uint16 drwav_bytes_to_u16(const drwav_uint8* data)
-{
-    return drwav__bytes_to_u16(data);
-}
-
-DRWAV_API drwav_int16 drwav_bytes_to_s16(const drwav_uint8* data)
-{
-    return drwav__bytes_to_s16(data);
-}
-
-DRWAV_API drwav_uint32 drwav_bytes_to_u32(const drwav_uint8* data)
-{
-    return drwav__bytes_to_u32(data);
-}
-
-DRWAV_API drwav_int32 drwav_bytes_to_s32(const drwav_uint8* data)
-{
-    return drwav__bytes_to_s32(data);
-}
-
-DRWAV_API drwav_uint64 drwav_bytes_to_u64(const drwav_uint8* data)
-{
-    return drwav__bytes_to_u64(data);
-}
-
-DRWAV_API drwav_int64 drwav_bytes_to_s64(const drwav_uint8* data)
-{
-    return drwav__bytes_to_s64(data);
-}
-
-
-DRWAV_API drwav_bool32 drwav_guid_equal(const drwav_uint8 a[16], const drwav_uint8 b[16])
-{
-    return drwav__guid_equal(a, b);
-}
-
-DRWAV_API drwav_bool32 drwav_fourcc_equal(const drwav_uint8* a, const char* b)
-{
-    return drwav__fourcc_equal(a, b);
-}
-
-#endif  /* DR_WAV_IMPLEMENTATION */
-
-/*
-RELEASE NOTES - v0.11.0
-=======================
-Version 0.11.0 has breaking API changes.
-
-Improved Client-Defined Memory Allocation
------------------------------------------
-The main change with this release is the addition of a more flexible way of implementing custom memory allocation routines. The
-existing system of DRWAV_MALLOC, DRWAV_REALLOC and DRWAV_FREE are still in place and will be used by default when no custom
-allocation callbacks are specified.
-
-To use the new system, you pass in a pointer to a drwav_allocation_callbacks object to drwav_init() and family, like this:
-
-    void* my_malloc(size_t sz, void* pUserData)
-    {
-        return malloc(sz);
-    }
-    void* my_realloc(void* p, size_t sz, void* pUserData)
-    {
-        return realloc(p, sz);
-    }
-    void my_free(void* p, void* pUserData)
-    {
-        free(p);
-    }
-
-    ...
-
-    drwav_allocation_callbacks allocationCallbacks;
-    allocationCallbacks.pUserData = &myData;
-    allocationCallbacks.onMalloc  = my_malloc;
-    allocationCallbacks.onRealloc = my_realloc;
-    allocationCallbacks.onFree    = my_free;
-    drwav_init_file(&wav, "my_file.wav", &allocationCallbacks);
-
-The advantage of this new system is that it allows you to specify user data which will be passed in to the allocation routines.
-
-Passing in null for the allocation callbacks object will cause dr_wav to use defaults which is the same as DRWAV_MALLOC,
-DRWAV_REALLOC and DRWAV_FREE and the equivalent of how it worked in previous versions.
-
-Every API that opens a drwav object now takes this extra parameter. These include the following:
-
-    drwav_init()
-    drwav_init_ex()
-    drwav_init_file()
-    drwav_init_file_ex()
-    drwav_init_file_w()
-    drwav_init_file_w_ex()
-    drwav_init_memory()
-    drwav_init_memory_ex()
-    drwav_init_write()
-    drwav_init_write_sequential()
-    drwav_init_write_sequential_pcm_frames()
-    drwav_init_file_write()
-    drwav_init_file_write_sequential()
-    drwav_init_file_write_sequential_pcm_frames()
-    drwav_init_file_write_w()
-    drwav_init_file_write_sequential_w()
-    drwav_init_file_write_sequential_pcm_frames_w()
-    drwav_init_memory_write()
-    drwav_init_memory_write_sequential()
-    drwav_init_memory_write_sequential_pcm_frames()
-    drwav_open_and_read_pcm_frames_s16()
-    drwav_open_and_read_pcm_frames_f32()
-    drwav_open_and_read_pcm_frames_s32()
-    drwav_open_file_and_read_pcm_frames_s16()
-    drwav_open_file_and_read_pcm_frames_f32()
-    drwav_open_file_and_read_pcm_frames_s32()
-    drwav_open_file_and_read_pcm_frames_s16_w()
-    drwav_open_file_and_read_pcm_frames_f32_w()
-    drwav_open_file_and_read_pcm_frames_s32_w()
-    drwav_open_memory_and_read_pcm_frames_s16()
-    drwav_open_memory_and_read_pcm_frames_f32()
-    drwav_open_memory_and_read_pcm_frames_s32()
-
-Endian Improvements
--------------------
-Previously, the following APIs returned little-endian audio data. These now return native-endian data. This improves compatibility
-on big-endian architectures.
-
-    drwav_read_pcm_frames()
-    drwav_read_pcm_frames_s16()
-    drwav_read_pcm_frames_s32()
-    drwav_read_pcm_frames_f32()
-    drwav_open_and_read_pcm_frames_s16()
-    drwav_open_and_read_pcm_frames_s32()
-    drwav_open_and_read_pcm_frames_f32()
-    drwav_open_file_and_read_pcm_frames_s16()
-    drwav_open_file_and_read_pcm_frames_s32()
-    drwav_open_file_and_read_pcm_frames_f32()
-    drwav_open_file_and_read_pcm_frames_s16_w()
-    drwav_open_file_and_read_pcm_frames_s32_w()
-    drwav_open_file_and_read_pcm_frames_f32_w()
-    drwav_open_memory_and_read_pcm_frames_s16()
-    drwav_open_memory_and_read_pcm_frames_s32()
-    drwav_open_memory_and_read_pcm_frames_f32()
-
-APIs have been added to give you explicit control over whether or not audio data is read or written in big- or little-endian byte
-order:
-
-    drwav_read_pcm_frames_le()
-    drwav_read_pcm_frames_be()
-    drwav_read_pcm_frames_s16le()
-    drwav_read_pcm_frames_s16be()
-    drwav_read_pcm_frames_f32le()
-    drwav_read_pcm_frames_f32be()
-    drwav_read_pcm_frames_s32le()
-    drwav_read_pcm_frames_s32be()
-    drwav_write_pcm_frames_le()
-    drwav_write_pcm_frames_be()
-
-Removed APIs
-------------
-The following APIs were deprecated in version 0.10.0 and have now been removed:
-
-    drwav_open()
-    drwav_open_ex()
-    drwav_open_write()
-    drwav_open_write_sequential()
-    drwav_open_file()
-    drwav_open_file_ex()
-    drwav_open_file_write()
-    drwav_open_file_write_sequential()
-    drwav_open_memory()
-    drwav_open_memory_ex()
-    drwav_open_memory_write()
-    drwav_open_memory_write_sequential()
-    drwav_close()
-
-
-
-RELEASE NOTES - v0.10.0
-=======================
-Version 0.10.0 has breaking API changes. There are no significant bug fixes in this release, so if you are affected you do
-not need to upgrade.
-
-Removed APIs
-------------
-The following APIs were deprecated in version 0.9.0 and have been completely removed in version 0.10.0:
-
-    drwav_read()
-    drwav_read_s16()
-    drwav_read_f32()
-    drwav_read_s32()
-    drwav_seek_to_sample()
-    drwav_write()
-    drwav_open_and_read_s16()
-    drwav_open_and_read_f32()
-    drwav_open_and_read_s32()
-    drwav_open_file_and_read_s16()
-    drwav_open_file_and_read_f32()
-    drwav_open_file_and_read_s32()
-    drwav_open_memory_and_read_s16()
-    drwav_open_memory_and_read_f32()
-    drwav_open_memory_and_read_s32()
-    drwav::totalSampleCount
-
-See release notes for version 0.9.0 at the bottom of this file for replacement APIs.
-
-Deprecated APIs
----------------
-The following APIs have been deprecated. There is a confusing and completely arbitrary difference between drwav_init*() and
-drwav_open*(), where drwav_init*() initializes a pre-allocated drwav object, whereas drwav_open*() will first allocated a
-drwav object on the heap and then initialize it. drwav_open*() has been deprecated which means you must now use a pre-
-allocated drwav object with drwav_init*(). If you need the previous functionality, you can just do a malloc() followed by
-a called to one of the drwav_init*() APIs.
-
-    drwav_open()
-    drwav_open_ex()
-    drwav_open_write()
-    drwav_open_write_sequential()
-    drwav_open_file()
-    drwav_open_file_ex()
-    drwav_open_file_write()
-    drwav_open_file_write_sequential()
-    drwav_open_memory()
-    drwav_open_memory_ex()
-    drwav_open_memory_write()
-    drwav_open_memory_write_sequential()
-    drwav_close()
-
-These APIs will be removed completely in a future version. The rationale for this change is to remove confusion between the
-two different ways to initialize a drwav object.
-*/
-
-/*
-REVISION HISTORY
-================
-v0.12.5 - 2020-05-27
-  - Minor documentation fix.
-
-v0.12.4 - 2020-05-16
-  - Replace assert() with DRWAV_ASSERT().
-  - Add compile-time and run-time version querying.
-    - DRWAV_VERSION_MINOR
-    - DRWAV_VERSION_MAJOR
-    - DRWAV_VERSION_REVISION
-    - DRWAV_VERSION_STRING
-    - drwav_version()
-    - drwav_version_string()
-
-v0.12.3 - 2020-04-30
-  - Fix compilation errors with VC6.
-
-v0.12.2 - 2020-04-21
-  - Fix a bug where drwav_init_file() does not close the file handle after attempting to load an erroneous file.
-
-v0.12.1 - 2020-04-13
-  - Fix some pedantic warnings.
-
-v0.12.0 - 2020-04-04
-  - API CHANGE: Add container and format parameters to the chunk callback.
-  - Minor documentation updates.
-
-v0.11.5 - 2020-03-07
-  - Fix compilation error with Visual Studio .NET 2003.
-
-v0.11.4 - 2020-01-29
-  - Fix some static analysis warnings.
-  - Fix a bug when reading f32 samples from an A-law encoded stream.
-
-v0.11.3 - 2020-01-12
-  - Minor changes to some f32 format conversion routines.
-  - Minor bug fix for ADPCM conversion when end of file is reached.
-
-v0.11.2 - 2019-12-02
-  - Fix a possible crash when using custom memory allocators without a custom realloc() implementation.
-  - Fix an integer overflow bug.
-  - Fix a null pointer dereference bug.
-  - Add limits to sample rate, channels and bits per sample to tighten up some validation.
-
-v0.11.1 - 2019-10-07
-  - Internal code clean up.
-
-v0.11.0 - 2019-10-06
-  - API CHANGE: Add support for user defined memory allocation routines. This system allows the program to specify their own memory allocation
-    routines with a user data pointer for client-specific contextual data. This adds an extra parameter to the end of the following APIs:
-    - drwav_init()
-    - drwav_init_ex()
-    - drwav_init_file()
-    - drwav_init_file_ex()
-    - drwav_init_file_w()
-    - drwav_init_file_w_ex()
-    - drwav_init_memory()
-    - drwav_init_memory_ex()
-    - drwav_init_write()
-    - drwav_init_write_sequential()
-    - drwav_init_write_sequential_pcm_frames()
-    - drwav_init_file_write()
-    - drwav_init_file_write_sequential()
-    - drwav_init_file_write_sequential_pcm_frames()
-    - drwav_init_file_write_w()
-    - drwav_init_file_write_sequential_w()
-    - drwav_init_file_write_sequential_pcm_frames_w()
-    - drwav_init_memory_write()
-    - drwav_init_memory_write_sequential()
-    - drwav_init_memory_write_sequential_pcm_frames()
-    - drwav_open_and_read_pcm_frames_s16()
-    - drwav_open_and_read_pcm_frames_f32()
-    - drwav_open_and_read_pcm_frames_s32()
-    - drwav_open_file_and_read_pcm_frames_s16()
-    - drwav_open_file_and_read_pcm_frames_f32()
-    - drwav_open_file_and_read_pcm_frames_s32()
-    - drwav_open_file_and_read_pcm_frames_s16_w()
-    - drwav_open_file_and_read_pcm_frames_f32_w()
-    - drwav_open_file_and_read_pcm_frames_s32_w()
-    - drwav_open_memory_and_read_pcm_frames_s16()
-    - drwav_open_memory_and_read_pcm_frames_f32()
-    - drwav_open_memory_and_read_pcm_frames_s32()
-    Set this extra parameter to NULL to use defaults which is the same as the previous behaviour. Setting this NULL will use
-    DRWAV_MALLOC, DRWAV_REALLOC and DRWAV_FREE.
-  - Add support for reading and writing PCM frames in an explicit endianness. New APIs:
-    - drwav_read_pcm_frames_le()
-    - drwav_read_pcm_frames_be()
-    - drwav_read_pcm_frames_s16le()
-    - drwav_read_pcm_frames_s16be()
-    - drwav_read_pcm_frames_f32le()
-    - drwav_read_pcm_frames_f32be()
-    - drwav_read_pcm_frames_s32le()
-    - drwav_read_pcm_frames_s32be()
-    - drwav_write_pcm_frames_le()
-    - drwav_write_pcm_frames_be()
-  - Remove deprecated APIs.
-  - API CHANGE: The following APIs now return native-endian data. Previously they returned little-endian data.
-    - drwav_read_pcm_frames()
-    - drwav_read_pcm_frames_s16()
-    - drwav_read_pcm_frames_s32()
-    - drwav_read_pcm_frames_f32()
-    - drwav_open_and_read_pcm_frames_s16()
-    - drwav_open_and_read_pcm_frames_s32()
-    - drwav_open_and_read_pcm_frames_f32()
-    - drwav_open_file_and_read_pcm_frames_s16()
-    - drwav_open_file_and_read_pcm_frames_s32()
-    - drwav_open_file_and_read_pcm_frames_f32()
-    - drwav_open_file_and_read_pcm_frames_s16_w()
-    - drwav_open_file_and_read_pcm_frames_s32_w()
-    - drwav_open_file_and_read_pcm_frames_f32_w()
-    - drwav_open_memory_and_read_pcm_frames_s16()
-    - drwav_open_memory_and_read_pcm_frames_s32()
-    - drwav_open_memory_and_read_pcm_frames_f32()
-
-v0.10.1 - 2019-08-31
-  - Correctly handle partial trailing ADPCM blocks.
-
-v0.10.0 - 2019-08-04
-  - Remove deprecated APIs.
-  - Add wchar_t variants for file loading APIs:
-      drwav_init_file_w()
-      drwav_init_file_ex_w()
-      drwav_init_file_write_w()
-      drwav_init_file_write_sequential_w()
-  - Add drwav_target_write_size_bytes() which calculates the total size in bytes of a WAV file given a format and sample count.
-  - Add APIs for specifying the PCM frame count instead of the sample count when opening in sequential write mode:
-      drwav_init_write_sequential_pcm_frames()
-      drwav_init_file_write_sequential_pcm_frames()
-      drwav_init_file_write_sequential_pcm_frames_w()
-      drwav_init_memory_write_sequential_pcm_frames()
-  - Deprecate drwav_open*() and drwav_close():
-      drwav_open()
-      drwav_open_ex()
-      drwav_open_write()
-      drwav_open_write_sequential()
-      drwav_open_file()
-      drwav_open_file_ex()
-      drwav_open_file_write()
-      drwav_open_file_write_sequential()
-      drwav_open_memory()
-      drwav_open_memory_ex()
-      drwav_open_memory_write()
-      drwav_open_memory_write_sequential()
-      drwav_close()
-  - Minor documentation updates.
-
-v0.9.2 - 2019-05-21
-  - Fix warnings.
-
-v0.9.1 - 2019-05-05
-  - Add support for C89.
-  - Change license to choice of public domain or MIT-0.
-
-v0.9.0 - 2018-12-16
-  - API CHANGE: Add new reading APIs for reading by PCM frames instead of samples. Old APIs have been deprecated and
-    will be removed in v0.10.0. Deprecated APIs and their replacements:
-      drwav_read()                     -> drwav_read_pcm_frames()
-      drwav_read_s16()                 -> drwav_read_pcm_frames_s16()
-      drwav_read_f32()                 -> drwav_read_pcm_frames_f32()
-      drwav_read_s32()                 -> drwav_read_pcm_frames_s32()
-      drwav_seek_to_sample()           -> drwav_seek_to_pcm_frame()
-      drwav_write()                    -> drwav_write_pcm_frames()
-      drwav_open_and_read_s16()        -> drwav_open_and_read_pcm_frames_s16()
-      drwav_open_and_read_f32()        -> drwav_open_and_read_pcm_frames_f32()
-      drwav_open_and_read_s32()        -> drwav_open_and_read_pcm_frames_s32()
-      drwav_open_file_and_read_s16()   -> drwav_open_file_and_read_pcm_frames_s16()
-      drwav_open_file_and_read_f32()   -> drwav_open_file_and_read_pcm_frames_f32()
-      drwav_open_file_and_read_s32()   -> drwav_open_file_and_read_pcm_frames_s32()
-      drwav_open_memory_and_read_s16() -> drwav_open_memory_and_read_pcm_frames_s16()
-      drwav_open_memory_and_read_f32() -> drwav_open_memory_and_read_pcm_frames_f32()
-      drwav_open_memory_and_read_s32() -> drwav_open_memory_and_read_pcm_frames_s32()
-      drwav::totalSampleCount          -> drwav::totalPCMFrameCount
-  - API CHANGE: Rename drwav_open_and_read_file_*() to drwav_open_file_and_read_*().
-  - API CHANGE: Rename drwav_open_and_read_memory_*() to drwav_open_memory_and_read_*().
-  - Add built-in support for smpl chunks.
-  - Add support for firing a callback for each chunk in the file at initialization time.
-    - This is enabled through the drwav_init_ex(), etc. family of APIs.
-  - Handle invalid FMT chunks more robustly.
-
-v0.8.5 - 2018-09-11
-  - Const correctness.
-  - Fix a potential stack overflow.
-
-v0.8.4 - 2018-08-07
-  - Improve 64-bit detection.
-
-v0.8.3 - 2018-08-05
-  - Fix C++ build on older versions of GCC.
-
-v0.8.2 - 2018-08-02
-  - Fix some big-endian bugs.
-
-v0.8.1 - 2018-06-29
-  - Add support for sequential writing APIs.
-  - Disable seeking in write mode.
-  - Fix bugs with Wave64.
-  - Fix typos.
-
-v0.8 - 2018-04-27
-  - Bug fix.
-  - Start using major.minor.revision versioning.
-
-v0.7f - 2018-02-05
-  - Restrict ADPCM formats to a maximum of 2 channels.
-
-v0.7e - 2018-02-02
-  - Fix a crash.
-
-v0.7d - 2018-02-01
-  - Fix a crash.
-
-v0.7c - 2018-02-01
-  - Set drwav.bytesPerSample to 0 for all compressed formats.
-  - Fix a crash when reading 16-bit floating point WAV files. In this case dr_wav will output silence for
-    all format conversion reading APIs (*_s16, *_s32, *_f32 APIs).
-  - Fix some divide-by-zero errors.
-
-v0.7b - 2018-01-22
-  - Fix errors with seeking of compressed formats.
-  - Fix compilation error when DR_WAV_NO_CONVERSION_API
-
-v0.7a - 2017-11-17
-  - Fix some GCC warnings.
-
-v0.7 - 2017-11-04
-  - Add writing APIs.
-
-v0.6 - 2017-08-16
-  - API CHANGE: Rename dr_* types to drwav_*.
-  - Add support for custom implementations of malloc(), realloc(), etc.
-  - Add support for Microsoft ADPCM.
-  - Add support for IMA ADPCM (DVI, format code 0x11).
-  - Optimizations to drwav_read_s16().
-  - Bug fixes.
-
-v0.5g - 2017-07-16
-  - Change underlying type for booleans to unsigned.
-
-v0.5f - 2017-04-04
-  - Fix a minor bug with drwav_open_and_read_s16() and family.
-
-v0.5e - 2016-12-29
-  - Added support for reading samples as signed 16-bit integers. Use the _s16() family of APIs for this.
-  - Minor fixes to documentation.
-
-v0.5d - 2016-12-28
-  - Use drwav_int* and drwav_uint* sized types to improve compiler support.
-
-v0.5c - 2016-11-11
-  - Properly handle JUNK chunks that come before the FMT chunk.
-
-v0.5b - 2016-10-23
-  - A minor change to drwav_bool8 and drwav_bool32 types.
-
-v0.5a - 2016-10-11
-  - Fixed a bug with drwav_open_and_read() and family due to incorrect argument ordering.
-  - Improve A-law and mu-law efficiency.
-
-v0.5 - 2016-09-29
-  - API CHANGE. Swap the order of "channels" and "sampleRate" parameters in drwav_open_and_read*(). Rationale for this is to
-    keep it consistent with dr_audio and dr_flac.
-
-v0.4b - 2016-09-18
-  - Fixed a typo in documentation.
-
-v0.4a - 2016-09-18
-  - Fixed a typo.
-  - Change date format to ISO 8601 (YYYY-MM-DD)
-
-v0.4 - 2016-07-13
-  - API CHANGE. Make onSeek consistent with dr_flac.
-  - API CHANGE. Rename drwav_seek() to drwav_seek_to_sample() for clarity and consistency with dr_flac.
-  - Added support for Sony Wave64.
-
-v0.3a - 2016-05-28
-  - API CHANGE. Return drwav_bool32 instead of int in onSeek callback.
-  - Fixed a memory leak.
-
-v0.3 - 2016-05-22
-  - Lots of API changes for consistency.
-
-v0.2a - 2016-05-16
-  - Fixed Linux/GCC build.
-
-v0.2 - 2016-05-11
-  - Added support for reading data as signed 32-bit PCM for consistency with dr_flac.
-
-v0.1a - 2016-05-07
-  - Fixed a bug in drwav_open_file() where the file handle would not be closed if the loader failed to initialize.
-
-v0.1 - 2016-05-04
-  - Initial versioned release.
-*/
-
-/*
-This software is available as a choice of the following licenses. Choose
-whichever you prefer.
-
-===============================================================================
-ALTERNATIVE 1 - Public Domain (www.unlicense.org)
-===============================================================================
-This is free and unencumbered software released into the public domain.
-
-Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
-software, either in source code form or as a compiled binary, for any purpose,
-commercial or non-commercial, and by any means.
-
-In jurisdictions that recognize copyright laws, the author or authors of this
-software dedicate any and all copyright interest in the software to the public
-domain. We make this dedication for the benefit of the public at large and to
-the detriment of our heirs and successors. We intend this dedication to be an
-overt act of relinquishment in perpetuity of all present and future rights to
-this software under copyright law.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-For more information, please refer to <http://unlicense.org/>
-
-===============================================================================
-ALTERNATIVE 2 - MIT No Attribution
-===============================================================================
-Copyright 2020 David Reid
-
-Permission is hereby granted, free of charge, to any person obtaining a copy of
-this software and associated documentation files (the "Software"), to deal in
-the Software without restriction, including without limitation the rights to
-use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
-of the Software, and to permit persons to whom the Software is furnished to do
-so.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-*/
+/*
+WAV audio loader and writer. Choice of public domain or MIT-0. See license statements at the end of this file.
+dr_wav - v0.12.5 - 2020-05-27
+
+David Reid - mackron@gmail.com
+
+GitHub: https://github.com/mackron/dr_libs
+*/
+
+/*
+RELEASE NOTES - VERSION 0.12
+============================
+Version 0.12 includes breaking changes to custom chunk handling.
+
+
+Changes to Chunk Callback
+-------------------------
+dr_wav supports the ability to fire a callback when a chunk is encounted (except for WAVE and FMT chunks). The callback has been updated to include both the
+container (RIFF or Wave64) and the FMT chunk which contains information about the format of the data in the wave file.
+
+Previously, there was no direct way to determine the container, and therefore no way discriminate against the different IDs in the chunk header (RIFF and
+Wave64 containers encode chunk ID's differently). The `container` parameter can be used to know which ID to use.
+
+Sometimes it can be useful to know the data format at the time the chunk callback is fired. A pointer to a `drwav_fmt` object is now passed into the chunk
+callback which will give you information about the data format. To determine the sample format, use `drwav_fmt_get_format()`. This will return one of the
+`DR_WAVE_FORMAT_*` tokens.
+*/
+
+/*
+Introduction
+============
+This is a single file library. To use it, do something like the following in one .c file.
+    
+    ```c
+    #define DR_WAV_IMPLEMENTATION
+    #include "dr_wav.h"
+    ```
+
+You can then #include this file in other parts of the program as you would with any other header file. Do something like the following to read audio data:
+
+    ```c
+    drwav wav;
+    if (!drwav_init_file(&wav, "my_song.wav", NULL)) {
+        // Error opening WAV file.
+    }
+
+    drwav_int32* pDecodedInterleavedPCMFrames = malloc(wav.totalPCMFrameCount * wav.channels * sizeof(drwav_int32));
+    size_t numberOfSamplesActuallyDecoded = drwav_read_pcm_frames_s32(&wav, wav.totalPCMFrameCount, pDecodedInterleavedPCMFrames);
+
+    ...
+
+    drwav_uninit(&wav);
+    ```
+
+If you just want to quickly open and read the audio data in a single operation you can do something like this:
+
+    ```c
+    unsigned int channels;
+    unsigned int sampleRate;
+    drwav_uint64 totalPCMFrameCount;
+    float* pSampleData = drwav_open_file_and_read_pcm_frames_f32("my_song.wav", &channels, &sampleRate, &totalPCMFrameCount, NULL);
+    if (pSampleData == NULL) {
+        // Error opening and reading WAV file.
+    }
+
+    ...
+
+    drwav_free(pSampleData);
+    ```
+
+The examples above use versions of the API that convert the audio data to a consistent format (32-bit signed PCM, in this case), but you can still output the
+audio data in its internal format (see notes below for supported formats):
+
+    ```c
+    size_t framesRead = drwav_read_pcm_frames(&wav, wav.totalPCMFrameCount, pDecodedInterleavedPCMFrames);
+    ```
+
+You can also read the raw bytes of audio data, which could be useful if dr_wav does not have native support for a particular data format:
+
+    ```c
+    size_t bytesRead = drwav_read_raw(&wav, bytesToRead, pRawDataBuffer);
+    ```
+
+dr_wav can also be used to output WAV files. This does not currently support compressed formats. To use this, look at `drwav_init_write()`,
+`drwav_init_file_write()`, etc. Use `drwav_write_pcm_frames()` to write samples, or `drwav_write_raw()` to write raw data in the "data" chunk.
+
+    ```c
+    drwav_data_format format;
+    format.container = drwav_container_riff;     // <-- drwav_container_riff = normal WAV files, drwav_container_w64 = Sony Wave64.
+    format.format = DR_WAVE_FORMAT_PCM;          // <-- Any of the DR_WAVE_FORMAT_* codes.
+    format.channels = 2;
+    format.sampleRate = 44100;
+    format.bitsPerSample = 16;
+    drwav_init_file_write(&wav, "data/recording.wav", &format, NULL);
+
+    ...
+
+    drwav_uint64 framesWritten = drwav_write_pcm_frames(pWav, frameCount, pSamples);
+    ```
+
+dr_wav has seamless support the Sony Wave64 format. The decoder will automatically detect it and it should Just Work without any manual intervention.
+
+
+Build Options
+=============
+#define these options before including this file.
+
+#define DR_WAV_NO_CONVERSION_API
+  Disables conversion APIs such as `drwav_read_pcm_frames_f32()` and `drwav_s16_to_f32()`.
+
+#define DR_WAV_NO_STDIO
+  Disables APIs that initialize a decoder from a file such as `drwav_init_file()`, `drwav_init_file_write()`, etc.
+
+
+
+Notes
+=====
+- Samples are always interleaved.
+- The default read function does not do any data conversion. Use `drwav_read_pcm_frames_f32()`, `drwav_read_pcm_frames_s32()` and `drwav_read_pcm_frames_s16()`
+  to read and convert audio data to 32-bit floating point, signed 32-bit integer and signed 16-bit integer samples respectively. Tested and supported internal
+  formats include the following:
+  - Unsigned 8-bit PCM
+  - Signed 12-bit PCM
+  - Signed 16-bit PCM
+  - Signed 24-bit PCM
+  - Signed 32-bit PCM
+  - IEEE 32-bit floating point
+  - IEEE 64-bit floating point
+  - A-law and u-law
+  - Microsoft ADPCM
+  - IMA ADPCM (DVI, format code 0x11)
+- dr_wav will try to read the WAV file as best it can, even if it's not strictly conformant to the WAV format.
+*/
+
+#ifndef dr_wav_h
+#define dr_wav_h
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define DRWAV_STRINGIFY(x)      #x
+#define DRWAV_XSTRINGIFY(x)     DRWAV_STRINGIFY(x)
+
+#define DRWAV_VERSION_MAJOR     0
+#define DRWAV_VERSION_MINOR     12
+#define DRWAV_VERSION_REVISION  5
+#define DRWAV_VERSION_STRING    DRWAV_XSTRINGIFY(DRWAV_VERSION_MAJOR) "." DRWAV_XSTRINGIFY(DRWAV_VERSION_MINOR) "." DRWAV_XSTRINGIFY(DRWAV_VERSION_REVISION)
+
+#include <stddef.h> /* For size_t. */
+
+/* Sized types. Prefer built-in types. Fall back to stdint. */
+#ifdef _MSC_VER
+    #if defined(__clang__)
+        #pragma GCC diagnostic push
+        #pragma GCC diagnostic ignored "-Wlanguage-extension-token"
+        #pragma GCC diagnostic ignored "-Wlong-long"        
+        #pragma GCC diagnostic ignored "-Wc++11-long-long"
+    #endif
+    typedef   signed __int8  drwav_int8;
+    typedef unsigned __int8  drwav_uint8;
+    typedef   signed __int16 drwav_int16;
+    typedef unsigned __int16 drwav_uint16;
+    typedef   signed __int32 drwav_int32;
+    typedef unsigned __int32 drwav_uint32;
+    typedef   signed __int64 drwav_int64;
+    typedef unsigned __int64 drwav_uint64;
+    #if defined(__clang__)
+        #pragma GCC diagnostic pop
+    #endif
+#else
+    #include <stdint.h>
+    typedef int8_t           drwav_int8;
+    typedef uint8_t          drwav_uint8;
+    typedef int16_t          drwav_int16;
+    typedef uint16_t         drwav_uint16;
+    typedef int32_t          drwav_int32;
+    typedef uint32_t         drwav_uint32;
+    typedef int64_t          drwav_int64;
+    typedef uint64_t         drwav_uint64;
+#endif
+typedef drwav_uint8          drwav_bool8;
+typedef drwav_uint32         drwav_bool32;
+#define DRWAV_TRUE           1
+#define DRWAV_FALSE          0
+
+#if !defined(DRWAV_API)
+    #if defined(DRWAV_DLL)
+        #if defined(_WIN32)
+            #define DRWAV_DLL_IMPORT  __declspec(dllimport)
+            #define DRWAV_DLL_EXPORT  __declspec(dllexport)
+            #define DRWAV_DLL_PRIVATE static
+        #else
+            #if defined(__GNUC__) && __GNUC__ >= 4
+                #define DRWAV_DLL_IMPORT  __attribute__((visibility("default")))
+                #define DRWAV_DLL_EXPORT  __attribute__((visibility("default")))
+                #define DRWAV_DLL_PRIVATE __attribute__((visibility("hidden")))
+            #else
+                #define DRWAV_DLL_IMPORT
+                #define DRWAV_DLL_EXPORT
+                #define DRWAV_DLL_PRIVATE static
+            #endif
+        #endif
+
+        #if defined(DR_WAV_IMPLEMENTATION) || defined(DRWAV_IMPLEMENTATION)
+            #define DRWAV_API  DRWAV_DLL_EXPORT
+        #else
+            #define DRWAV_API  DRWAV_DLL_IMPORT
+        #endif
+        #define DRWAV_PRIVATE DRWAV_DLL_PRIVATE
+    #else
+        #define DRWAV_API extern
+        #define DRWAV_PRIVATE static
+    #endif
+#endif
+
+typedef drwav_int32 drwav_result;
+#define DRWAV_SUCCESS                        0
+#define DRWAV_ERROR                         -1   /* A generic error. */
+#define DRWAV_INVALID_ARGS                  -2
+#define DRWAV_INVALID_OPERATION             -3
+#define DRWAV_OUT_OF_MEMORY                 -4
+#define DRWAV_OUT_OF_RANGE                  -5
+#define DRWAV_ACCESS_DENIED                 -6
+#define DRWAV_DOES_NOT_EXIST                -7
+#define DRWAV_ALREADY_EXISTS                -8
+#define DRWAV_TOO_MANY_OPEN_FILES           -9
+#define DRWAV_INVALID_FILE                  -10
+#define DRWAV_TOO_BIG                       -11
+#define DRWAV_PATH_TOO_LONG                 -12
+#define DRWAV_NAME_TOO_LONG                 -13
+#define DRWAV_NOT_DIRECTORY                 -14
+#define DRWAV_IS_DIRECTORY                  -15
+#define DRWAV_DIRECTORY_NOT_EMPTY           -16
+#define DRWAV_END_OF_FILE                   -17
+#define DRWAV_NO_SPACE                      -18
+#define DRWAV_BUSY                          -19
+#define DRWAV_IO_ERROR                      -20
+#define DRWAV_INTERRUPT                     -21
+#define DRWAV_UNAVAILABLE                   -22
+#define DRWAV_ALREADY_IN_USE                -23
+#define DRWAV_BAD_ADDRESS                   -24
+#define DRWAV_BAD_SEEK                      -25
+#define DRWAV_BAD_PIPE                      -26
+#define DRWAV_DEADLOCK                      -27
+#define DRWAV_TOO_MANY_LINKS                -28
+#define DRWAV_NOT_IMPLEMENTED               -29
+#define DRWAV_NO_MESSAGE                    -30
+#define DRWAV_BAD_MESSAGE                   -31
+#define DRWAV_NO_DATA_AVAILABLE             -32
+#define DRWAV_INVALID_DATA                  -33
+#define DRWAV_TIMEOUT                       -34
+#define DRWAV_NO_NETWORK                    -35
+#define DRWAV_NOT_UNIQUE                    -36
+#define DRWAV_NOT_SOCKET                    -37
+#define DRWAV_NO_ADDRESS                    -38
+#define DRWAV_BAD_PROTOCOL                  -39
+#define DRWAV_PROTOCOL_UNAVAILABLE          -40
+#define DRWAV_PROTOCOL_NOT_SUPPORTED        -41
+#define DRWAV_PROTOCOL_FAMILY_NOT_SUPPORTED -42
+#define DRWAV_ADDRESS_FAMILY_NOT_SUPPORTED  -43
+#define DRWAV_SOCKET_NOT_SUPPORTED          -44
+#define DRWAV_CONNECTION_RESET              -45
+#define DRWAV_ALREADY_CONNECTED             -46
+#define DRWAV_NOT_CONNECTED                 -47
+#define DRWAV_CONNECTION_REFUSED            -48
+#define DRWAV_NO_HOST                       -49
+#define DRWAV_IN_PROGRESS                   -50
+#define DRWAV_CANCELLED                     -51
+#define DRWAV_MEMORY_ALREADY_MAPPED         -52
+#define DRWAV_AT_END                        -53
+
+/* Common data formats. */
+#define DR_WAVE_FORMAT_PCM          0x1
+#define DR_WAVE_FORMAT_ADPCM        0x2
+#define DR_WAVE_FORMAT_IEEE_FLOAT   0x3
+#define DR_WAVE_FORMAT_ALAW         0x6
+#define DR_WAVE_FORMAT_MULAW        0x7
+#define DR_WAVE_FORMAT_DVI_ADPCM    0x11
+#define DR_WAVE_FORMAT_EXTENSIBLE   0xFFFE
+
+/* Constants. */
+#ifndef DRWAV_MAX_SMPL_LOOPS
+#define DRWAV_MAX_SMPL_LOOPS        1
+#endif
+
+/* Flags to pass into drwav_init_ex(), etc. */
+#define DRWAV_SEQUENTIAL            0x00000001
+
+DRWAV_API void drwav_version(drwav_uint32* pMajor, drwav_uint32* pMinor, drwav_uint32* pRevision);
+DRWAV_API const char* drwav_version_string();
+
+typedef enum
+{
+    drwav_seek_origin_start,
+    drwav_seek_origin_current
+} drwav_seek_origin;
+
+typedef enum
+{
+    drwav_container_riff,
+    drwav_container_w64
+} drwav_container;
+
+typedef struct
+{
+    union
+    {
+        drwav_uint8 fourcc[4];
+        drwav_uint8 guid[16];
+    } id;
+
+    /* The size in bytes of the chunk. */
+    drwav_uint64 sizeInBytes;
+
+    /*
+    RIFF = 2 byte alignment.
+    W64  = 8 byte alignment.
+    */
+    unsigned int paddingSize;
+} drwav_chunk_header;
+
+typedef struct
+{
+    /*
+    The format tag exactly as specified in the wave file's "fmt" chunk. This can be used by applications
+    that require support for data formats not natively supported by dr_wav.
+    */
+    drwav_uint16 formatTag;
+
+    /* The number of channels making up the audio data. When this is set to 1 it is mono, 2 is stereo, etc. */
+    drwav_uint16 channels;
+
+    /* The sample rate. Usually set to something like 44100. */
+    drwav_uint32 sampleRate;
+
+    /* Average bytes per second. You probably don't need this, but it's left here for informational purposes. */
+    drwav_uint32 avgBytesPerSec;
+
+    /* Block align. This is equal to the number of channels * bytes per sample. */
+    drwav_uint16 blockAlign;
+
+    /* Bits per sample. */
+    drwav_uint16 bitsPerSample;
+
+    /* The size of the extended data. Only used internally for validation, but left here for informational purposes. */
+    drwav_uint16 extendedSize;
+
+    /*
+    The number of valid bits per sample. When <formatTag> is equal to WAVE_FORMAT_EXTENSIBLE, <bitsPerSample>
+    is always rounded up to the nearest multiple of 8. This variable contains information about exactly how
+    many bits are valid per sample. Mainly used for informational purposes.
+    */
+    drwav_uint16 validBitsPerSample;
+
+    /* The channel mask. Not used at the moment. */
+    drwav_uint32 channelMask;
+
+    /* The sub-format, exactly as specified by the wave file. */
+    drwav_uint8 subFormat[16];
+} drwav_fmt;
+
+DRWAV_API drwav_uint16 drwav_fmt_get_format(const drwav_fmt* pFMT);
+
+
+/*
+Callback for when data is read. Return value is the number of bytes actually read.
+
+pUserData   [in]  The user data that was passed to drwav_init() and family.
+pBufferOut  [out] The output buffer.
+bytesToRead [in]  The number of bytes to read.
+
+Returns the number of bytes actually read.
+
+A return value of less than bytesToRead indicates the end of the stream. Do _not_ return from this callback until
+either the entire bytesToRead is filled or you have reached the end of the stream.
+*/
+typedef size_t (* drwav_read_proc)(void* pUserData, void* pBufferOut, size_t bytesToRead);
+
+/*
+Callback for when data is written. Returns value is the number of bytes actually written.
+
+pUserData    [in]  The user data that was passed to drwav_init_write() and family.
+pData        [out] A pointer to the data to write.
+bytesToWrite [in]  The number of bytes to write.
+
+Returns the number of bytes actually written.
+
+If the return value differs from bytesToWrite, it indicates an error.
+*/
+typedef size_t (* drwav_write_proc)(void* pUserData, const void* pData, size_t bytesToWrite);
+
+/*
+Callback for when data needs to be seeked.
+
+pUserData [in] The user data that was passed to drwav_init() and family.
+offset    [in] The number of bytes to move, relative to the origin. Will never be negative.
+origin    [in] The origin of the seek - the current position or the start of the stream.
+
+Returns whether or not the seek was successful.
+
+Whether or not it is relative to the beginning or current position is determined by the "origin" parameter which will be either drwav_seek_origin_start or
+drwav_seek_origin_current.
+*/
+typedef drwav_bool32 (* drwav_seek_proc)(void* pUserData, int offset, drwav_seek_origin origin);
+
+/*
+Callback for when drwav_init_ex() finds a chunk.
+
+pChunkUserData    [in] The user data that was passed to the pChunkUserData parameter of drwav_init_ex() and family.
+onRead            [in] A pointer to the function to call when reading.
+onSeek            [in] A pointer to the function to call when seeking.
+pReadSeekUserData [in] The user data that was passed to the pReadSeekUserData parameter of drwav_init_ex() and family.
+pChunkHeader      [in] A pointer to an object containing basic header information about the chunk. Use this to identify the chunk.
+container         [in] Whether or not the WAV file is a RIFF or Wave64 container. If you're unsure of the difference, assume RIFF.
+pFMT              [in] A pointer to the object containing the contents of the "fmt" chunk.
+
+Returns the number of bytes read + seeked.
+
+To read data from the chunk, call onRead(), passing in pReadSeekUserData as the first parameter. Do the same for seeking with onSeek(). The return value must
+be the total number of bytes you have read _plus_ seeked.
+
+Use the `container` argument to discriminate the fields in `pChunkHeader->id`. If the container is `drwav_container_riff` you should use `id.fourcc`,
+otherwise you should use `id.guid`.
+
+The `pFMT` parameter can be used to determine the data format of the wave file. Use `drwav_fmt_get_format()` to get the sample format, which will be one of the
+`DR_WAVE_FORMAT_*` identifiers. 
+
+The read pointer will be sitting on the first byte after the chunk's header. You must not attempt to read beyond the boundary of the chunk.
+*/
+typedef drwav_uint64 (* drwav_chunk_proc)(void* pChunkUserData, drwav_read_proc onRead, drwav_seek_proc onSeek, void* pReadSeekUserData, const drwav_chunk_header* pChunkHeader, drwav_container container, const drwav_fmt* pFMT);
+
+typedef struct
+{
+    void* pUserData;
+    void* (* onMalloc)(size_t sz, void* pUserData);
+    void* (* onRealloc)(void* p, size_t sz, void* pUserData);
+    void  (* onFree)(void* p, void* pUserData);
+} drwav_allocation_callbacks;
+
+/* Structure for internal use. Only used for loaders opened with drwav_init_memory(). */
+typedef struct
+{
+    const drwav_uint8* data;
+    size_t dataSize;
+    size_t currentReadPos;
+} drwav__memory_stream;
+
+/* Structure for internal use. Only used for writers opened with drwav_init_memory_write(). */
+typedef struct
+{
+    void** ppData;
+    size_t* pDataSize;
+    size_t dataSize;
+    size_t dataCapacity;
+    size_t currentWritePos;
+} drwav__memory_stream_write;
+
+typedef struct
+{
+    drwav_container container;  /* RIFF, W64. */
+    drwav_uint32 format;        /* DR_WAVE_FORMAT_* */
+    drwav_uint32 channels;
+    drwav_uint32 sampleRate;
+    drwav_uint32 bitsPerSample;
+} drwav_data_format;
+
+
+/* See the following for details on the 'smpl' chunk: https://sites.google.com/site/musicgapi/technical-documents/wav-file-format#smpl */
+typedef struct
+{
+    drwav_uint32 cuePointId;
+    drwav_uint32 type;
+    drwav_uint32 start;
+    drwav_uint32 end;
+    drwav_uint32 fraction;
+    drwav_uint32 playCount;
+} drwav_smpl_loop;
+
+ typedef struct
+{
+    drwav_uint32 manufacturer;
+    drwav_uint32 product;
+    drwav_uint32 samplePeriod;
+    drwav_uint32 midiUnityNotes;
+    drwav_uint32 midiPitchFraction;
+    drwav_uint32 smpteFormat;
+    drwav_uint32 smpteOffset;
+    drwav_uint32 numSampleLoops;
+    drwav_uint32 samplerData;
+    drwav_smpl_loop loops[DRWAV_MAX_SMPL_LOOPS];
+} drwav_smpl;
+
+typedef struct
+{
+    /* A pointer to the function to call when more data is needed. */
+    drwav_read_proc onRead;
+
+    /* A pointer to the function to call when data needs to be written. Only used when the drwav object is opened in write mode. */
+    drwav_write_proc onWrite;
+
+    /* A pointer to the function to call when the wav file needs to be seeked. */
+    drwav_seek_proc onSeek;
+
+    /* The user data to pass to callbacks. */
+    void* pUserData;
+
+    /* Allocation callbacks. */
+    drwav_allocation_callbacks allocationCallbacks;
+
+
+    /* Whether or not the WAV file is formatted as a standard RIFF file or W64. */
+    drwav_container container;
+
+
+    /* Structure containing format information exactly as specified by the wav file. */
+    drwav_fmt fmt;
+
+    /* The sample rate. Will be set to something like 44100. */
+    drwav_uint32 sampleRate;
+
+    /* The number of channels. This will be set to 1 for monaural streams, 2 for stereo, etc. */
+    drwav_uint16 channels;
+
+    /* The bits per sample. Will be set to something like 16, 24, etc. */
+    drwav_uint16 bitsPerSample;
+
+    /* Equal to fmt.formatTag, or the value specified by fmt.subFormat if fmt.formatTag is equal to 65534 (WAVE_FORMAT_EXTENSIBLE). */
+    drwav_uint16 translatedFormatTag;
+
+    /* The total number of PCM frames making up the audio data. */
+    drwav_uint64 totalPCMFrameCount;
+
+
+    /* The size in bytes of the data chunk. */
+    drwav_uint64 dataChunkDataSize;
+    
+    /* The position in the stream of the first byte of the data chunk. This is used for seeking. */
+    drwav_uint64 dataChunkDataPos;
+
+    /* The number of bytes remaining in the data chunk. */
+    drwav_uint64 bytesRemaining;
+
+
+    /*
+    Only used in sequential write mode. Keeps track of the desired size of the "data" chunk at the point of initialization time. Always
+    set to 0 for non-sequential writes and when the drwav object is opened in read mode. Used for validation.
+    */
+    drwav_uint64 dataChunkDataSizeTargetWrite;
+
+    /* Keeps track of whether or not the wav writer was initialized in sequential mode. */
+    drwav_bool32 isSequentialWrite;
+
+
+    /* smpl chunk. */
+    drwav_smpl smpl;
+
+
+    /* A hack to avoid a DRWAV_MALLOC() when opening a decoder with drwav_init_memory(). */
+    drwav__memory_stream memoryStream;
+    drwav__memory_stream_write memoryStreamWrite;
+
+    /* Generic data for compressed formats. This data is shared across all block-compressed formats. */
+    struct
+    {
+        drwav_uint64 iCurrentPCMFrame;  /* The index of the next PCM frame that will be read by drwav_read_*(). This is used with "totalPCMFrameCount" to ensure we don't read excess samples at the end of the last block. */
+    } compressed;
+    
+    /* Microsoft ADPCM specific data. */
+    struct
+    {
+        drwav_uint32 bytesRemainingInBlock;
+        drwav_uint16 predictor[2];
+        drwav_int32  delta[2];
+        drwav_int32  cachedFrames[4];  /* Samples are stored in this cache during decoding. */
+        drwav_uint32 cachedFrameCount;
+        drwav_int32  prevFrames[2][2]; /* The previous 2 samples for each channel (2 channels at most). */
+    } msadpcm;
+
+    /* IMA ADPCM specific data. */
+    struct
+    {
+        drwav_uint32 bytesRemainingInBlock;
+        drwav_int32  predictor[2];
+        drwav_int32  stepIndex[2];
+        drwav_int32  cachedFrames[16]; /* Samples are stored in this cache during decoding. */
+        drwav_uint32 cachedFrameCount;
+    } ima;
+} drwav;
+
+
+/*
+Initializes a pre-allocated drwav object for reading.
+
+pWav                         [out]          A pointer to the drwav object being initialized.
+onRead                       [in]           The function to call when data needs to be read from the client.
+onSeek                       [in]           The function to call when the read position of the client data needs to move.
+onChunk                      [in, optional] The function to call when a chunk is enumerated at initialized time.
+pUserData, pReadSeekUserData [in, optional] A pointer to application defined data that will be passed to onRead and onSeek.
+pChunkUserData               [in, optional] A pointer to application defined data that will be passed to onChunk.
+flags                        [in, optional] A set of flags for controlling how things are loaded.
+
+Returns true if successful; false otherwise.
+
+Close the loader with drwav_uninit().
+
+This is the lowest level function for initializing a WAV file. You can also use drwav_init_file() and drwav_init_memory()
+to open the stream from a file or from a block of memory respectively.
+
+Possible values for flags:
+  DRWAV_SEQUENTIAL: Never perform a backwards seek while loading. This disables the chunk callback and will cause this function
+                    to return as soon as the data chunk is found. Any chunks after the data chunk will be ignored.
+
+drwav_init() is equivalent to "drwav_init_ex(pWav, onRead, onSeek, NULL, pUserData, NULL, 0);".
+
+The onChunk callback is not called for the WAVE or FMT chunks. The contents of the FMT chunk can be read from pWav->fmt
+after the function returns.
+
+See also: drwav_init_file(), drwav_init_memory(), drwav_uninit()
+*/
+DRWAV_API drwav_bool32 drwav_init(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_ex(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, drwav_chunk_proc onChunk, void* pReadSeekUserData, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks);
+
+/*
+Initializes a pre-allocated drwav object for writing.
+
+onWrite   [in]           The function to call when data needs to be written.
+onSeek    [in]           The function to call when the write position needs to move.
+pUserData [in, optional] A pointer to application defined data that will be passed to onWrite and onSeek.
+
+Returns true if successful; false otherwise.
+
+Close the writer with drwav_uninit().
+
+This is the lowest level function for initializing a WAV file. You can also use drwav_init_file_write() and drwav_init_memory_write()
+to open the stream from a file or from a block of memory respectively.
+
+If the total sample count is known, you can use drwav_init_write_sequential(). This avoids the need for dr_wav to perform
+a post-processing step for storing the total sample count and the size of the data chunk which requires a backwards seek.
+
+See also: drwav_init_file_write(), drwav_init_memory_write(), drwav_uninit()
+*/
+DRWAV_API drwav_bool32 drwav_init_write(drwav* pWav, const drwav_data_format* pFormat, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_write_sequential(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_write_proc onWrite, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_write_sequential_pcm_frames(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalPCMFrameCount, drwav_write_proc onWrite, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks);
+
+/*
+Utility function to determine the target size of the entire data to be written (including all headers and chunks).
+
+Returns the target size in bytes.
+
+Useful if the application needs to know the size to allocate.
+
+Only writing to the RIFF chunk and one data chunk is currently supported.
+
+See also: drwav_init_write(), drwav_init_file_write(), drwav_init_memory_write()
+*/
+DRWAV_API drwav_uint64 drwav_target_write_size_bytes(const drwav_data_format* pFormat, drwav_uint64 totalSampleCount);
+
+/*
+Uninitializes the given drwav object.
+
+Use this only for objects initialized with drwav_init*() functions (drwav_init(), drwav_init_ex(), drwav_init_write(), drwav_init_write_sequential()).
+*/
+DRWAV_API drwav_result drwav_uninit(drwav* pWav);
+
+
+/*
+Reads raw audio data.
+
+This is the lowest level function for reading audio data. It simply reads the given number of
+bytes of the raw internal sample data.
+
+Consider using drwav_read_pcm_frames_s16(), drwav_read_pcm_frames_s32() or drwav_read_pcm_frames_f32() for
+reading sample data in a consistent format.
+
+Returns the number of bytes actually read.
+*/
+DRWAV_API size_t drwav_read_raw(drwav* pWav, size_t bytesToRead, void* pBufferOut);
+
+/*
+Reads up to the specified number of PCM frames from the WAV file.
+
+The output data will be in the file's internal format, converted to native-endian byte order. Use
+drwav_read_pcm_frames_s16/f32/s32() to read data in a specific format.
+
+If the return value is less than <framesToRead> it means the end of the file has been reached or
+you have requested more PCM frames than can possibly fit in the output buffer.
+
+This function will only work when sample data is of a fixed size and uncompressed. If you are
+using a compressed format consider using drwav_read_raw() or drwav_read_pcm_frames_s16/s32/f32().
+*/
+DRWAV_API drwav_uint64 drwav_read_pcm_frames(drwav* pWav, drwav_uint64 framesToRead, void* pBufferOut);
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_le(drwav* pWav, drwav_uint64 framesToRead, void* pBufferOut);
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_be(drwav* pWav, drwav_uint64 framesToRead, void* pBufferOut);
+
+/*
+Seeks to the given PCM frame.
+
+Returns true if successful; false otherwise.
+*/
+DRWAV_API drwav_bool32 drwav_seek_to_pcm_frame(drwav* pWav, drwav_uint64 targetFrameIndex);
+
+
+/*
+Writes raw audio data.
+
+Returns the number of bytes actually written. If this differs from bytesToWrite, it indicates an error.
+*/
+DRWAV_API size_t drwav_write_raw(drwav* pWav, size_t bytesToWrite, const void* pData);
+
+/*
+Writes PCM frames.
+
+Returns the number of PCM frames written.
+
+Input samples need to be in native-endian byte order. On big-endian architectures the input data will be converted to
+little-endian. Use drwav_write_raw() to write raw audio data without performing any conversion.
+*/
+DRWAV_API drwav_uint64 drwav_write_pcm_frames(drwav* pWav, drwav_uint64 framesToWrite, const void* pData);
+DRWAV_API drwav_uint64 drwav_write_pcm_frames_le(drwav* pWav, drwav_uint64 framesToWrite, const void* pData);
+DRWAV_API drwav_uint64 drwav_write_pcm_frames_be(drwav* pWav, drwav_uint64 framesToWrite, const void* pData);
+
+
+/* Conversion Utilities */
+#ifndef DR_WAV_NO_CONVERSION_API
+
+/*
+Reads a chunk of audio data and converts it to signed 16-bit PCM samples.
+
+Returns the number of PCM frames actually read.
+
+If the return value is less than <framesToRead> it means the end of the file has been reached.
+*/
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_s16(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut);
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_s16le(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut);
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_s16be(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut);
+
+/* Low-level function for converting unsigned 8-bit PCM samples to signed 16-bit PCM samples. */
+DRWAV_API void drwav_u8_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount);
+
+/* Low-level function for converting signed 24-bit PCM samples to signed 16-bit PCM samples. */
+DRWAV_API void drwav_s24_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount);
+
+/* Low-level function for converting signed 32-bit PCM samples to signed 16-bit PCM samples. */
+DRWAV_API void drwav_s32_to_s16(drwav_int16* pOut, const drwav_int32* pIn, size_t sampleCount);
+
+/* Low-level function for converting IEEE 32-bit floating point samples to signed 16-bit PCM samples. */
+DRWAV_API void drwav_f32_to_s16(drwav_int16* pOut, const float* pIn, size_t sampleCount);
+
+/* Low-level function for converting IEEE 64-bit floating point samples to signed 16-bit PCM samples. */
+DRWAV_API void drwav_f64_to_s16(drwav_int16* pOut, const double* pIn, size_t sampleCount);
+
+/* Low-level function for converting A-law samples to signed 16-bit PCM samples. */
+DRWAV_API void drwav_alaw_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount);
+
+/* Low-level function for converting u-law samples to signed 16-bit PCM samples. */
+DRWAV_API void drwav_mulaw_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount);
+
+
+/*
+Reads a chunk of audio data and converts it to IEEE 32-bit floating point samples.
+
+Returns the number of PCM frames actually read.
+
+If the return value is less than <framesToRead> it means the end of the file has been reached.
+*/
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_f32(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut);
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_f32le(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut);
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_f32be(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut);
+
+/* Low-level function for converting unsigned 8-bit PCM samples to IEEE 32-bit floating point samples. */
+DRWAV_API void drwav_u8_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount);
+
+/* Low-level function for converting signed 16-bit PCM samples to IEEE 32-bit floating point samples. */
+DRWAV_API void drwav_s16_to_f32(float* pOut, const drwav_int16* pIn, size_t sampleCount);
+
+/* Low-level function for converting signed 24-bit PCM samples to IEEE 32-bit floating point samples. */
+DRWAV_API void drwav_s24_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount);
+
+/* Low-level function for converting signed 32-bit PCM samples to IEEE 32-bit floating point samples. */
+DRWAV_API void drwav_s32_to_f32(float* pOut, const drwav_int32* pIn, size_t sampleCount);
+
+/* Low-level function for converting IEEE 64-bit floating point samples to IEEE 32-bit floating point samples. */
+DRWAV_API void drwav_f64_to_f32(float* pOut, const double* pIn, size_t sampleCount);
+
+/* Low-level function for converting A-law samples to IEEE 32-bit floating point samples. */
+DRWAV_API void drwav_alaw_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount);
+
+/* Low-level function for converting u-law samples to IEEE 32-bit floating point samples. */
+DRWAV_API void drwav_mulaw_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount);
+
+
+/*
+Reads a chunk of audio data and converts it to signed 32-bit PCM samples.
+
+Returns the number of PCM frames actually read.
+
+If the return value is less than <framesToRead> it means the end of the file has been reached.
+*/
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_s32(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut);
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_s32le(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut);
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_s32be(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut);
+
+/* Low-level function for converting unsigned 8-bit PCM samples to signed 32-bit PCM samples. */
+DRWAV_API void drwav_u8_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount);
+
+/* Low-level function for converting signed 16-bit PCM samples to signed 32-bit PCM samples. */
+DRWAV_API void drwav_s16_to_s32(drwav_int32* pOut, const drwav_int16* pIn, size_t sampleCount);
+
+/* Low-level function for converting signed 24-bit PCM samples to signed 32-bit PCM samples. */
+DRWAV_API void drwav_s24_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount);
+
+/* Low-level function for converting IEEE 32-bit floating point samples to signed 32-bit PCM samples. */
+DRWAV_API void drwav_f32_to_s32(drwav_int32* pOut, const float* pIn, size_t sampleCount);
+
+/* Low-level function for converting IEEE 64-bit floating point samples to signed 32-bit PCM samples. */
+DRWAV_API void drwav_f64_to_s32(drwav_int32* pOut, const double* pIn, size_t sampleCount);
+
+/* Low-level function for converting A-law samples to signed 32-bit PCM samples. */
+DRWAV_API void drwav_alaw_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount);
+
+/* Low-level function for converting u-law samples to signed 32-bit PCM samples. */
+DRWAV_API void drwav_mulaw_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount);
+
+#endif  /* DR_WAV_NO_CONVERSION_API */
+
+
+/* High-Level Convenience Helpers */
+
+#ifndef DR_WAV_NO_STDIO
+/*
+Helper for initializing a wave file for reading using stdio.
+
+This holds the internal FILE object until drwav_uninit() is called. Keep this in mind if you're caching drwav
+objects because the operating system may restrict the number of file handles an application can have open at
+any given time.
+*/
+DRWAV_API drwav_bool32 drwav_init_file(drwav* pWav, const char* filename, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_file_ex(drwav* pWav, const char* filename, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_file_w(drwav* pWav, const wchar_t* filename, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_file_ex_w(drwav* pWav, const wchar_t* filename, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks);
+
+/*
+Helper for initializing a wave file for writing using stdio.
+
+This holds the internal FILE object until drwav_uninit() is called. Keep this in mind if you're caching drwav
+objects because the operating system may restrict the number of file handles an application can have open at
+any given time.
+*/
+DRWAV_API drwav_bool32 drwav_init_file_write(drwav* pWav, const char* filename, const drwav_data_format* pFormat, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_file_write_sequential(drwav* pWav, const char* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_file_write_sequential_pcm_frames(drwav* pWav, const char* filename, const drwav_data_format* pFormat, drwav_uint64 totalPCMFrameCount, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_file_write_w(drwav* pWav, const wchar_t* filename, const drwav_data_format* pFormat, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_file_write_sequential_w(drwav* pWav, const wchar_t* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_file_write_sequential_pcm_frames_w(drwav* pWav, const wchar_t* filename, const drwav_data_format* pFormat, drwav_uint64 totalPCMFrameCount, const drwav_allocation_callbacks* pAllocationCallbacks);
+#endif  /* DR_WAV_NO_STDIO */
+
+/*
+Helper for initializing a loader from a pre-allocated memory buffer.
+
+This does not create a copy of the data. It is up to the application to ensure the buffer remains valid for
+the lifetime of the drwav object.
+
+The buffer should contain the contents of the entire wave file, not just the sample data.
+*/
+DRWAV_API drwav_bool32 drwav_init_memory(drwav* pWav, const void* data, size_t dataSize, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_memory_ex(drwav* pWav, const void* data, size_t dataSize, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks);
+
+/*
+Helper for initializing a writer which outputs data to a memory buffer.
+
+dr_wav will manage the memory allocations, however it is up to the caller to free the data with drwav_free().
+
+The buffer will remain allocated even after drwav_uninit() is called. Indeed, the buffer should not be
+considered valid until after drwav_uninit() has been called anyway.
+*/
+DRWAV_API drwav_bool32 drwav_init_memory_write(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_memory_write_sequential(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_bool32 drwav_init_memory_write_sequential_pcm_frames(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, drwav_uint64 totalPCMFrameCount, const drwav_allocation_callbacks* pAllocationCallbacks);
+
+
+#ifndef DR_WAV_NO_CONVERSION_API
+/*
+Opens and reads an entire wav file in a single operation.
+
+The return value is a heap-allocated buffer containing the audio data. Use drwav_free() to free the buffer.
+*/
+DRWAV_API drwav_int16* drwav_open_and_read_pcm_frames_s16(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API float* drwav_open_and_read_pcm_frames_f32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_int32* drwav_open_and_read_pcm_frames_s32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
+#ifndef DR_WAV_NO_STDIO
+/*
+Opens and decodes an entire wav file in a single operation.
+
+The return value is a heap-allocated buffer containing the audio data. Use drwav_free() to free the buffer.
+*/
+DRWAV_API drwav_int16* drwav_open_file_and_read_pcm_frames_s16(const char* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API float* drwav_open_file_and_read_pcm_frames_f32(const char* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_int32* drwav_open_file_and_read_pcm_frames_s32(const char* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_int16* drwav_open_file_and_read_pcm_frames_s16_w(const wchar_t* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API float* drwav_open_file_and_read_pcm_frames_f32_w(const wchar_t* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_int32* drwav_open_file_and_read_pcm_frames_s32_w(const wchar_t* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
+#endif
+/*
+Opens and decodes an entire wav file from a block of memory in a single operation.
+
+The return value is a heap-allocated buffer containing the audio data. Use drwav_free() to free the buffer.
+*/
+DRWAV_API drwav_int16* drwav_open_memory_and_read_pcm_frames_s16(const void* data, size_t dataSize, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API float* drwav_open_memory_and_read_pcm_frames_f32(const void* data, size_t dataSize, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
+DRWAV_API drwav_int32* drwav_open_memory_and_read_pcm_frames_s32(const void* data, size_t dataSize, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks);
+#endif
+
+/* Frees data that was allocated internally by dr_wav. */
+DRWAV_API void drwav_free(void* p, const drwav_allocation_callbacks* pAllocationCallbacks);
+
+/* Converts bytes from a wav stream to a sized type of native endian. */
+DRWAV_API drwav_uint16 drwav_bytes_to_u16(const drwav_uint8* data);
+DRWAV_API drwav_int16 drwav_bytes_to_s16(const drwav_uint8* data);
+DRWAV_API drwav_uint32 drwav_bytes_to_u32(const drwav_uint8* data);
+DRWAV_API drwav_int32 drwav_bytes_to_s32(const drwav_uint8* data);
+DRWAV_API drwav_uint64 drwav_bytes_to_u64(const drwav_uint8* data);
+DRWAV_API drwav_int64 drwav_bytes_to_s64(const drwav_uint8* data);
+
+/* Compares a GUID for the purpose of checking the type of a Wave64 chunk. */
+DRWAV_API drwav_bool32 drwav_guid_equal(const drwav_uint8 a[16], const drwav_uint8 b[16]);
+
+/* Compares a four-character-code for the purpose of checking the type of a RIFF chunk. */
+DRWAV_API drwav_bool32 drwav_fourcc_equal(const drwav_uint8* a, const char* b);
+
+#ifdef __cplusplus
+}
+#endif
+#endif  /* dr_wav_h */
+
+
+/************************************************************************************************************************************************************
+ ************************************************************************************************************************************************************
+
+ IMPLEMENTATION
+
+ ************************************************************************************************************************************************************
+ ************************************************************************************************************************************************************/
+#if defined(DR_WAV_IMPLEMENTATION) || defined(DRWAV_IMPLEMENTATION)
+#include <stdlib.h>
+#include <string.h> /* For memcpy(), memset() */
+#include <limits.h> /* For INT_MAX */
+
+#ifndef DR_WAV_NO_STDIO
+#include <stdio.h>
+#include <wchar.h>
+#endif
+
+/* Standard library stuff. */
+#ifndef DRWAV_ASSERT
+#include <assert.h>
+#define DRWAV_ASSERT(expression)           assert(expression)
+#endif
+#ifndef DRWAV_MALLOC
+#define DRWAV_MALLOC(sz)                   malloc((sz))
+#endif
+#ifndef DRWAV_REALLOC
+#define DRWAV_REALLOC(p, sz)               realloc((p), (sz))
+#endif
+#ifndef DRWAV_FREE
+#define DRWAV_FREE(p)                      free((p))
+#endif
+#ifndef DRWAV_COPY_MEMORY
+#define DRWAV_COPY_MEMORY(dst, src, sz)    memcpy((dst), (src), (sz))
+#endif
+#ifndef DRWAV_ZERO_MEMORY
+#define DRWAV_ZERO_MEMORY(p, sz)           memset((p), 0, (sz))
+#endif
+#ifndef DRWAV_ZERO_OBJECT
+#define DRWAV_ZERO_OBJECT(p)               DRWAV_ZERO_MEMORY((p), sizeof(*p))
+#endif
+
+#define drwav_countof(x)                   (sizeof(x) / sizeof(x[0]))
+#define drwav_align(x, a)                  ((((x) + (a) - 1) / (a)) * (a))
+#define drwav_min(a, b)                    (((a) < (b)) ? (a) : (b))
+#define drwav_max(a, b)                    (((a) > (b)) ? (a) : (b))
+#define drwav_clamp(x, lo, hi)             (drwav_max((lo), drwav_min((hi), (x))))
+
+#define DRWAV_MAX_SIMD_VECTOR_SIZE         64  /* 64 for AVX-512 in the future. */
+
+/* CPU architecture. */
+#if defined(__x86_64__) || defined(_M_X64)
+    #define DRWAV_X64
+#elif defined(__i386) || defined(_M_IX86)
+    #define DRWAV_X86
+#elif defined(__arm__) || defined(_M_ARM)
+    #define DRWAV_ARM
+#endif
+
+#ifdef _MSC_VER
+    #define DRWAV_INLINE __forceinline
+#elif defined(__GNUC__)
+    /*
+    I've had a bug report where GCC is emitting warnings about functions possibly not being inlineable. This warning happens when
+    the __attribute__((always_inline)) attribute is defined without an "inline" statement. I think therefore there must be some
+    case where "__inline__" is not always defined, thus the compiler emitting these warnings. When using -std=c89 or -ansi on the
+    command line, we cannot use the "inline" keyword and instead need to use "__inline__". In an attempt to work around this issue
+    I am using "__inline__" only when we're compiling in strict ANSI mode.
+    */
+    #if defined(__STRICT_ANSI__)
+        #define DRWAV_INLINE __inline__ __attribute__((always_inline))
+    #else
+        #define DRWAV_INLINE inline __attribute__((always_inline))
+    #endif
+#else
+    #define DRWAV_INLINE
+#endif
+
+#if defined(SIZE_MAX)
+    #define DRWAV_SIZE_MAX  SIZE_MAX
+#else
+    #if defined(_WIN64) || defined(_LP64) || defined(__LP64__)
+        #define DRWAV_SIZE_MAX  ((drwav_uint64)0xFFFFFFFFFFFFFFFF)
+    #else
+        #define DRWAV_SIZE_MAX  0xFFFFFFFF
+    #endif
+#endif
+
+#if defined(_MSC_VER) && _MSC_VER >= 1400
+    #define DRWAV_HAS_BYTESWAP16_INTRINSIC
+    #define DRWAV_HAS_BYTESWAP32_INTRINSIC
+    #define DRWAV_HAS_BYTESWAP64_INTRINSIC
+#elif defined(__clang__)
+    #if defined(__has_builtin)
+        #if __has_builtin(__builtin_bswap16)
+            #define DRWAV_HAS_BYTESWAP16_INTRINSIC
+        #endif
+        #if __has_builtin(__builtin_bswap32)
+            #define DRWAV_HAS_BYTESWAP32_INTRINSIC
+        #endif
+        #if __has_builtin(__builtin_bswap64)
+            #define DRWAV_HAS_BYTESWAP64_INTRINSIC
+        #endif
+    #endif
+#elif defined(__GNUC__)
+    #if ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
+        #define DRWAV_HAS_BYTESWAP32_INTRINSIC
+        #define DRWAV_HAS_BYTESWAP64_INTRINSIC
+    #endif
+    #if ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))
+        #define DRWAV_HAS_BYTESWAP16_INTRINSIC
+    #endif
+#endif
+
+DRWAV_API void drwav_version(drwav_uint32* pMajor, drwav_uint32* pMinor, drwav_uint32* pRevision)
+{
+    if (pMajor) {
+        *pMajor = DRWAV_VERSION_MAJOR;
+    }
+
+    if (pMinor) {
+        *pMinor = DRWAV_VERSION_MINOR;
+    }
+
+    if (pRevision) {
+        *pRevision = DRWAV_VERSION_REVISION;
+    }
+}
+
+DRWAV_API const char* drwav_version_string()
+{
+    return DRWAV_VERSION_STRING;
+}
+
+/*
+These limits are used for basic validation when initializing the decoder. If you exceed these limits, first of all: what on Earth are
+you doing?! (Let me know, I'd be curious!) Second, you can adjust these by #define-ing them before the dr_wav implementation.
+*/
+#ifndef DRWAV_MAX_SAMPLE_RATE
+#define DRWAV_MAX_SAMPLE_RATE       384000
+#endif
+#ifndef DRWAV_MAX_CHANNELS
+#define DRWAV_MAX_CHANNELS          256
+#endif
+#ifndef DRWAV_MAX_BITS_PER_SAMPLE
+#define DRWAV_MAX_BITS_PER_SAMPLE   64
+#endif
+
+static const drwav_uint8 drwavGUID_W64_RIFF[16] = {0x72,0x69,0x66,0x66, 0x2E,0x91, 0xCF,0x11, 0xA5,0xD6, 0x28,0xDB,0x04,0xC1,0x00,0x00};    /* 66666972-912E-11CF-A5D6-28DB04C10000 */
+static const drwav_uint8 drwavGUID_W64_WAVE[16] = {0x77,0x61,0x76,0x65, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};    /* 65766177-ACF3-11D3-8CD1-00C04F8EDB8A */
+static const drwav_uint8 drwavGUID_W64_JUNK[16] = {0x6A,0x75,0x6E,0x6B, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};    /* 6B6E756A-ACF3-11D3-8CD1-00C04F8EDB8A */
+static const drwav_uint8 drwavGUID_W64_FMT [16] = {0x66,0x6D,0x74,0x20, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};    /* 20746D66-ACF3-11D3-8CD1-00C04F8EDB8A */
+static const drwav_uint8 drwavGUID_W64_FACT[16] = {0x66,0x61,0x63,0x74, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};    /* 74636166-ACF3-11D3-8CD1-00C04F8EDB8A */
+static const drwav_uint8 drwavGUID_W64_DATA[16] = {0x64,0x61,0x74,0x61, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};    /* 61746164-ACF3-11D3-8CD1-00C04F8EDB8A */
+static const drwav_uint8 drwavGUID_W64_SMPL[16] = {0x73,0x6D,0x70,0x6C, 0xF3,0xAC, 0xD3,0x11, 0x8C,0xD1, 0x00,0xC0,0x4F,0x8E,0xDB,0x8A};    /* 6C706D73-ACF3-11D3-8CD1-00C04F8EDB8A */
+
+static DRWAV_INLINE drwav_bool32 drwav__guid_equal(const drwav_uint8 a[16], const drwav_uint8 b[16])
+{
+    int i;
+    for (i = 0; i < 16; i += 1) {
+        if (a[i] != b[i]) {
+            return DRWAV_FALSE;
+        }
+    }
+
+    return DRWAV_TRUE;
+}
+
+static DRWAV_INLINE drwav_bool32 drwav__fourcc_equal(const drwav_uint8* a, const char* b)
+{
+    return
+        a[0] == b[0] &&
+        a[1] == b[1] &&
+        a[2] == b[2] &&
+        a[3] == b[3];
+}
+
+
+
+static DRWAV_INLINE int drwav__is_little_endian(void)
+{
+#if defined(DRWAV_X86) || defined(DRWAV_X64)
+    return DRWAV_TRUE;
+#elif defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && __BYTE_ORDER == __LITTLE_ENDIAN
+    return DRWAV_TRUE;
+#else
+    int n = 1;
+    return (*(char*)&n) == 1;
+#endif
+}
+
+static DRWAV_INLINE drwav_uint16 drwav__bytes_to_u16(const drwav_uint8* data)
+{
+    return (data[0] << 0) | (data[1] << 8);
+}
+
+static DRWAV_INLINE drwav_int16 drwav__bytes_to_s16(const drwav_uint8* data)
+{
+    return (short)drwav__bytes_to_u16(data);
+}
+
+static DRWAV_INLINE drwav_uint32 drwav__bytes_to_u32(const drwav_uint8* data)
+{
+    return (data[0] << 0) | (data[1] << 8) | (data[2] << 16) | (data[3] << 24);
+}
+
+static DRWAV_INLINE drwav_int32 drwav__bytes_to_s32(const drwav_uint8* data)
+{
+    return (drwav_int32)drwav__bytes_to_u32(data);
+}
+
+static DRWAV_INLINE drwav_uint64 drwav__bytes_to_u64(const drwav_uint8* data)
+{
+    return
+        ((drwav_uint64)data[0] <<  0) | ((drwav_uint64)data[1] <<  8) | ((drwav_uint64)data[2] << 16) | ((drwav_uint64)data[3] << 24) |
+        ((drwav_uint64)data[4] << 32) | ((drwav_uint64)data[5] << 40) | ((drwav_uint64)data[6] << 48) | ((drwav_uint64)data[7] << 56);
+}
+
+static DRWAV_INLINE drwav_int64 drwav__bytes_to_s64(const drwav_uint8* data)
+{
+    return (drwav_int64)drwav__bytes_to_u64(data);
+}
+
+static DRWAV_INLINE void drwav__bytes_to_guid(const drwav_uint8* data, drwav_uint8* guid)
+{
+    int i;
+    for (i = 0; i < 16; ++i) {
+        guid[i] = data[i];
+    }
+}
+
+
+static DRWAV_INLINE drwav_uint16 drwav__bswap16(drwav_uint16 n)
+{
+#ifdef DRWAV_HAS_BYTESWAP16_INTRINSIC
+    #if defined(_MSC_VER)
+        return _byteswap_ushort(n);
+    #elif defined(__GNUC__) || defined(__clang__)
+        return __builtin_bswap16(n);
+    #else
+        #error "This compiler does not support the byte swap intrinsic."
+    #endif
+#else
+    return ((n & 0xFF00) >> 8) |
+           ((n & 0x00FF) << 8);
+#endif
+}
+
+static DRWAV_INLINE drwav_uint32 drwav__bswap32(drwav_uint32 n)
+{
+#ifdef DRWAV_HAS_BYTESWAP32_INTRINSIC
+    #if defined(_MSC_VER)
+        return _byteswap_ulong(n);
+    #elif defined(__GNUC__) || defined(__clang__)
+        #if defined(DRWAV_ARM) && (defined(__ARM_ARCH) && __ARM_ARCH >= 6) && !defined(DRWAV_64BIT)   /* <-- 64-bit inline assembly has not been tested, so disabling for now. */
+            /* Inline assembly optimized implementation for ARM. In my testing, GCC does not generate optimized code with __builtin_bswap32(). */
+            drwav_uint32 r;
+            __asm__ __volatile__ (
+            #if defined(DRWAV_64BIT)
+                "rev %w[out], %w[in]" : [out]"=r"(r) : [in]"r"(n)   /* <-- This is untested. If someone in the community could test this, that would be appreciated! */
+            #else
+                "rev %[out], %[in]" : [out]"=r"(r) : [in]"r"(n)
+            #endif
+            );
+            return r;
+        #else
+            return __builtin_bswap32(n);
+        #endif
+    #else
+        #error "This compiler does not support the byte swap intrinsic."
+    #endif
+#else
+    return ((n & 0xFF000000) >> 24) |
+           ((n & 0x00FF0000) >>  8) |
+           ((n & 0x0000FF00) <<  8) |
+           ((n & 0x000000FF) << 24);
+#endif
+}
+
+static DRWAV_INLINE drwav_uint64 drwav__bswap64(drwav_uint64 n)
+{
+#ifdef DRWAV_HAS_BYTESWAP64_INTRINSIC
+    #if defined(_MSC_VER)
+        return _byteswap_uint64(n);
+    #elif defined(__GNUC__) || defined(__clang__)
+        return __builtin_bswap64(n);
+    #else
+        #error "This compiler does not support the byte swap intrinsic."
+    #endif
+#else
+    return ((n & (drwav_uint64)0xFF00000000000000) >> 56) |
+           ((n & (drwav_uint64)0x00FF000000000000) >> 40) |
+           ((n & (drwav_uint64)0x0000FF0000000000) >> 24) |
+           ((n & (drwav_uint64)0x000000FF00000000) >>  8) |
+           ((n & (drwav_uint64)0x00000000FF000000) <<  8) |
+           ((n & (drwav_uint64)0x0000000000FF0000) << 24) |
+           ((n & (drwav_uint64)0x000000000000FF00) << 40) |
+           ((n & (drwav_uint64)0x00000000000000FF) << 56);
+#endif
+}
+
+
+static DRWAV_INLINE drwav_int16 drwav__bswap_s16(drwav_int16 n)
+{
+    return (drwav_int16)drwav__bswap16((drwav_uint16)n);
+}
+
+static DRWAV_INLINE void drwav__bswap_samples_s16(drwav_int16* pSamples, drwav_uint64 sampleCount)
+{
+    drwav_uint64 iSample;
+    for (iSample = 0; iSample < sampleCount; iSample += 1) {
+        pSamples[iSample] = drwav__bswap_s16(pSamples[iSample]);
+    }
+}
+
+
+static DRWAV_INLINE void drwav__bswap_s24(drwav_uint8* p)
+{
+    drwav_uint8 t;
+    t = p[0];
+    p[0] = p[2];
+    p[2] = t;
+}
+
+static DRWAV_INLINE void drwav__bswap_samples_s24(drwav_uint8* pSamples, drwav_uint64 sampleCount)
+{
+    drwav_uint64 iSample;
+    for (iSample = 0; iSample < sampleCount; iSample += 1) {
+        drwav_uint8* pSample = pSamples + (iSample*3);
+        drwav__bswap_s24(pSample);
+    }
+}
+
+
+static DRWAV_INLINE drwav_int32 drwav__bswap_s32(drwav_int32 n)
+{
+    return (drwav_int32)drwav__bswap32((drwav_uint32)n);
+}
+
+static DRWAV_INLINE void drwav__bswap_samples_s32(drwav_int32* pSamples, drwav_uint64 sampleCount)
+{
+    drwav_uint64 iSample;
+    for (iSample = 0; iSample < sampleCount; iSample += 1) {
+        pSamples[iSample] = drwav__bswap_s32(pSamples[iSample]);
+    }
+}
+
+
+static DRWAV_INLINE float drwav__bswap_f32(float n)
+{
+    union {
+        drwav_uint32 i;
+        float f;
+    } x;
+    x.f = n;
+    x.i = drwav__bswap32(x.i);
+
+    return x.f;
+}
+
+static DRWAV_INLINE void drwav__bswap_samples_f32(float* pSamples, drwav_uint64 sampleCount)
+{
+    drwav_uint64 iSample;
+    for (iSample = 0; iSample < sampleCount; iSample += 1) {
+        pSamples[iSample] = drwav__bswap_f32(pSamples[iSample]);
+    }
+}
+
+
+static DRWAV_INLINE double drwav__bswap_f64(double n)
+{
+    union {
+        drwav_uint64 i;
+        double f;
+    } x;
+    x.f = n;
+    x.i = drwav__bswap64(x.i);
+
+    return x.f;
+}
+
+static DRWAV_INLINE void drwav__bswap_samples_f64(double* pSamples, drwav_uint64 sampleCount)
+{
+    drwav_uint64 iSample;
+    for (iSample = 0; iSample < sampleCount; iSample += 1) {
+        pSamples[iSample] = drwav__bswap_f64(pSamples[iSample]);
+    }
+}
+
+
+static DRWAV_INLINE void drwav__bswap_samples_pcm(void* pSamples, drwav_uint64 sampleCount, drwav_uint32 bytesPerSample)
+{
+    /* Assumes integer PCM. Floating point PCM is done in drwav__bswap_samples_ieee(). */
+    switch (bytesPerSample)
+    {
+        case 2: /* s16, s12 (loosely packed) */
+        {
+            drwav__bswap_samples_s16((drwav_int16*)pSamples, sampleCount);
+        } break;
+        case 3: /* s24 */
+        {
+            drwav__bswap_samples_s24((drwav_uint8*)pSamples, sampleCount);
+        } break;
+        case 4: /* s32 */
+        {
+            drwav__bswap_samples_s32((drwav_int32*)pSamples, sampleCount);
+        } break;
+        default:
+        {
+            /* Unsupported format. */
+            DRWAV_ASSERT(DRWAV_FALSE);
+        } break;
+    }
+}
+
+static DRWAV_INLINE void drwav__bswap_samples_ieee(void* pSamples, drwav_uint64 sampleCount, drwav_uint32 bytesPerSample)
+{
+    switch (bytesPerSample)
+    {
+    #if 0   /* Contributions welcome for f16 support. */
+        case 2: /* f16 */
+        {
+            drwav__bswap_samples_f16((drwav_float16*)pSamples, sampleCount);
+        } break;
+    #endif
+        case 4: /* f32 */
+        {
+            drwav__bswap_samples_f32((float*)pSamples, sampleCount);
+        } break;
+        case 8: /* f64 */
+        {
+            drwav__bswap_samples_f64((double*)pSamples, sampleCount);
+        } break;
+        default:
+        {
+            /* Unsupported format. */
+            DRWAV_ASSERT(DRWAV_FALSE);
+        } break;
+    }
+}
+
+static DRWAV_INLINE void drwav__bswap_samples(void* pSamples, drwav_uint64 sampleCount, drwav_uint32 bytesPerSample, drwav_uint16 format)
+{
+    switch (format)
+    {
+        case DR_WAVE_FORMAT_PCM:
+        {
+            drwav__bswap_samples_pcm(pSamples, sampleCount, bytesPerSample);
+        } break;
+
+        case DR_WAVE_FORMAT_IEEE_FLOAT:
+        {
+            drwav__bswap_samples_ieee(pSamples, sampleCount, bytesPerSample);
+        } break;
+
+        case DR_WAVE_FORMAT_ALAW:
+        case DR_WAVE_FORMAT_MULAW:
+        {
+            drwav__bswap_samples_s16((drwav_int16*)pSamples, sampleCount);
+        } break;
+
+        case DR_WAVE_FORMAT_ADPCM:
+        case DR_WAVE_FORMAT_DVI_ADPCM:
+        default:
+        {
+            /* Unsupported format. */
+            DRWAV_ASSERT(DRWAV_FALSE);
+        } break;
+    }
+}
+
+
+static void* drwav__malloc_default(size_t sz, void* pUserData)
+{
+    (void)pUserData;
+    return DRWAV_MALLOC(sz);
+}
+
+static void* drwav__realloc_default(void* p, size_t sz, void* pUserData)
+{
+    (void)pUserData;
+    return DRWAV_REALLOC(p, sz);
+}
+
+static void drwav__free_default(void* p, void* pUserData)
+{
+    (void)pUserData;
+    DRWAV_FREE(p);
+}
+
+
+static void* drwav__malloc_from_callbacks(size_t sz, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pAllocationCallbacks == NULL) {
+        return NULL;
+    }
+
+    if (pAllocationCallbacks->onMalloc != NULL) {
+        return pAllocationCallbacks->onMalloc(sz, pAllocationCallbacks->pUserData);
+    }
+
+    /* Try using realloc(). */
+    if (pAllocationCallbacks->onRealloc != NULL) {
+        return pAllocationCallbacks->onRealloc(NULL, sz, pAllocationCallbacks->pUserData);
+    }
+
+    return NULL;
+}
+
+static void* drwav__realloc_from_callbacks(void* p, size_t szNew, size_t szOld, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pAllocationCallbacks == NULL) {
+        return NULL;
+    }
+
+    if (pAllocationCallbacks->onRealloc != NULL) {
+        return pAllocationCallbacks->onRealloc(p, szNew, pAllocationCallbacks->pUserData);
+    }
+
+    /* Try emulating realloc() in terms of malloc()/free(). */
+    if (pAllocationCallbacks->onMalloc != NULL && pAllocationCallbacks->onFree != NULL) {
+        void* p2;
+
+        p2 = pAllocationCallbacks->onMalloc(szNew, pAllocationCallbacks->pUserData);
+        if (p2 == NULL) {
+            return NULL;
+        }
+
+        if (p != NULL) {
+            DRWAV_COPY_MEMORY(p2, p, szOld);
+            pAllocationCallbacks->onFree(p, pAllocationCallbacks->pUserData);
+        }
+
+        return p2;
+    }
+
+    return NULL;
+}
+
+static void drwav__free_from_callbacks(void* p, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (p == NULL || pAllocationCallbacks == NULL) {
+        return;
+    }
+
+    if (pAllocationCallbacks->onFree != NULL) {
+        pAllocationCallbacks->onFree(p, pAllocationCallbacks->pUserData);
+    }
+}
+
+
+static drwav_allocation_callbacks drwav_copy_allocation_callbacks_or_defaults(const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pAllocationCallbacks != NULL) {
+        /* Copy. */
+        return *pAllocationCallbacks;
+    } else {
+        /* Defaults. */
+        drwav_allocation_callbacks allocationCallbacks;
+        allocationCallbacks.pUserData = NULL;
+        allocationCallbacks.onMalloc  = drwav__malloc_default;
+        allocationCallbacks.onRealloc = drwav__realloc_default;
+        allocationCallbacks.onFree    = drwav__free_default;
+        return allocationCallbacks;
+    }
+}
+
+
+static DRWAV_INLINE drwav_bool32 drwav__is_compressed_format_tag(drwav_uint16 formatTag)
+{
+    return
+        formatTag == DR_WAVE_FORMAT_ADPCM ||
+        formatTag == DR_WAVE_FORMAT_DVI_ADPCM;
+}
+
+static unsigned int drwav__chunk_padding_size_riff(drwav_uint64 chunkSize)
+{
+    return (unsigned int)(chunkSize % 2);
+}
+
+static unsigned int drwav__chunk_padding_size_w64(drwav_uint64 chunkSize)
+{
+    return (unsigned int)(chunkSize % 8);
+}
+
+static drwav_uint64 drwav_read_pcm_frames_s16__msadpcm(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut);
+static drwav_uint64 drwav_read_pcm_frames_s16__ima(drwav* pWav, drwav_uint64 samplesToRead, drwav_int16* pBufferOut);
+static drwav_bool32 drwav_init_write__internal(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount);
+
+static drwav_result drwav__read_chunk_header(drwav_read_proc onRead, void* pUserData, drwav_container container, drwav_uint64* pRunningBytesReadOut, drwav_chunk_header* pHeaderOut)
+{
+    if (container == drwav_container_riff) {
+        drwav_uint8 sizeInBytes[4];
+
+        if (onRead(pUserData, pHeaderOut->id.fourcc, 4) != 4) {
+            return DRWAV_AT_END;
+        }
+
+        if (onRead(pUserData, sizeInBytes, 4) != 4) {
+            return DRWAV_INVALID_FILE;
+        }
+
+        pHeaderOut->sizeInBytes = drwav__bytes_to_u32(sizeInBytes);
+        pHeaderOut->paddingSize = drwav__chunk_padding_size_riff(pHeaderOut->sizeInBytes);
+        *pRunningBytesReadOut += 8;
+    } else {
+        drwav_uint8 sizeInBytes[8];
+
+        if (onRead(pUserData, pHeaderOut->id.guid, 16) != 16) {
+            return DRWAV_AT_END;
+        }
+
+        if (onRead(pUserData, sizeInBytes, 8) != 8) {
+            return DRWAV_INVALID_FILE;
+        }
+
+        pHeaderOut->sizeInBytes = drwav__bytes_to_u64(sizeInBytes) - 24;    /* <-- Subtract 24 because w64 includes the size of the header. */
+        pHeaderOut->paddingSize = drwav__chunk_padding_size_w64(pHeaderOut->sizeInBytes);
+        *pRunningBytesReadOut += 24;
+    }
+
+    return DRWAV_SUCCESS;
+}
+
+static drwav_bool32 drwav__seek_forward(drwav_seek_proc onSeek, drwav_uint64 offset, void* pUserData)
+{
+    drwav_uint64 bytesRemainingToSeek = offset;
+    while (bytesRemainingToSeek > 0) {
+        if (bytesRemainingToSeek > 0x7FFFFFFF) {
+            if (!onSeek(pUserData, 0x7FFFFFFF, drwav_seek_origin_current)) {
+                return DRWAV_FALSE;
+            }
+            bytesRemainingToSeek -= 0x7FFFFFFF;
+        } else {
+            if (!onSeek(pUserData, (int)bytesRemainingToSeek, drwav_seek_origin_current)) {
+                return DRWAV_FALSE;
+            }
+            bytesRemainingToSeek = 0;
+        }
+    }
+
+    return DRWAV_TRUE;
+}
+
+static drwav_bool32 drwav__seek_from_start(drwav_seek_proc onSeek, drwav_uint64 offset, void* pUserData)
+{
+    if (offset <= 0x7FFFFFFF) {
+        return onSeek(pUserData, (int)offset, drwav_seek_origin_start);
+    }
+
+    /* Larger than 32-bit seek. */
+    if (!onSeek(pUserData, 0x7FFFFFFF, drwav_seek_origin_start)) {
+        return DRWAV_FALSE;
+    }
+    offset -= 0x7FFFFFFF;
+
+    for (;;) {
+        if (offset <= 0x7FFFFFFF) {
+            return onSeek(pUserData, (int)offset, drwav_seek_origin_current);
+        }
+
+        if (!onSeek(pUserData, 0x7FFFFFFF, drwav_seek_origin_current)) {
+            return DRWAV_FALSE;
+        }
+        offset -= 0x7FFFFFFF;
+    }
+
+    /* Should never get here. */
+    /*return DRWAV_TRUE; */
+}
+
+
+static drwav_bool32 drwav__read_fmt(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, drwav_container container, drwav_uint64* pRunningBytesReadOut, drwav_fmt* fmtOut)
+{
+    drwav_chunk_header header;
+    drwav_uint8 fmt[16];
+
+    if (drwav__read_chunk_header(onRead, pUserData, container, pRunningBytesReadOut, &header) != DRWAV_SUCCESS) {
+        return DRWAV_FALSE;
+    }
+
+
+    /* Skip non-fmt chunks. */
+    while ((container == drwav_container_riff && !drwav__fourcc_equal(header.id.fourcc, "fmt ")) || (container == drwav_container_w64 && !drwav__guid_equal(header.id.guid, drwavGUID_W64_FMT))) {
+        if (!drwav__seek_forward(onSeek, header.sizeInBytes + header.paddingSize, pUserData)) {
+            return DRWAV_FALSE;
+        }
+        *pRunningBytesReadOut += header.sizeInBytes + header.paddingSize;
+
+        /* Try the next header. */
+        if (drwav__read_chunk_header(onRead, pUserData, container, pRunningBytesReadOut, &header) != DRWAV_SUCCESS) {
+            return DRWAV_FALSE;
+        }
+    }
+
+
+    /* Validation. */
+    if (container == drwav_container_riff) {
+        if (!drwav__fourcc_equal(header.id.fourcc, "fmt ")) {
+            return DRWAV_FALSE;
+        }
+    } else {
+        if (!drwav__guid_equal(header.id.guid, drwavGUID_W64_FMT)) {
+            return DRWAV_FALSE;
+        }
+    }
+
+
+    if (onRead(pUserData, fmt, sizeof(fmt)) != sizeof(fmt)) {
+        return DRWAV_FALSE;
+    }
+    *pRunningBytesReadOut += sizeof(fmt);
+
+    fmtOut->formatTag      = drwav__bytes_to_u16(fmt + 0);
+    fmtOut->channels       = drwav__bytes_to_u16(fmt + 2);
+    fmtOut->sampleRate     = drwav__bytes_to_u32(fmt + 4);
+    fmtOut->avgBytesPerSec = drwav__bytes_to_u32(fmt + 8);
+    fmtOut->blockAlign     = drwav__bytes_to_u16(fmt + 12);
+    fmtOut->bitsPerSample  = drwav__bytes_to_u16(fmt + 14);
+
+    fmtOut->extendedSize       = 0;
+    fmtOut->validBitsPerSample = 0;
+    fmtOut->channelMask        = 0;
+    memset(fmtOut->subFormat, 0, sizeof(fmtOut->subFormat));
+
+    if (header.sizeInBytes > 16) {
+        drwav_uint8 fmt_cbSize[2];
+        int bytesReadSoFar = 0;
+
+        if (onRead(pUserData, fmt_cbSize, sizeof(fmt_cbSize)) != sizeof(fmt_cbSize)) {
+            return DRWAV_FALSE;    /* Expecting more data. */
+        }
+        *pRunningBytesReadOut += sizeof(fmt_cbSize);
+
+        bytesReadSoFar = 18;
+
+        fmtOut->extendedSize = drwav__bytes_to_u16(fmt_cbSize);
+        if (fmtOut->extendedSize > 0) {
+            /* Simple validation. */
+            if (fmtOut->formatTag == DR_WAVE_FORMAT_EXTENSIBLE) {
+                if (fmtOut->extendedSize != 22) {
+                    return DRWAV_FALSE;
+                }
+            }
+
+            if (fmtOut->formatTag == DR_WAVE_FORMAT_EXTENSIBLE) {
+                drwav_uint8 fmtext[22];
+                if (onRead(pUserData, fmtext, fmtOut->extendedSize) != fmtOut->extendedSize) {
+                    return DRWAV_FALSE;    /* Expecting more data. */
+                }
+
+                fmtOut->validBitsPerSample = drwav__bytes_to_u16(fmtext + 0);
+                fmtOut->channelMask        = drwav__bytes_to_u32(fmtext + 2);
+                drwav__bytes_to_guid(fmtext + 6, fmtOut->subFormat);
+            } else {
+                if (!onSeek(pUserData, fmtOut->extendedSize, drwav_seek_origin_current)) {
+                    return DRWAV_FALSE;
+                }
+            }
+            *pRunningBytesReadOut += fmtOut->extendedSize;
+
+            bytesReadSoFar += fmtOut->extendedSize;
+        }
+
+        /* Seek past any leftover bytes. For w64 the leftover will be defined based on the chunk size. */
+        if (!onSeek(pUserData, (int)(header.sizeInBytes - bytesReadSoFar), drwav_seek_origin_current)) {
+            return DRWAV_FALSE;
+        }
+        *pRunningBytesReadOut += (header.sizeInBytes - bytesReadSoFar);
+    }
+
+    if (header.paddingSize > 0) {
+        if (!onSeek(pUserData, header.paddingSize, drwav_seek_origin_current)) {
+            return DRWAV_FALSE;
+        }
+        *pRunningBytesReadOut += header.paddingSize;
+    }
+
+    return DRWAV_TRUE;
+}
+
+
+static size_t drwav__on_read(drwav_read_proc onRead, void* pUserData, void* pBufferOut, size_t bytesToRead, drwav_uint64* pCursor)
+{
+    size_t bytesRead;
+
+    DRWAV_ASSERT(onRead != NULL);
+    DRWAV_ASSERT(pCursor != NULL);
+
+    bytesRead = onRead(pUserData, pBufferOut, bytesToRead);
+    *pCursor += bytesRead;
+    return bytesRead;
+}
+
+#if 0
+static drwav_bool32 drwav__on_seek(drwav_seek_proc onSeek, void* pUserData, int offset, drwav_seek_origin origin, drwav_uint64* pCursor)
+{
+    DRWAV_ASSERT(onSeek != NULL);
+    DRWAV_ASSERT(pCursor != NULL);
+
+    if (!onSeek(pUserData, offset, origin)) {
+        return DRWAV_FALSE;
+    }
+
+    if (origin == drwav_seek_origin_start) {
+        *pCursor = offset;
+    } else {
+        *pCursor += offset;
+    }
+
+    return DRWAV_TRUE;
+}
+#endif
+
+
+
+static drwav_uint32 drwav_get_bytes_per_pcm_frame(drwav* pWav)
+{
+    /*
+    The bytes per frame is a bit ambiguous. It can be either be based on the bits per sample, or the block align. The way I'm doing it here
+    is that if the bits per sample is a multiple of 8, use floor(bitsPerSample*channels/8), otherwise fall back to the block align.
+    */
+    if ((pWav->bitsPerSample & 0x7) == 0) {
+        /* Bits per sample is a multiple of 8. */
+        return (pWav->bitsPerSample * pWav->fmt.channels) >> 3;
+    } else {
+        return pWav->fmt.blockAlign;
+    }
+}
+
+DRWAV_API drwav_uint16 drwav_fmt_get_format(const drwav_fmt* pFMT)
+{
+    if (pFMT == NULL) {
+        return 0;
+    }
+
+    if (pFMT->formatTag != DR_WAVE_FORMAT_EXTENSIBLE) {
+        return pFMT->formatTag;
+    } else {
+        return drwav__bytes_to_u16(pFMT->subFormat);    /* Only the first two bytes are required. */
+    }
+}
+
+static drwav_bool32 drwav_preinit(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, void* pReadSeekUserData, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pWav == NULL || onRead == NULL || onSeek == NULL) {
+        return DRWAV_FALSE;
+    }
+
+    DRWAV_ZERO_MEMORY(pWav, sizeof(*pWav));
+    pWav->onRead    = onRead;
+    pWav->onSeek    = onSeek;
+    pWav->pUserData = pReadSeekUserData;
+    pWav->allocationCallbacks = drwav_copy_allocation_callbacks_or_defaults(pAllocationCallbacks);
+
+    if (pWav->allocationCallbacks.onFree == NULL || (pWav->allocationCallbacks.onMalloc == NULL && pWav->allocationCallbacks.onRealloc == NULL)) {
+        return DRWAV_FALSE;    /* Invalid allocation callbacks. */
+    }
+
+    return DRWAV_TRUE;
+}
+
+static drwav_bool32 drwav_init__internal(drwav* pWav, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags)
+{
+    /* This function assumes drwav_preinit() has been called beforehand. */
+
+    drwav_uint64 cursor;    /* <-- Keeps track of the byte position so we can seek to specific locations. */
+    drwav_bool32 sequential;
+    drwav_uint8 riff[4];
+    drwav_fmt fmt;
+    unsigned short translatedFormatTag;
+    drwav_uint64 sampleCountFromFactChunk;
+    drwav_bool32 foundDataChunk;
+    drwav_uint64 dataChunkSize;
+    drwav_uint64 chunkSize;
+
+    cursor = 0;
+    sequential = (flags & DRWAV_SEQUENTIAL) != 0;
+
+    /* The first 4 bytes should be the RIFF identifier. */
+    if (drwav__on_read(pWav->onRead, pWav->pUserData, riff, sizeof(riff), &cursor) != sizeof(riff)) {
+        return DRWAV_FALSE;
+    }
+
+    /*
+    The first 4 bytes can be used to identify the container. For RIFF files it will start with "RIFF" and for
+    w64 it will start with "riff".
+    */
+    if (drwav__fourcc_equal(riff, "RIFF")) {
+        pWav->container = drwav_container_riff;
+    } else if (drwav__fourcc_equal(riff, "riff")) {
+        int i;
+        drwav_uint8 riff2[12];
+
+        pWav->container = drwav_container_w64;
+
+        /* Check the rest of the GUID for validity. */
+        if (drwav__on_read(pWav->onRead, pWav->pUserData, riff2, sizeof(riff2), &cursor) != sizeof(riff2)) {
+            return DRWAV_FALSE;
+        }
+
+        for (i = 0; i < 12; ++i) {
+            if (riff2[i] != drwavGUID_W64_RIFF[i+4]) {
+                return DRWAV_FALSE;
+            }
+        }
+    } else {
+        return DRWAV_FALSE;   /* Unknown or unsupported container. */
+    }
+
+
+    if (pWav->container == drwav_container_riff) {
+        drwav_uint8 chunkSizeBytes[4];
+        drwav_uint8 wave[4];
+
+        /* RIFF/WAVE */
+        if (drwav__on_read(pWav->onRead, pWav->pUserData, chunkSizeBytes, sizeof(chunkSizeBytes), &cursor) != sizeof(chunkSizeBytes)) {
+            return DRWAV_FALSE;
+        }
+
+        if (drwav__bytes_to_u32(chunkSizeBytes) < 36) {
+            return DRWAV_FALSE;    /* Chunk size should always be at least 36 bytes. */
+        }
+
+        if (drwav__on_read(pWav->onRead, pWav->pUserData, wave, sizeof(wave), &cursor) != sizeof(wave)) {
+            return DRWAV_FALSE;
+        }
+
+        if (!drwav__fourcc_equal(wave, "WAVE")) {
+            return DRWAV_FALSE;    /* Expecting "WAVE". */
+        }
+    } else {
+        drwav_uint8 chunkSizeBytes[8];
+        drwav_uint8 wave[16];
+
+        /* W64 */
+        if (drwav__on_read(pWav->onRead, pWav->pUserData, chunkSizeBytes, sizeof(chunkSizeBytes), &cursor) != sizeof(chunkSizeBytes)) {
+            return DRWAV_FALSE;
+        }
+
+        if (drwav__bytes_to_u64(chunkSizeBytes) < 80) {
+            return DRWAV_FALSE;
+        }
+
+        if (drwav__on_read(pWav->onRead, pWav->pUserData, wave, sizeof(wave), &cursor) != sizeof(wave)) {
+            return DRWAV_FALSE;
+        }
+
+        if (!drwav__guid_equal(wave, drwavGUID_W64_WAVE)) {
+            return DRWAV_FALSE;
+        }
+    }
+
+
+    /* The next bytes should be the "fmt " chunk. */
+    if (!drwav__read_fmt(pWav->onRead, pWav->onSeek, pWav->pUserData, pWav->container, &cursor, &fmt)) {
+        return DRWAV_FALSE;    /* Failed to read the "fmt " chunk. */
+    }
+
+    /* Basic validation. */
+    if ((fmt.sampleRate    == 0 || fmt.sampleRate    > DRWAV_MAX_SAMPLE_RATE)     ||
+        (fmt.channels      == 0 || fmt.channels      > DRWAV_MAX_CHANNELS)        ||
+        (fmt.bitsPerSample == 0 || fmt.bitsPerSample > DRWAV_MAX_BITS_PER_SAMPLE) ||
+        fmt.blockAlign == 0) {
+        return DRWAV_FALSE; /* Probably an invalid WAV file. */
+    }
+
+
+    /* Translate the internal format. */
+    translatedFormatTag = fmt.formatTag;
+    if (translatedFormatTag == DR_WAVE_FORMAT_EXTENSIBLE) {
+        translatedFormatTag = drwav__bytes_to_u16(fmt.subFormat + 0);
+    }
+
+
+
+    sampleCountFromFactChunk = 0;
+
+    /*
+    We need to enumerate over each chunk for two reasons:
+      1) The "data" chunk may not be the next one
+      2) We may want to report each chunk back to the client
+    
+    In order to correctly report each chunk back to the client we will need to keep looping until the end of the file.
+    */
+    foundDataChunk = DRWAV_FALSE;
+    dataChunkSize = 0;
+
+    /* The next chunk we care about is the "data" chunk. This is not necessarily the next chunk so we'll need to loop. */
+    for (;;)
+    {
+        drwav_chunk_header header;
+        drwav_result result = drwav__read_chunk_header(pWav->onRead, pWav->pUserData, pWav->container, &cursor, &header);
+        if (result != DRWAV_SUCCESS) {
+            if (!foundDataChunk) {
+                return DRWAV_FALSE;
+            } else {
+                break;  /* Probably at the end of the file. Get out of the loop. */
+            }
+        }
+
+        /* Tell the client about this chunk. */
+        if (!sequential && onChunk != NULL) {
+            drwav_uint64 callbackBytesRead = onChunk(pChunkUserData, pWav->onRead, pWav->onSeek, pWav->pUserData, &header, pWav->container, &fmt);
+
+            /*
+            dr_wav may need to read the contents of the chunk, so we now need to seek back to the position before
+            we called the callback.
+            */
+            if (callbackBytesRead > 0) {
+                if (!drwav__seek_from_start(pWav->onSeek, cursor, pWav->pUserData)) {
+                    return DRWAV_FALSE;
+                }
+            }
+        }
+        
+
+        if (!foundDataChunk) {
+            pWav->dataChunkDataPos = cursor;
+        }
+
+        chunkSize = header.sizeInBytes;
+        if (pWav->container == drwav_container_riff) {
+            if (drwav__fourcc_equal(header.id.fourcc, "data")) {
+                foundDataChunk = DRWAV_TRUE;
+                dataChunkSize = chunkSize;
+            }
+        } else {
+            if (drwav__guid_equal(header.id.guid, drwavGUID_W64_DATA)) {
+                foundDataChunk = DRWAV_TRUE;
+                dataChunkSize = chunkSize;
+            }
+        }
+
+        /*
+        If at this point we have found the data chunk and we're running in sequential mode, we need to break out of this loop. The reason for
+        this is that we would otherwise require a backwards seek which sequential mode forbids.
+        */
+        if (foundDataChunk && sequential) {
+            break;
+        }
+
+        /* Optional. Get the total sample count from the FACT chunk. This is useful for compressed formats. */
+        if (pWav->container == drwav_container_riff) {
+            if (drwav__fourcc_equal(header.id.fourcc, "fact")) {
+                drwav_uint32 sampleCount;
+                if (drwav__on_read(pWav->onRead, pWav->pUserData, &sampleCount, 4, &cursor) != 4) {
+                    return DRWAV_FALSE;
+                }
+                chunkSize -= 4;
+
+                if (!foundDataChunk) {
+                    pWav->dataChunkDataPos = cursor;
+                }
+
+                /*
+                The sample count in the "fact" chunk is either unreliable, or I'm not understanding it properly. For now I am only enabling this
+                for Microsoft ADPCM formats.
+                */
+                if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) {
+                    sampleCountFromFactChunk = sampleCount;
+                } else {
+                    sampleCountFromFactChunk = 0;
+                }
+            }
+        } else {
+            if (drwav__guid_equal(header.id.guid, drwavGUID_W64_FACT)) {
+                if (drwav__on_read(pWav->onRead, pWav->pUserData, &sampleCountFromFactChunk, 8, &cursor) != 8) {
+                    return DRWAV_FALSE;
+                }
+                chunkSize -= 8;
+
+                if (!foundDataChunk) {
+                    pWav->dataChunkDataPos = cursor;
+                }
+            }
+        }
+
+        /* "smpl" chunk. */
+        if (pWav->container == drwav_container_riff) {
+            if (drwav__fourcc_equal(header.id.fourcc, "smpl")) {
+                drwav_uint8 smplHeaderData[36];    /* 36 = size of the smpl header section, not including the loop data. */
+                if (chunkSize >= sizeof(smplHeaderData)) {
+                    drwav_uint64 bytesJustRead = drwav__on_read(pWav->onRead, pWav->pUserData, smplHeaderData, sizeof(smplHeaderData), &cursor);
+                    chunkSize -= bytesJustRead;
+
+                    if (bytesJustRead == sizeof(smplHeaderData)) {
+                        drwav_uint32 iLoop;
+
+                        pWav->smpl.manufacturer      = drwav__bytes_to_u32(smplHeaderData+0);
+                        pWav->smpl.product           = drwav__bytes_to_u32(smplHeaderData+4);
+                        pWav->smpl.samplePeriod      = drwav__bytes_to_u32(smplHeaderData+8);
+                        pWav->smpl.midiUnityNotes    = drwav__bytes_to_u32(smplHeaderData+12);
+                        pWav->smpl.midiPitchFraction = drwav__bytes_to_u32(smplHeaderData+16);
+                        pWav->smpl.smpteFormat       = drwav__bytes_to_u32(smplHeaderData+20);
+                        pWav->smpl.smpteOffset       = drwav__bytes_to_u32(smplHeaderData+24);
+                        pWav->smpl.numSampleLoops    = drwav__bytes_to_u32(smplHeaderData+28);
+                        pWav->smpl.samplerData       = drwav__bytes_to_u32(smplHeaderData+32);
+
+                        for (iLoop = 0; iLoop < pWav->smpl.numSampleLoops && iLoop < drwav_countof(pWav->smpl.loops); ++iLoop) {
+                            drwav_uint8 smplLoopData[24];  /* 24 = size of a loop section in the smpl chunk. */
+                            bytesJustRead = drwav__on_read(pWav->onRead, pWav->pUserData, smplLoopData, sizeof(smplLoopData), &cursor);
+                            chunkSize -= bytesJustRead;
+
+                            if (bytesJustRead == sizeof(smplLoopData)) {
+                                pWav->smpl.loops[iLoop].cuePointId = drwav__bytes_to_u32(smplLoopData+0);
+                                pWav->smpl.loops[iLoop].type       = drwav__bytes_to_u32(smplLoopData+4);
+                                pWav->smpl.loops[iLoop].start      = drwav__bytes_to_u32(smplLoopData+8);
+                                pWav->smpl.loops[iLoop].end        = drwav__bytes_to_u32(smplLoopData+12);
+                                pWav->smpl.loops[iLoop].fraction   = drwav__bytes_to_u32(smplLoopData+16);
+                                pWav->smpl.loops[iLoop].playCount  = drwav__bytes_to_u32(smplLoopData+20);
+                            } else {
+                                break;  /* Break from the smpl loop for loop. */
+                            }
+                        }
+                    }
+                } else {
+                    /* Looks like invalid data. Ignore the chunk. */
+                }
+            }
+        } else {
+            if (drwav__guid_equal(header.id.guid, drwavGUID_W64_SMPL)) {
+                /*
+                This path will be hit when a W64 WAV file contains a smpl chunk. I don't have a sample file to test this path, so a contribution
+                is welcome to add support for this.
+                */
+            }
+        }
+
+        /* Make sure we seek past the padding. */
+        chunkSize += header.paddingSize;
+        if (!drwav__seek_forward(pWav->onSeek, chunkSize, pWav->pUserData)) {
+            break;
+        }
+        cursor += chunkSize;
+
+        if (!foundDataChunk) {
+            pWav->dataChunkDataPos = cursor;
+        }
+    }
+
+    /* If we haven't found a data chunk, return an error. */
+    if (!foundDataChunk) {
+        return DRWAV_FALSE;
+    }
+
+    /* We may have moved passed the data chunk. If so we need to move back. If running in sequential mode we can assume we are already sitting on the data chunk. */
+    if (!sequential) {
+        if (!drwav__seek_from_start(pWav->onSeek, pWav->dataChunkDataPos, pWav->pUserData)) {
+            return DRWAV_FALSE;
+        }
+        cursor = pWav->dataChunkDataPos;
+    }
+    
+
+    /* At this point we should be sitting on the first byte of the raw audio data. */
+
+    pWav->fmt                 = fmt;
+    pWav->sampleRate          = fmt.sampleRate;
+    pWav->channels            = fmt.channels;
+    pWav->bitsPerSample       = fmt.bitsPerSample;
+    pWav->bytesRemaining      = dataChunkSize;
+    pWav->translatedFormatTag = translatedFormatTag;
+    pWav->dataChunkDataSize   = dataChunkSize;
+
+    if (sampleCountFromFactChunk != 0) {
+        pWav->totalPCMFrameCount = sampleCountFromFactChunk;
+    } else {
+        pWav->totalPCMFrameCount = dataChunkSize / drwav_get_bytes_per_pcm_frame(pWav);
+
+        if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) {
+            drwav_uint64 totalBlockHeaderSizeInBytes;
+            drwav_uint64 blockCount = dataChunkSize / fmt.blockAlign;
+
+            /* Make sure any trailing partial block is accounted for. */
+            if ((blockCount * fmt.blockAlign) < dataChunkSize) {
+                blockCount += 1;
+            }
+
+            /* We decode two samples per byte. There will be blockCount headers in the data chunk. This is enough to know how to calculate the total PCM frame count. */
+            totalBlockHeaderSizeInBytes = blockCount * (6*fmt.channels);
+            pWav->totalPCMFrameCount = ((dataChunkSize - totalBlockHeaderSizeInBytes) * 2) / fmt.channels;
+        }
+        if (pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) {
+            drwav_uint64 totalBlockHeaderSizeInBytes;
+            drwav_uint64 blockCount = dataChunkSize / fmt.blockAlign;
+
+            /* Make sure any trailing partial block is accounted for. */
+            if ((blockCount * fmt.blockAlign) < dataChunkSize) {
+                blockCount += 1;
+            }
+
+            /* We decode two samples per byte. There will be blockCount headers in the data chunk. This is enough to know how to calculate the total PCM frame count. */
+            totalBlockHeaderSizeInBytes = blockCount * (4*fmt.channels);
+            pWav->totalPCMFrameCount = ((dataChunkSize - totalBlockHeaderSizeInBytes) * 2) / fmt.channels;
+
+            /* The header includes a decoded sample for each channel which acts as the initial predictor sample. */
+            pWav->totalPCMFrameCount += blockCount;
+        }
+    }
+
+    /* Some formats only support a certain number of channels. */
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM || pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) {
+        if (pWav->channels > 2) {
+            return DRWAV_FALSE;
+        }
+    }
+
+#ifdef DR_WAV_LIBSNDFILE_COMPAT
+    /*
+    I use libsndfile as a benchmark for testing, however in the version I'm using (from the Windows installer on the libsndfile website),
+    it appears the total sample count libsndfile uses for MS-ADPCM is incorrect. It would seem they are computing the total sample count
+    from the number of blocks, however this results in the inclusion of extra silent samples at the end of the last block. The correct
+    way to know the total sample count is to inspect the "fact" chunk, which should always be present for compressed formats, and should
+    always include the sample count. This little block of code below is only used to emulate the libsndfile logic so I can properly run my
+    correctness tests against libsndfile, and is disabled by default.
+    */
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) {
+        drwav_uint64 blockCount = dataChunkSize / fmt.blockAlign;
+        pWav->totalPCMFrameCount = (((blockCount * (fmt.blockAlign - (6*pWav->channels))) * 2)) / fmt.channels;  /* x2 because two samples per byte. */
+    }
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) {
+        drwav_uint64 blockCount = dataChunkSize / fmt.blockAlign;
+        pWav->totalPCMFrameCount = (((blockCount * (fmt.blockAlign - (4*pWav->channels))) * 2) + (blockCount * pWav->channels)) / fmt.channels;
+    }
+#endif
+
+    return DRWAV_TRUE;
+}
+
+DRWAV_API drwav_bool32 drwav_init(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    return drwav_init_ex(pWav, onRead, onSeek, NULL, pUserData, NULL, 0, pAllocationCallbacks);
+}
+
+DRWAV_API drwav_bool32 drwav_init_ex(drwav* pWav, drwav_read_proc onRead, drwav_seek_proc onSeek, drwav_chunk_proc onChunk, void* pReadSeekUserData, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (!drwav_preinit(pWav, onRead, onSeek, pReadSeekUserData, pAllocationCallbacks)) {
+        return DRWAV_FALSE;
+    }
+
+    return drwav_init__internal(pWav, onChunk, pChunkUserData, flags);
+}
+
+
+static drwav_uint32 drwav__riff_chunk_size_riff(drwav_uint64 dataChunkSize)
+{
+    drwav_uint32 dataSubchunkPaddingSize = drwav__chunk_padding_size_riff(dataChunkSize);
+
+    if (dataChunkSize <= (0xFFFFFFFFUL - 36 - dataSubchunkPaddingSize)) {
+        return 36 + (drwav_uint32)(dataChunkSize + dataSubchunkPaddingSize);
+    } else {
+        return 0xFFFFFFFF;
+    }
+}
+
+static drwav_uint32 drwav__data_chunk_size_riff(drwav_uint64 dataChunkSize)
+{
+    if (dataChunkSize <= 0xFFFFFFFFUL) {
+        return (drwav_uint32)dataChunkSize;
+    } else {
+        return 0xFFFFFFFFUL;
+    }
+}
+
+static drwav_uint64 drwav__riff_chunk_size_w64(drwav_uint64 dataChunkSize)
+{
+    drwav_uint64 dataSubchunkPaddingSize = drwav__chunk_padding_size_w64(dataChunkSize);
+
+    return 80 + 24 + dataChunkSize + dataSubchunkPaddingSize;   /* +24 because W64 includes the size of the GUID and size fields. */
+}
+
+static drwav_uint64 drwav__data_chunk_size_w64(drwav_uint64 dataChunkSize)
+{
+    return 24 + dataChunkSize;        /* +24 because W64 includes the size of the GUID and size fields. */
+}
+
+
+static drwav_bool32 drwav_preinit_write(drwav* pWav, const drwav_data_format* pFormat, drwav_bool32 isSequential, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pWav == NULL || onWrite == NULL) {
+        return DRWAV_FALSE;
+    }
+
+    if (!isSequential && onSeek == NULL) {
+        return DRWAV_FALSE; /* <-- onSeek is required when in non-sequential mode. */
+    }
+
+    /* Not currently supporting compressed formats. Will need to add support for the "fact" chunk before we enable this. */
+    if (pFormat->format == DR_WAVE_FORMAT_EXTENSIBLE) {
+        return DRWAV_FALSE;
+    }
+    if (pFormat->format == DR_WAVE_FORMAT_ADPCM || pFormat->format == DR_WAVE_FORMAT_DVI_ADPCM) {
+        return DRWAV_FALSE;
+    }
+
+    DRWAV_ZERO_MEMORY(pWav, sizeof(*pWav));
+    pWav->onWrite   = onWrite;
+    pWav->onSeek    = onSeek;
+    pWav->pUserData = pUserData;
+    pWav->allocationCallbacks = drwav_copy_allocation_callbacks_or_defaults(pAllocationCallbacks);
+
+    if (pWav->allocationCallbacks.onFree == NULL || (pWav->allocationCallbacks.onMalloc == NULL && pWav->allocationCallbacks.onRealloc == NULL)) {
+        return DRWAV_FALSE;    /* Invalid allocation callbacks. */
+    }
+
+    pWav->fmt.formatTag = (drwav_uint16)pFormat->format;
+    pWav->fmt.channels = (drwav_uint16)pFormat->channels;
+    pWav->fmt.sampleRate = pFormat->sampleRate;
+    pWav->fmt.avgBytesPerSec = (drwav_uint32)((pFormat->bitsPerSample * pFormat->sampleRate * pFormat->channels) / 8);
+    pWav->fmt.blockAlign = (drwav_uint16)((pFormat->channels * pFormat->bitsPerSample) / 8);
+    pWav->fmt.bitsPerSample = (drwav_uint16)pFormat->bitsPerSample;
+    pWav->fmt.extendedSize = 0;
+    pWav->isSequentialWrite = isSequential;
+
+    return DRWAV_TRUE;
+}
+
+static drwav_bool32 drwav_init_write__internal(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount)
+{
+    /* The function assumes drwav_preinit_write() was called beforehand. */
+
+    size_t runningPos = 0;
+    drwav_uint64 initialDataChunkSize = 0;
+    drwav_uint64 chunkSizeFMT;
+
+    /*
+    The initial values for the "RIFF" and "data" chunks depends on whether or not we are initializing in sequential mode or not. In
+    sequential mode we set this to its final values straight away since they can be calculated from the total sample count. In non-
+    sequential mode we initialize it all to zero and fill it out in drwav_uninit() using a backwards seek.
+    */
+    if (pWav->isSequentialWrite) {
+        initialDataChunkSize = (totalSampleCount * pWav->fmt.bitsPerSample) / 8;
+
+        /*
+        The RIFF container has a limit on the number of samples. drwav is not allowing this. There's no practical limits for Wave64
+        so for the sake of simplicity I'm not doing any validation for that.
+        */
+        if (pFormat->container == drwav_container_riff) {
+            if (initialDataChunkSize > (0xFFFFFFFFUL - 36)) {
+                return DRWAV_FALSE; /* Not enough room to store every sample. */
+            }
+        }
+    }
+
+    pWav->dataChunkDataSizeTargetWrite = initialDataChunkSize;
+
+
+    /* "RIFF" chunk. */
+    if (pFormat->container == drwav_container_riff) {
+        drwav_uint32 chunkSizeRIFF = 36 + (drwav_uint32)initialDataChunkSize;   /* +36 = "RIFF"+[RIFF Chunk Size]+"WAVE" + [sizeof "fmt " chunk] */
+        runningPos += pWav->onWrite(pWav->pUserData, "RIFF", 4);
+        runningPos += pWav->onWrite(pWav->pUserData, &chunkSizeRIFF, 4);
+        runningPos += pWav->onWrite(pWav->pUserData, "WAVE", 4);
+    } else {
+        drwav_uint64 chunkSizeRIFF = 80 + 24 + initialDataChunkSize;   /* +24 because W64 includes the size of the GUID and size fields. */
+        runningPos += pWav->onWrite(pWav->pUserData, drwavGUID_W64_RIFF, 16);
+        runningPos += pWav->onWrite(pWav->pUserData, &chunkSizeRIFF, 8);
+        runningPos += pWav->onWrite(pWav->pUserData, drwavGUID_W64_WAVE, 16);
+    }
+
+    /* "fmt " chunk. */
+    if (pFormat->container == drwav_container_riff) {
+        chunkSizeFMT = 16;
+        runningPos += pWav->onWrite(pWav->pUserData, "fmt ", 4);
+        runningPos += pWav->onWrite(pWav->pUserData, &chunkSizeFMT, 4);
+    } else {
+        chunkSizeFMT = 40;
+        runningPos += pWav->onWrite(pWav->pUserData, drwavGUID_W64_FMT, 16);
+        runningPos += pWav->onWrite(pWav->pUserData, &chunkSizeFMT, 8);
+    }
+
+    runningPos += pWav->onWrite(pWav->pUserData, &pWav->fmt.formatTag,      2);
+    runningPos += pWav->onWrite(pWav->pUserData, &pWav->fmt.channels,       2);
+    runningPos += pWav->onWrite(pWav->pUserData, &pWav->fmt.sampleRate,     4);
+    runningPos += pWav->onWrite(pWav->pUserData, &pWav->fmt.avgBytesPerSec, 4);
+    runningPos += pWav->onWrite(pWav->pUserData, &pWav->fmt.blockAlign,     2);
+    runningPos += pWav->onWrite(pWav->pUserData, &pWav->fmt.bitsPerSample,  2);
+
+    pWav->dataChunkDataPos = runningPos;
+
+    /* "data" chunk. */
+    if (pFormat->container == drwav_container_riff) {
+        drwav_uint32 chunkSizeDATA = (drwav_uint32)initialDataChunkSize;
+        runningPos += pWav->onWrite(pWav->pUserData, "data", 4);
+        runningPos += pWav->onWrite(pWav->pUserData, &chunkSizeDATA, 4);
+    } else {
+        drwav_uint64 chunkSizeDATA = 24 + initialDataChunkSize; /* +24 because W64 includes the size of the GUID and size fields. */
+        runningPos += pWav->onWrite(pWav->pUserData, drwavGUID_W64_DATA, 16);
+        runningPos += pWav->onWrite(pWav->pUserData, &chunkSizeDATA, 8);
+    }
+
+
+    /* Simple validation. */
+    if (pFormat->container == drwav_container_riff) {
+        if (runningPos != 20 + chunkSizeFMT + 8) {
+            return DRWAV_FALSE;
+        }
+    } else {
+        if (runningPos != 40 + chunkSizeFMT + 24) {
+            return DRWAV_FALSE;
+        }
+    }
+    
+
+    /* Set some properties for the client's convenience. */
+    pWav->container = pFormat->container;
+    pWav->channels = (drwav_uint16)pFormat->channels;
+    pWav->sampleRate = pFormat->sampleRate;
+    pWav->bitsPerSample = (drwav_uint16)pFormat->bitsPerSample;
+    pWav->translatedFormatTag = (drwav_uint16)pFormat->format;
+
+    return DRWAV_TRUE;
+}
+
+
+DRWAV_API drwav_bool32 drwav_init_write(drwav* pWav, const drwav_data_format* pFormat, drwav_write_proc onWrite, drwav_seek_proc onSeek, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (!drwav_preinit_write(pWav, pFormat, DRWAV_FALSE, onWrite, onSeek, pUserData, pAllocationCallbacks)) {
+        return DRWAV_FALSE;
+    }
+
+    return drwav_init_write__internal(pWav, pFormat, 0);               /* DRWAV_FALSE = Not Sequential */
+}
+
+DRWAV_API drwav_bool32 drwav_init_write_sequential(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_write_proc onWrite, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (!drwav_preinit_write(pWav, pFormat, DRWAV_TRUE, onWrite, NULL, pUserData, pAllocationCallbacks)) {
+        return DRWAV_FALSE;
+    }
+
+    return drwav_init_write__internal(pWav, pFormat, totalSampleCount); /* DRWAV_TRUE = Sequential */
+}
+
+DRWAV_API drwav_bool32 drwav_init_write_sequential_pcm_frames(drwav* pWav, const drwav_data_format* pFormat, drwav_uint64 totalPCMFrameCount, drwav_write_proc onWrite, void* pUserData, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pFormat == NULL) {
+        return DRWAV_FALSE;
+    }
+
+    return drwav_init_write_sequential(pWav, pFormat, totalPCMFrameCount*pFormat->channels, onWrite, pUserData, pAllocationCallbacks);
+}
+
+DRWAV_API drwav_uint64 drwav_target_write_size_bytes(const drwav_data_format* pFormat, drwav_uint64 totalSampleCount)
+{
+    /* Casting totalSampleCount to drwav_int64 for VC6 compatibility. No issues in practice because nobody is going to exhaust the whole 63 bits. */
+    drwav_uint64 targetDataSizeBytes = (drwav_uint64)((drwav_int64)totalSampleCount * pFormat->channels * pFormat->bitsPerSample/8.0);
+    drwav_uint64 riffChunkSizeBytes;
+    drwav_uint64 fileSizeBytes;
+
+    if (pFormat->container == drwav_container_riff) {
+        riffChunkSizeBytes = drwav__riff_chunk_size_riff(targetDataSizeBytes);
+        fileSizeBytes = (8 + riffChunkSizeBytes); /* +8 because WAV doesn't include the size of the ChunkID and ChunkSize fields. */
+    } else {
+        riffChunkSizeBytes = drwav__riff_chunk_size_w64(targetDataSizeBytes);
+        fileSizeBytes = riffChunkSizeBytes;
+    }
+
+    return fileSizeBytes;
+}
+
+
+#ifndef DR_WAV_NO_STDIO
+
+/* drwav_result_from_errno() is only used for fopen() and wfopen() so putting it inside DR_WAV_NO_STDIO for now. If something else needs this later we can move it out. */
+#include <errno.h>
+static drwav_result drwav_result_from_errno(int e)
+{
+    switch (e)
+    {
+        case 0: return DRWAV_SUCCESS;
+    #ifdef EPERM
+        case EPERM: return DRWAV_INVALID_OPERATION;
+    #endif
+    #ifdef ENOENT
+        case ENOENT: return DRWAV_DOES_NOT_EXIST;
+    #endif
+    #ifdef ESRCH
+        case ESRCH: return DRWAV_DOES_NOT_EXIST;
+    #endif
+    #ifdef EINTR
+        case EINTR: return DRWAV_INTERRUPT;
+    #endif
+    #ifdef EIO
+        case EIO: return DRWAV_IO_ERROR;
+    #endif
+    #ifdef ENXIO
+        case ENXIO: return DRWAV_DOES_NOT_EXIST;
+    #endif
+    #ifdef E2BIG
+        case E2BIG: return DRWAV_INVALID_ARGS;
+    #endif
+    #ifdef ENOEXEC
+        case ENOEXEC: return DRWAV_INVALID_FILE;
+    #endif
+    #ifdef EBADF
+        case EBADF: return DRWAV_INVALID_FILE;
+    #endif
+    #ifdef ECHILD
+        case ECHILD: return DRWAV_ERROR;
+    #endif
+    #ifdef EAGAIN
+        case EAGAIN: return DRWAV_UNAVAILABLE;
+    #endif
+    #ifdef ENOMEM
+        case ENOMEM: return DRWAV_OUT_OF_MEMORY;
+    #endif
+    #ifdef EACCES
+        case EACCES: return DRWAV_ACCESS_DENIED;
+    #endif
+    #ifdef EFAULT
+        case EFAULT: return DRWAV_BAD_ADDRESS;
+    #endif
+    #ifdef ENOTBLK
+        case ENOTBLK: return DRWAV_ERROR;
+    #endif
+    #ifdef EBUSY
+        case EBUSY: return DRWAV_BUSY;
+    #endif
+    #ifdef EEXIST
+        case EEXIST: return DRWAV_ALREADY_EXISTS;
+    #endif
+    #ifdef EXDEV
+        case EXDEV: return DRWAV_ERROR;
+    #endif
+    #ifdef ENODEV
+        case ENODEV: return DRWAV_DOES_NOT_EXIST;
+    #endif
+    #ifdef ENOTDIR
+        case ENOTDIR: return DRWAV_NOT_DIRECTORY;
+    #endif
+    #ifdef EISDIR
+        case EISDIR: return DRWAV_IS_DIRECTORY;
+    #endif
+    #ifdef EINVAL
+        case EINVAL: return DRWAV_INVALID_ARGS;
+    #endif
+    #ifdef ENFILE
+        case ENFILE: return DRWAV_TOO_MANY_OPEN_FILES;
+    #endif
+    #ifdef EMFILE
+        case EMFILE: return DRWAV_TOO_MANY_OPEN_FILES;
+    #endif
+    #ifdef ENOTTY
+        case ENOTTY: return DRWAV_INVALID_OPERATION;
+    #endif
+    #ifdef ETXTBSY
+        case ETXTBSY: return DRWAV_BUSY;
+    #endif
+    #ifdef EFBIG
+        case EFBIG: return DRWAV_TOO_BIG;
+    #endif
+    #ifdef ENOSPC
+        case ENOSPC: return DRWAV_NO_SPACE;
+    #endif
+    #ifdef ESPIPE
+        case ESPIPE: return DRWAV_BAD_SEEK;
+    #endif
+    #ifdef EROFS
+        case EROFS: return DRWAV_ACCESS_DENIED;
+    #endif
+    #ifdef EMLINK
+        case EMLINK: return DRWAV_TOO_MANY_LINKS;
+    #endif
+    #ifdef EPIPE
+        case EPIPE: return DRWAV_BAD_PIPE;
+    #endif
+    #ifdef EDOM
+        case EDOM: return DRWAV_OUT_OF_RANGE;
+    #endif
+    #ifdef ERANGE
+        case ERANGE: return DRWAV_OUT_OF_RANGE;
+    #endif
+    #ifdef EDEADLK
+        case EDEADLK: return DRWAV_DEADLOCK;
+    #endif
+    #ifdef ENAMETOOLONG
+        case ENAMETOOLONG: return DRWAV_PATH_TOO_LONG;
+    #endif
+    #ifdef ENOLCK
+        case ENOLCK: return DRWAV_ERROR;
+    #endif
+    #ifdef ENOSYS
+        case ENOSYS: return DRWAV_NOT_IMPLEMENTED;
+    #endif
+    #ifdef ENOTEMPTY
+        case ENOTEMPTY: return DRWAV_DIRECTORY_NOT_EMPTY;
+    #endif
+    #ifdef ELOOP
+        case ELOOP: return DRWAV_TOO_MANY_LINKS;
+    #endif
+    #ifdef ENOMSG
+        case ENOMSG: return DRWAV_NO_MESSAGE;
+    #endif
+    #ifdef EIDRM
+        case EIDRM: return DRWAV_ERROR;
+    #endif
+    #ifdef ECHRNG
+        case ECHRNG: return DRWAV_ERROR;
+    #endif
+    #ifdef EL2NSYNC
+        case EL2NSYNC: return DRWAV_ERROR;
+    #endif
+    #ifdef EL3HLT
+        case EL3HLT: return DRWAV_ERROR;
+    #endif
+    #ifdef EL3RST
+        case EL3RST: return DRWAV_ERROR;
+    #endif
+    #ifdef ELNRNG
+        case ELNRNG: return DRWAV_OUT_OF_RANGE;
+    #endif
+    #ifdef EUNATCH
+        case EUNATCH: return DRWAV_ERROR;
+    #endif
+    #ifdef ENOCSI
+        case ENOCSI: return DRWAV_ERROR;
+    #endif
+    #ifdef EL2HLT
+        case EL2HLT: return DRWAV_ERROR;
+    #endif
+    #ifdef EBADE
+        case EBADE: return DRWAV_ERROR;
+    #endif
+    #ifdef EBADR
+        case EBADR: return DRWAV_ERROR;
+    #endif
+    #ifdef EXFULL
+        case EXFULL: return DRWAV_ERROR;
+    #endif
+    #ifdef ENOANO
+        case ENOANO: return DRWAV_ERROR;
+    #endif
+    #ifdef EBADRQC
+        case EBADRQC: return DRWAV_ERROR;
+    #endif
+    #ifdef EBADSLT
+        case EBADSLT: return DRWAV_ERROR;
+    #endif
+    #ifdef EBFONT
+        case EBFONT: return DRWAV_INVALID_FILE;
+    #endif
+    #ifdef ENOSTR
+        case ENOSTR: return DRWAV_ERROR;
+    #endif
+    #ifdef ENODATA
+        case ENODATA: return DRWAV_NO_DATA_AVAILABLE;
+    #endif
+    #ifdef ETIME
+        case ETIME: return DRWAV_TIMEOUT;
+    #endif
+    #ifdef ENOSR
+        case ENOSR: return DRWAV_NO_DATA_AVAILABLE;
+    #endif
+    #ifdef ENONET
+        case ENONET: return DRWAV_NO_NETWORK;
+    #endif
+    #ifdef ENOPKG
+        case ENOPKG: return DRWAV_ERROR;
+    #endif
+    #ifdef EREMOTE
+        case EREMOTE: return DRWAV_ERROR;
+    #endif
+    #ifdef ENOLINK
+        case ENOLINK: return DRWAV_ERROR;
+    #endif
+    #ifdef EADV
+        case EADV: return DRWAV_ERROR;
+    #endif
+    #ifdef ESRMNT
+        case ESRMNT: return DRWAV_ERROR;
+    #endif
+    #ifdef ECOMM
+        case ECOMM: return DRWAV_ERROR;
+    #endif
+    #ifdef EPROTO
+        case EPROTO: return DRWAV_ERROR;
+    #endif
+    #ifdef EMULTIHOP
+        case EMULTIHOP: return DRWAV_ERROR;
+    #endif
+    #ifdef EDOTDOT
+        case EDOTDOT: return DRWAV_ERROR;
+    #endif
+    #ifdef EBADMSG
+        case EBADMSG: return DRWAV_BAD_MESSAGE;
+    #endif
+    #ifdef EOVERFLOW
+        case EOVERFLOW: return DRWAV_TOO_BIG;
+    #endif
+    #ifdef ENOTUNIQ
+        case ENOTUNIQ: return DRWAV_NOT_UNIQUE;
+    #endif
+    #ifdef EBADFD
+        case EBADFD: return DRWAV_ERROR;
+    #endif
+    #ifdef EREMCHG
+        case EREMCHG: return DRWAV_ERROR;
+    #endif
+    #ifdef ELIBACC
+        case ELIBACC: return DRWAV_ACCESS_DENIED;
+    #endif
+    #ifdef ELIBBAD
+        case ELIBBAD: return DRWAV_INVALID_FILE;
+    #endif
+    #ifdef ELIBSCN
+        case ELIBSCN: return DRWAV_INVALID_FILE;
+    #endif
+    #ifdef ELIBMAX
+        case ELIBMAX: return DRWAV_ERROR;
+    #endif
+    #ifdef ELIBEXEC
+        case ELIBEXEC: return DRWAV_ERROR;
+    #endif
+    #ifdef EILSEQ
+        case EILSEQ: return DRWAV_INVALID_DATA;
+    #endif
+    #ifdef ERESTART
+        case ERESTART: return DRWAV_ERROR;
+    #endif
+    #ifdef ESTRPIPE
+        case ESTRPIPE: return DRWAV_ERROR;
+    #endif
+    #ifdef EUSERS
+        case EUSERS: return DRWAV_ERROR;
+    #endif
+    #ifdef ENOTSOCK
+        case ENOTSOCK: return DRWAV_NOT_SOCKET;
+    #endif
+    #ifdef EDESTADDRREQ
+        case EDESTADDRREQ: return DRWAV_NO_ADDRESS;
+    #endif
+    #ifdef EMSGSIZE
+        case EMSGSIZE: return DRWAV_TOO_BIG;
+    #endif
+    #ifdef EPROTOTYPE
+        case EPROTOTYPE: return DRWAV_BAD_PROTOCOL;
+    #endif
+    #ifdef ENOPROTOOPT
+        case ENOPROTOOPT: return DRWAV_PROTOCOL_UNAVAILABLE;
+    #endif
+    #ifdef EPROTONOSUPPORT
+        case EPROTONOSUPPORT: return DRWAV_PROTOCOL_NOT_SUPPORTED;
+    #endif
+    #ifdef ESOCKTNOSUPPORT
+        case ESOCKTNOSUPPORT: return DRWAV_SOCKET_NOT_SUPPORTED;
+    #endif
+    #ifdef EOPNOTSUPP
+        case EOPNOTSUPP: return DRWAV_INVALID_OPERATION;
+    #endif
+    #ifdef EPFNOSUPPORT
+        case EPFNOSUPPORT: return DRWAV_PROTOCOL_FAMILY_NOT_SUPPORTED;
+    #endif
+    #ifdef EAFNOSUPPORT
+        case EAFNOSUPPORT: return DRWAV_ADDRESS_FAMILY_NOT_SUPPORTED;
+    #endif
+    #ifdef EADDRINUSE
+        case EADDRINUSE: return DRWAV_ALREADY_IN_USE;
+    #endif
+    #ifdef EADDRNOTAVAIL
+        case EADDRNOTAVAIL: return DRWAV_ERROR;
+    #endif
+    #ifdef ENETDOWN
+        case ENETDOWN: return DRWAV_NO_NETWORK;
+    #endif
+    #ifdef ENETUNREACH
+        case ENETUNREACH: return DRWAV_NO_NETWORK;
+    #endif
+    #ifdef ENETRESET
+        case ENETRESET: return DRWAV_NO_NETWORK;
+    #endif
+    #ifdef ECONNABORTED
+        case ECONNABORTED: return DRWAV_NO_NETWORK;
+    #endif
+    #ifdef ECONNRESET
+        case ECONNRESET: return DRWAV_CONNECTION_RESET;
+    #endif
+    #ifdef ENOBUFS
+        case ENOBUFS: return DRWAV_NO_SPACE;
+    #endif
+    #ifdef EISCONN
+        case EISCONN: return DRWAV_ALREADY_CONNECTED;
+    #endif
+    #ifdef ENOTCONN
+        case ENOTCONN: return DRWAV_NOT_CONNECTED;
+    #endif
+    #ifdef ESHUTDOWN
+        case ESHUTDOWN: return DRWAV_ERROR;
+    #endif
+    #ifdef ETOOMANYREFS
+        case ETOOMANYREFS: return DRWAV_ERROR;
+    #endif
+    #ifdef ETIMEDOUT
+        case ETIMEDOUT: return DRWAV_TIMEOUT;
+    #endif
+    #ifdef ECONNREFUSED
+        case ECONNREFUSED: return DRWAV_CONNECTION_REFUSED;
+    #endif
+    #ifdef EHOSTDOWN
+        case EHOSTDOWN: return DRWAV_NO_HOST;
+    #endif
+    #ifdef EHOSTUNREACH
+        case EHOSTUNREACH: return DRWAV_NO_HOST;
+    #endif
+    #ifdef EALREADY
+        case EALREADY: return DRWAV_IN_PROGRESS;
+    #endif
+    #ifdef EINPROGRESS
+        case EINPROGRESS: return DRWAV_IN_PROGRESS;
+    #endif
+    #ifdef ESTALE
+        case ESTALE: return DRWAV_INVALID_FILE;
+    #endif
+    #ifdef EUCLEAN
+        case EUCLEAN: return DRWAV_ERROR;
+    #endif
+    #ifdef ENOTNAM
+        case ENOTNAM: return DRWAV_ERROR;
+    #endif
+    #ifdef ENAVAIL
+        case ENAVAIL: return DRWAV_ERROR;
+    #endif
+    #ifdef EISNAM
+        case EISNAM: return DRWAV_ERROR;
+    #endif
+    #ifdef EREMOTEIO
+        case EREMOTEIO: return DRWAV_IO_ERROR;
+    #endif
+    #ifdef EDQUOT
+        case EDQUOT: return DRWAV_NO_SPACE;
+    #endif
+    #ifdef ENOMEDIUM
+        case ENOMEDIUM: return DRWAV_DOES_NOT_EXIST;
+    #endif
+    #ifdef EMEDIUMTYPE
+        case EMEDIUMTYPE: return DRWAV_ERROR;
+    #endif
+    #ifdef ECANCELED
+        case ECANCELED: return DRWAV_CANCELLED;
+    #endif
+    #ifdef ENOKEY
+        case ENOKEY: return DRWAV_ERROR;
+    #endif
+    #ifdef EKEYEXPIRED
+        case EKEYEXPIRED: return DRWAV_ERROR;
+    #endif
+    #ifdef EKEYREVOKED
+        case EKEYREVOKED: return DRWAV_ERROR;
+    #endif
+    #ifdef EKEYREJECTED
+        case EKEYREJECTED: return DRWAV_ERROR;
+    #endif
+    #ifdef EOWNERDEAD
+        case EOWNERDEAD: return DRWAV_ERROR;
+    #endif
+    #ifdef ENOTRECOVERABLE
+        case ENOTRECOVERABLE: return DRWAV_ERROR;
+    #endif
+    #ifdef ERFKILL
+        case ERFKILL: return DRWAV_ERROR;
+    #endif
+    #ifdef EHWPOISON
+        case EHWPOISON: return DRWAV_ERROR;
+    #endif
+        default: return DRWAV_ERROR;
+    }
+}
+
+static drwav_result drwav_fopen(FILE** ppFile, const char* pFilePath, const char* pOpenMode)
+{
+#if _MSC_VER && _MSC_VER >= 1400
+    errno_t err;
+#endif
+
+    if (ppFile != NULL) {
+        *ppFile = NULL;  /* Safety. */
+    }
+
+    if (pFilePath == NULL || pOpenMode == NULL || ppFile == NULL) {
+        return DRWAV_INVALID_ARGS;
+    }
+
+#if _MSC_VER && _MSC_VER >= 1400
+    err = fopen_s(ppFile, pFilePath, pOpenMode);
+    if (err != 0) {
+        return drwav_result_from_errno(err);
+    }
+#else
+#if defined(_WIN32) || defined(__APPLE__)
+    *ppFile = fopen(pFilePath, pOpenMode);
+#else
+    #if defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS == 64 && defined(_LARGEFILE64_SOURCE)
+        *ppFile = fopen64(pFilePath, pOpenMode);
+    #else
+        *ppFile = fopen(pFilePath, pOpenMode);
+    #endif
+#endif
+    if (*ppFile == NULL) {
+        drwav_result result = drwav_result_from_errno(errno);
+        if (result == DRWAV_SUCCESS) {
+            result = DRWAV_ERROR;   /* Just a safety check to make sure we never ever return success when pFile == NULL. */
+        }
+
+        return result;
+    }
+#endif
+
+    return DRWAV_SUCCESS;
+}
+
+/*
+_wfopen() isn't always available in all compilation environments.
+
+    * Windows only.
+    * MSVC seems to support it universally as far back as VC6 from what I can tell (haven't checked further back).
+    * MinGW-64 (both 32- and 64-bit) seems to support it.
+    * MinGW wraps it in !defined(__STRICT_ANSI__).
+
+This can be reviewed as compatibility issues arise. The preference is to use _wfopen_s() and _wfopen() as opposed to the wcsrtombs()
+fallback, so if you notice your compiler not detecting this properly I'm happy to look at adding support.
+*/
+#if defined(_WIN32)
+    #if defined(_MSC_VER) || defined(__MINGW64__) || !defined(__STRICT_ANSI__)
+        #define DRWAV_HAS_WFOPEN
+    #endif
+#endif
+
+static drwav_result drwav_wfopen(FILE** ppFile, const wchar_t* pFilePath, const wchar_t* pOpenMode, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (ppFile != NULL) {
+        *ppFile = NULL;  /* Safety. */
+    }
+
+    if (pFilePath == NULL || pOpenMode == NULL || ppFile == NULL) {
+        return DRWAV_INVALID_ARGS;
+    }
+
+#if defined(DRWAV_HAS_WFOPEN)
+    {
+        /* Use _wfopen() on Windows. */
+    #if defined(_MSC_VER) && _MSC_VER >= 1400
+        errno_t err = _wfopen_s(ppFile, pFilePath, pOpenMode);
+        if (err != 0) {
+            return drwav_result_from_errno(err);
+        }
+    #else
+        *ppFile = _wfopen(pFilePath, pOpenMode);
+        if (*ppFile == NULL) {
+            return drwav_result_from_errno(errno);
+        }
+    #endif
+        (void)pAllocationCallbacks;
+    }
+#else
+    /*
+    Use fopen() on anything other than Windows. Requires a conversion. This is annoying because fopen() is locale specific. The only real way I can
+    think of to do this is with wcsrtombs(). Note that wcstombs() is apparently not thread-safe because it uses a static global mbstate_t object for
+    maintaining state. I've checked this with -std=c89 and it works, but if somebody get's a compiler error I'll look into improving compatibility.
+    */
+    {
+        mbstate_t mbs;
+        size_t lenMB;
+        const wchar_t* pFilePathTemp = pFilePath;
+        char* pFilePathMB = NULL;
+        char pOpenModeMB[32] = {0};
+
+        /* Get the length first. */
+        DRWAV_ZERO_OBJECT(&mbs);
+        lenMB = wcsrtombs(NULL, &pFilePathTemp, 0, &mbs);
+        if (lenMB == (size_t)-1) {
+            return drwav_result_from_errno(errno);
+        }
+
+        pFilePathMB = (char*)drwav__malloc_from_callbacks(lenMB + 1, pAllocationCallbacks);
+        if (pFilePathMB == NULL) {
+            return DRWAV_OUT_OF_MEMORY;
+        }
+
+        pFilePathTemp = pFilePath;
+        DRWAV_ZERO_OBJECT(&mbs);
+        wcsrtombs(pFilePathMB, &pFilePathTemp, lenMB + 1, &mbs);
+
+        /* The open mode should always consist of ASCII characters so we should be able to do a trivial conversion. */
+        {
+            size_t i = 0;
+            for (;;) {
+                if (pOpenMode[i] == 0) {
+                    pOpenModeMB[i] = '\0';
+                    break;
+                }
+
+                pOpenModeMB[i] = (char)pOpenMode[i];
+                i += 1;
+            }
+        }
+
+        *ppFile = fopen(pFilePathMB, pOpenModeMB);
+
+        drwav__free_from_callbacks(pFilePathMB, pAllocationCallbacks);
+    }
+
+    if (*ppFile == NULL) {
+        return DRWAV_ERROR;
+    }
+#endif
+
+    return DRWAV_SUCCESS;
+}
+
+
+static size_t drwav__on_read_stdio(void* pUserData, void* pBufferOut, size_t bytesToRead)
+{
+    return fread(pBufferOut, 1, bytesToRead, (FILE*)pUserData);
+}
+
+static size_t drwav__on_write_stdio(void* pUserData, const void* pData, size_t bytesToWrite)
+{
+    return fwrite(pData, 1, bytesToWrite, (FILE*)pUserData);
+}
+
+static drwav_bool32 drwav__on_seek_stdio(void* pUserData, int offset, drwav_seek_origin origin)
+{
+    return fseek((FILE*)pUserData, offset, (origin == drwav_seek_origin_current) ? SEEK_CUR : SEEK_SET) == 0;
+}
+
+DRWAV_API drwav_bool32 drwav_init_file(drwav* pWav, const char* filename, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    return drwav_init_file_ex(pWav, filename, NULL, NULL, 0, pAllocationCallbacks);
+}
+
+
+static drwav_bool32 drwav_init_file__internal_FILE(drwav* pWav, FILE* pFile, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    drwav_bool32 result;
+
+    result = drwav_preinit(pWav, drwav__on_read_stdio, drwav__on_seek_stdio, (void*)pFile, pAllocationCallbacks);
+    if (result != DRWAV_TRUE) {
+        fclose(pFile);
+        return result;
+    }
+
+    result = drwav_init__internal(pWav, onChunk, pChunkUserData, flags);
+    if (result != DRWAV_TRUE) {
+        fclose(pFile);
+        return result;
+    }
+
+    return DRWAV_TRUE;
+}
+
+DRWAV_API drwav_bool32 drwav_init_file_ex(drwav* pWav, const char* filename, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    FILE* pFile;
+    if (drwav_fopen(&pFile, filename, "rb") != DRWAV_SUCCESS) {
+        return DRWAV_FALSE;
+    }
+
+    /* This takes ownership of the FILE* object. */
+    return drwav_init_file__internal_FILE(pWav, pFile, onChunk, pChunkUserData, flags, pAllocationCallbacks);
+}
+
+DRWAV_API drwav_bool32 drwav_init_file_w(drwav* pWav, const wchar_t* filename, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    return drwav_init_file_ex_w(pWav, filename, NULL, NULL, 0, pAllocationCallbacks);
+}
+
+DRWAV_API drwav_bool32 drwav_init_file_ex_w(drwav* pWav, const wchar_t* filename, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    FILE* pFile;
+    if (drwav_wfopen(&pFile, filename, L"rb", pAllocationCallbacks) != DRWAV_SUCCESS) {
+        return DRWAV_FALSE;
+    }
+
+    /* This takes ownership of the FILE* object. */
+    return drwav_init_file__internal_FILE(pWav, pFile, onChunk, pChunkUserData, flags, pAllocationCallbacks);
+}
+
+
+static drwav_bool32 drwav_init_file_write__internal_FILE(drwav* pWav, FILE* pFile, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    drwav_bool32 result;
+
+    result = drwav_preinit_write(pWav, pFormat, isSequential, drwav__on_write_stdio, drwav__on_seek_stdio, (void*)pFile, pAllocationCallbacks);
+    if (result != DRWAV_TRUE) {
+        fclose(pFile);
+        return result;
+    }
+
+    result = drwav_init_write__internal(pWav, pFormat, totalSampleCount);
+    if (result != DRWAV_TRUE) {
+        fclose(pFile);
+        return result;
+    }
+
+    return DRWAV_TRUE;
+}
+
+static drwav_bool32 drwav_init_file_write__internal(drwav* pWav, const char* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    FILE* pFile;
+    if (drwav_fopen(&pFile, filename, "wb") != DRWAV_SUCCESS) {
+        return DRWAV_FALSE;
+    }
+
+    /* This takes ownership of the FILE* object. */
+    return drwav_init_file_write__internal_FILE(pWav, pFile, pFormat, totalSampleCount, isSequential, pAllocationCallbacks);
+}
+
+static drwav_bool32 drwav_init_file_write_w__internal(drwav* pWav, const wchar_t* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    FILE* pFile;
+    if (drwav_wfopen(&pFile, filename, L"wb", pAllocationCallbacks) != DRWAV_SUCCESS) {
+        return DRWAV_FALSE;
+    }
+
+    /* This takes ownership of the FILE* object. */
+    return drwav_init_file_write__internal_FILE(pWav, pFile, pFormat, totalSampleCount, isSequential, pAllocationCallbacks);
+}
+
+DRWAV_API drwav_bool32 drwav_init_file_write(drwav* pWav, const char* filename, const drwav_data_format* pFormat, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    return drwav_init_file_write__internal(pWav, filename, pFormat, 0, DRWAV_FALSE, pAllocationCallbacks);
+}
+
+DRWAV_API drwav_bool32 drwav_init_file_write_sequential(drwav* pWav, const char* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    return drwav_init_file_write__internal(pWav, filename, pFormat, totalSampleCount, DRWAV_TRUE, pAllocationCallbacks);
+}
+
+DRWAV_API drwav_bool32 drwav_init_file_write_sequential_pcm_frames(drwav* pWav, const char* filename, const drwav_data_format* pFormat, drwav_uint64 totalPCMFrameCount, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pFormat == NULL) {
+        return DRWAV_FALSE;
+    }
+
+    return drwav_init_file_write_sequential(pWav, filename, pFormat, totalPCMFrameCount*pFormat->channels, pAllocationCallbacks);
+}
+
+DRWAV_API drwav_bool32 drwav_init_file_write_w(drwav* pWav, const wchar_t* filename, const drwav_data_format* pFormat, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    return drwav_init_file_write_w__internal(pWav, filename, pFormat, 0, DRWAV_FALSE, pAllocationCallbacks);
+}
+
+DRWAV_API drwav_bool32 drwav_init_file_write_sequential_w(drwav* pWav, const wchar_t* filename, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    return drwav_init_file_write_w__internal(pWav, filename, pFormat, totalSampleCount, DRWAV_TRUE, pAllocationCallbacks);
+}
+
+DRWAV_API drwav_bool32 drwav_init_file_write_sequential_pcm_frames_w(drwav* pWav, const wchar_t* filename, const drwav_data_format* pFormat, drwav_uint64 totalPCMFrameCount, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pFormat == NULL) {
+        return DRWAV_FALSE;
+    }
+
+    return drwav_init_file_write_sequential_w(pWav, filename, pFormat, totalPCMFrameCount*pFormat->channels, pAllocationCallbacks);
+}
+#endif  /* DR_WAV_NO_STDIO */
+
+
+static size_t drwav__on_read_memory(void* pUserData, void* pBufferOut, size_t bytesToRead)
+{
+    drwav* pWav = (drwav*)pUserData;
+    size_t bytesRemaining;
+
+    DRWAV_ASSERT(pWav != NULL);
+    DRWAV_ASSERT(pWav->memoryStream.dataSize >= pWav->memoryStream.currentReadPos);
+
+    bytesRemaining = pWav->memoryStream.dataSize - pWav->memoryStream.currentReadPos;
+    if (bytesToRead > bytesRemaining) {
+        bytesToRead = bytesRemaining;
+    }
+
+    if (bytesToRead > 0) {
+        DRWAV_COPY_MEMORY(pBufferOut, pWav->memoryStream.data + pWav->memoryStream.currentReadPos, bytesToRead);
+        pWav->memoryStream.currentReadPos += bytesToRead;
+    }
+
+    return bytesToRead;
+}
+
+static drwav_bool32 drwav__on_seek_memory(void* pUserData, int offset, drwav_seek_origin origin)
+{
+    drwav* pWav = (drwav*)pUserData;
+    DRWAV_ASSERT(pWav != NULL);
+
+    if (origin == drwav_seek_origin_current) {
+        if (offset > 0) {
+            if (pWav->memoryStream.currentReadPos + offset > pWav->memoryStream.dataSize) {
+                return DRWAV_FALSE; /* Trying to seek too far forward. */
+            }
+        } else {
+            if (pWav->memoryStream.currentReadPos < (size_t)-offset) {
+                return DRWAV_FALSE; /* Trying to seek too far backwards. */
+            }
+        }
+
+        /* This will never underflow thanks to the clamps above. */
+        pWav->memoryStream.currentReadPos += offset;
+    } else {
+        if ((drwav_uint32)offset <= pWav->memoryStream.dataSize) {
+            pWav->memoryStream.currentReadPos = offset;
+        } else {
+            return DRWAV_FALSE; /* Trying to seek too far forward. */
+        }
+    }
+    
+    return DRWAV_TRUE;
+}
+
+static size_t drwav__on_write_memory(void* pUserData, const void* pDataIn, size_t bytesToWrite)
+{
+    drwav* pWav = (drwav*)pUserData;
+    size_t bytesRemaining;
+
+    DRWAV_ASSERT(pWav != NULL);
+    DRWAV_ASSERT(pWav->memoryStreamWrite.dataCapacity >= pWav->memoryStreamWrite.currentWritePos);
+
+    bytesRemaining = pWav->memoryStreamWrite.dataCapacity - pWav->memoryStreamWrite.currentWritePos;
+    if (bytesRemaining < bytesToWrite) {
+        /* Need to reallocate. */
+        void* pNewData;
+        size_t newDataCapacity = (pWav->memoryStreamWrite.dataCapacity == 0) ? 256 : pWav->memoryStreamWrite.dataCapacity * 2;
+
+        /* If doubling wasn't enough, just make it the minimum required size to write the data. */
+        if ((newDataCapacity - pWav->memoryStreamWrite.currentWritePos) < bytesToWrite) {
+            newDataCapacity = pWav->memoryStreamWrite.currentWritePos + bytesToWrite;
+        }
+
+        pNewData = drwav__realloc_from_callbacks(*pWav->memoryStreamWrite.ppData, newDataCapacity, pWav->memoryStreamWrite.dataCapacity, &pWav->allocationCallbacks);
+        if (pNewData == NULL) {
+            return 0;
+        }
+
+        *pWav->memoryStreamWrite.ppData = pNewData;
+        pWav->memoryStreamWrite.dataCapacity = newDataCapacity;
+    }
+
+    DRWAV_COPY_MEMORY(((drwav_uint8*)(*pWav->memoryStreamWrite.ppData)) + pWav->memoryStreamWrite.currentWritePos, pDataIn, bytesToWrite);
+
+    pWav->memoryStreamWrite.currentWritePos += bytesToWrite;
+    if (pWav->memoryStreamWrite.dataSize < pWav->memoryStreamWrite.currentWritePos) {
+        pWav->memoryStreamWrite.dataSize = pWav->memoryStreamWrite.currentWritePos;
+    }
+
+    *pWav->memoryStreamWrite.pDataSize = pWav->memoryStreamWrite.dataSize;
+
+    return bytesToWrite;
+}
+
+static drwav_bool32 drwav__on_seek_memory_write(void* pUserData, int offset, drwav_seek_origin origin)
+{
+    drwav* pWav = (drwav*)pUserData;
+    DRWAV_ASSERT(pWav != NULL);
+
+    if (origin == drwav_seek_origin_current) {
+        if (offset > 0) {
+            if (pWav->memoryStreamWrite.currentWritePos + offset > pWav->memoryStreamWrite.dataSize) {
+                offset = (int)(pWav->memoryStreamWrite.dataSize - pWav->memoryStreamWrite.currentWritePos);  /* Trying to seek too far forward. */
+            }
+        } else {
+            if (pWav->memoryStreamWrite.currentWritePos < (size_t)-offset) {
+                offset = -(int)pWav->memoryStreamWrite.currentWritePos;  /* Trying to seek too far backwards. */
+            }
+        }
+
+        /* This will never underflow thanks to the clamps above. */
+        pWav->memoryStreamWrite.currentWritePos += offset;
+    } else {
+        if ((drwav_uint32)offset <= pWav->memoryStreamWrite.dataSize) {
+            pWav->memoryStreamWrite.currentWritePos = offset;
+        } else {
+            pWav->memoryStreamWrite.currentWritePos = pWav->memoryStreamWrite.dataSize;  /* Trying to seek too far forward. */
+        }
+    }
+    
+    return DRWAV_TRUE;
+}
+
+DRWAV_API drwav_bool32 drwav_init_memory(drwav* pWav, const void* data, size_t dataSize, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    return drwav_init_memory_ex(pWav, data, dataSize, NULL, NULL, 0, pAllocationCallbacks);
+}
+
+DRWAV_API drwav_bool32 drwav_init_memory_ex(drwav* pWav, const void* data, size_t dataSize, drwav_chunk_proc onChunk, void* pChunkUserData, drwav_uint32 flags, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (data == NULL || dataSize == 0) {
+        return DRWAV_FALSE;
+    }
+
+    if (!drwav_preinit(pWav, drwav__on_read_memory, drwav__on_seek_memory, pWav, pAllocationCallbacks)) {
+        return DRWAV_FALSE;
+    }
+
+    pWav->memoryStream.data = (const drwav_uint8*)data;
+    pWav->memoryStream.dataSize = dataSize;
+    pWav->memoryStream.currentReadPos = 0;
+
+    return drwav_init__internal(pWav, onChunk, pChunkUserData, flags);
+}
+
+
+static drwav_bool32 drwav_init_memory_write__internal(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, drwav_bool32 isSequential, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (ppData == NULL || pDataSize == NULL) {
+        return DRWAV_FALSE;
+    }
+
+    *ppData = NULL; /* Important because we're using realloc()! */
+    *pDataSize = 0;
+
+    if (!drwav_preinit_write(pWav, pFormat, isSequential, drwav__on_write_memory, drwav__on_seek_memory_write, pWav, pAllocationCallbacks)) {
+        return DRWAV_FALSE;
+    }
+
+    pWav->memoryStreamWrite.ppData = ppData;
+    pWav->memoryStreamWrite.pDataSize = pDataSize;
+    pWav->memoryStreamWrite.dataSize = 0;
+    pWav->memoryStreamWrite.dataCapacity = 0;
+    pWav->memoryStreamWrite.currentWritePos = 0;
+
+    return drwav_init_write__internal(pWav, pFormat, totalSampleCount);
+}
+
+DRWAV_API drwav_bool32 drwav_init_memory_write(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    return drwav_init_memory_write__internal(pWav, ppData, pDataSize, pFormat, 0, DRWAV_FALSE, pAllocationCallbacks);
+}
+
+DRWAV_API drwav_bool32 drwav_init_memory_write_sequential(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, drwav_uint64 totalSampleCount, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    return drwav_init_memory_write__internal(pWav, ppData, pDataSize, pFormat, totalSampleCount, DRWAV_TRUE, pAllocationCallbacks);
+}
+
+DRWAV_API drwav_bool32 drwav_init_memory_write_sequential_pcm_frames(drwav* pWav, void** ppData, size_t* pDataSize, const drwav_data_format* pFormat, drwav_uint64 totalPCMFrameCount, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pFormat == NULL) {
+        return DRWAV_FALSE;
+    }
+
+    return drwav_init_memory_write_sequential(pWav, ppData, pDataSize, pFormat, totalPCMFrameCount*pFormat->channels, pAllocationCallbacks);
+}
+
+
+
+DRWAV_API drwav_result drwav_uninit(drwav* pWav)
+{
+    drwav_result result = DRWAV_SUCCESS;
+
+    if (pWav == NULL) {
+        return DRWAV_INVALID_ARGS;
+    }
+
+    /*
+    If the drwav object was opened in write mode we'll need to finalize a few things:
+      - Make sure the "data" chunk is aligned to 16-bits for RIFF containers, or 64 bits for W64 containers.
+      - Set the size of the "data" chunk.
+    */
+    if (pWav->onWrite != NULL) {
+        drwav_uint32 paddingSize = 0;
+
+        /* Padding. Do not adjust pWav->dataChunkDataSize - this should not include the padding. */
+        if (pWav->container == drwav_container_riff) {
+            paddingSize = drwav__chunk_padding_size_riff(pWav->dataChunkDataSize);
+        } else {
+            paddingSize = drwav__chunk_padding_size_w64(pWav->dataChunkDataSize);
+        }
+        
+        if (paddingSize > 0) {
+            drwav_uint64 paddingData = 0;
+            pWav->onWrite(pWav->pUserData, &paddingData, paddingSize);
+        }
+
+        /*
+        Chunk sizes. When using sequential mode, these will have been filled in at initialization time. We only need
+        to do this when using non-sequential mode.
+        */
+        if (pWav->onSeek && !pWav->isSequentialWrite) {
+            if (pWav->container == drwav_container_riff) {
+                /* The "RIFF" chunk size. */
+                if (pWav->onSeek(pWav->pUserData, 4, drwav_seek_origin_start)) {
+                    drwav_uint32 riffChunkSize = drwav__riff_chunk_size_riff(pWav->dataChunkDataSize);
+                    pWav->onWrite(pWav->pUserData, &riffChunkSize, 4);
+                }
+
+                /* the "data" chunk size. */
+                if (pWav->onSeek(pWav->pUserData, (int)pWav->dataChunkDataPos + 4, drwav_seek_origin_start)) {
+                    drwav_uint32 dataChunkSize = drwav__data_chunk_size_riff(pWav->dataChunkDataSize);
+                    pWav->onWrite(pWav->pUserData, &dataChunkSize, 4);
+                }
+            } else {
+                /* The "RIFF" chunk size. */
+                if (pWav->onSeek(pWav->pUserData, 16, drwav_seek_origin_start)) {
+                    drwav_uint64 riffChunkSize = drwav__riff_chunk_size_w64(pWav->dataChunkDataSize);
+                    pWav->onWrite(pWav->pUserData, &riffChunkSize, 8);
+                }
+
+                /* The "data" chunk size. */
+                if (pWav->onSeek(pWav->pUserData, (int)pWav->dataChunkDataPos + 16, drwav_seek_origin_start)) {
+                    drwav_uint64 dataChunkSize = drwav__data_chunk_size_w64(pWav->dataChunkDataSize);
+                    pWav->onWrite(pWav->pUserData, &dataChunkSize, 8);
+                }
+            }
+        }
+
+        /* Validation for sequential mode. */
+        if (pWav->isSequentialWrite) {
+            if (pWav->dataChunkDataSize != pWav->dataChunkDataSizeTargetWrite) {
+                result = DRWAV_INVALID_FILE;
+            }
+        }
+    }
+
+#ifndef DR_WAV_NO_STDIO
+    /*
+    If we opened the file with drwav_open_file() we will want to close the file handle. We can know whether or not drwav_open_file()
+    was used by looking at the onRead and onSeek callbacks.
+    */
+    if (pWav->onRead == drwav__on_read_stdio || pWav->onWrite == drwav__on_write_stdio) {
+        fclose((FILE*)pWav->pUserData);
+    }
+#endif
+
+    return result;
+}
+
+
+
+DRWAV_API size_t drwav_read_raw(drwav* pWav, size_t bytesToRead, void* pBufferOut)
+{
+    size_t bytesRead;
+
+    if (pWav == NULL || bytesToRead == 0 || pBufferOut == NULL) {
+        return 0;
+    }
+
+    if (bytesToRead > pWav->bytesRemaining) {
+        bytesToRead = (size_t)pWav->bytesRemaining;
+    }
+
+    bytesRead = pWav->onRead(pWav->pUserData, pBufferOut, bytesToRead);
+
+    pWav->bytesRemaining -= bytesRead;
+    return bytesRead;
+}
+
+
+
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_le(drwav* pWav, drwav_uint64 framesToRead, void* pBufferOut)
+{
+    drwav_uint32 bytesPerFrame;
+
+    if (pWav == NULL || framesToRead == 0 || pBufferOut == NULL) {
+        return 0;
+    }
+
+    /* Cannot use this function for compressed formats. */
+    if (drwav__is_compressed_format_tag(pWav->translatedFormatTag)) {
+        return 0;
+    }
+
+    bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+    if (bytesPerFrame == 0) {
+        return 0;
+    }
+
+    /* Don't try to read more samples than can potentially fit in the output buffer. */
+    if (framesToRead * bytesPerFrame > DRWAV_SIZE_MAX) {
+        framesToRead = DRWAV_SIZE_MAX / bytesPerFrame;
+    }
+
+    return drwav_read_raw(pWav, (size_t)(framesToRead * bytesPerFrame), pBufferOut) / bytesPerFrame;
+}
+
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_be(drwav* pWav, drwav_uint64 framesToRead, void* pBufferOut)
+{
+    drwav_uint64 framesRead = drwav_read_pcm_frames_le(pWav, framesToRead, pBufferOut);
+    drwav__bswap_samples(pBufferOut, framesRead*pWav->channels, drwav_get_bytes_per_pcm_frame(pWav)/pWav->channels, pWav->translatedFormatTag);
+
+    return framesRead;
+}
+
+DRWAV_API drwav_uint64 drwav_read_pcm_frames(drwav* pWav, drwav_uint64 framesToRead, void* pBufferOut)
+{
+    if (drwav__is_little_endian()) {
+        return drwav_read_pcm_frames_le(pWav, framesToRead, pBufferOut);
+    } else {
+        return drwav_read_pcm_frames_be(pWav, framesToRead, pBufferOut);
+    }
+}
+
+
+
+DRWAV_API drwav_bool32 drwav_seek_to_first_pcm_frame(drwav* pWav)
+{
+    if (pWav->onWrite != NULL) {
+        return DRWAV_FALSE; /* No seeking in write mode. */
+    }
+
+    if (!pWav->onSeek(pWav->pUserData, (int)pWav->dataChunkDataPos, drwav_seek_origin_start)) {
+        return DRWAV_FALSE;
+    }
+
+    if (drwav__is_compressed_format_tag(pWav->translatedFormatTag)) {
+        pWav->compressed.iCurrentPCMFrame = 0;
+    }
+    
+    pWav->bytesRemaining = pWav->dataChunkDataSize;
+    return DRWAV_TRUE;
+}
+
+DRWAV_API drwav_bool32 drwav_seek_to_pcm_frame(drwav* pWav, drwav_uint64 targetFrameIndex)
+{
+    /* Seeking should be compatible with wave files > 2GB. */
+
+    if (pWav == NULL || pWav->onSeek == NULL) {
+        return DRWAV_FALSE;
+    }
+
+    /* No seeking in write mode. */
+    if (pWav->onWrite != NULL) {
+        return DRWAV_FALSE;
+    }
+
+    /* If there are no samples, just return DRWAV_TRUE without doing anything. */
+    if (pWav->totalPCMFrameCount == 0) {
+        return DRWAV_TRUE;
+    }
+
+    /* Make sure the sample is clamped. */
+    if (targetFrameIndex >= pWav->totalPCMFrameCount) {
+        targetFrameIndex  = pWav->totalPCMFrameCount - 1;
+    }
+
+    /*
+    For compressed formats we just use a slow generic seek. If we are seeking forward we just seek forward. If we are going backwards we need
+    to seek back to the start.
+    */
+    if (drwav__is_compressed_format_tag(pWav->translatedFormatTag)) {
+        /* TODO: This can be optimized. */
+        
+        /*
+        If we're seeking forward it's simple - just keep reading samples until we hit the sample we're requesting. If we're seeking backwards,
+        we first need to seek back to the start and then just do the same thing as a forward seek.
+        */
+        if (targetFrameIndex < pWav->compressed.iCurrentPCMFrame) {
+            if (!drwav_seek_to_first_pcm_frame(pWav)) {
+                return DRWAV_FALSE;
+            }
+        }
+
+        if (targetFrameIndex > pWav->compressed.iCurrentPCMFrame) {
+            drwav_uint64 offsetInFrames = targetFrameIndex - pWav->compressed.iCurrentPCMFrame;
+
+            drwav_int16 devnull[2048];
+            while (offsetInFrames > 0) {
+                drwav_uint64 framesRead = 0;
+                drwav_uint64 framesToRead = offsetInFrames;
+                if (framesToRead > drwav_countof(devnull)/pWav->channels) {
+                    framesToRead = drwav_countof(devnull)/pWav->channels;
+                }
+
+                if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) {
+                    framesRead = drwav_read_pcm_frames_s16__msadpcm(pWav, framesToRead, devnull);
+                } else if (pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) {
+                    framesRead = drwav_read_pcm_frames_s16__ima(pWav, framesToRead, devnull);
+                } else {
+                    DRWAV_ASSERT(DRWAV_FALSE);  /* If this assertion is triggered it means I've implemented a new compressed format but forgot to add a branch for it here. */
+                }
+
+                if (framesRead != framesToRead) {
+                    return DRWAV_FALSE;
+                }
+
+                offsetInFrames -= framesRead;
+            }
+        }
+    } else {
+        drwav_uint64 totalSizeInBytes;
+        drwav_uint64 currentBytePos;
+        drwav_uint64 targetBytePos;
+        drwav_uint64 offset;
+
+        totalSizeInBytes = pWav->totalPCMFrameCount * drwav_get_bytes_per_pcm_frame(pWav);
+        DRWAV_ASSERT(totalSizeInBytes >= pWav->bytesRemaining);
+
+        currentBytePos = totalSizeInBytes - pWav->bytesRemaining;
+        targetBytePos  = targetFrameIndex * drwav_get_bytes_per_pcm_frame(pWav);
+
+        if (currentBytePos < targetBytePos) {
+            /* Offset forwards. */
+            offset = (targetBytePos - currentBytePos);
+        } else {
+            /* Offset backwards. */
+            if (!drwav_seek_to_first_pcm_frame(pWav)) {
+                return DRWAV_FALSE;
+            }
+            offset = targetBytePos;
+        }
+
+        while (offset > 0) {
+            int offset32 = ((offset > INT_MAX) ? INT_MAX : (int)offset);
+            if (!pWav->onSeek(pWav->pUserData, offset32, drwav_seek_origin_current)) {
+                return DRWAV_FALSE;
+            }
+
+            pWav->bytesRemaining -= offset32;
+            offset -= offset32;
+        }
+    }
+
+    return DRWAV_TRUE;
+}
+
+
+DRWAV_API size_t drwav_write_raw(drwav* pWav, size_t bytesToWrite, const void* pData)
+{
+    size_t bytesWritten;
+
+    if (pWav == NULL || bytesToWrite == 0 || pData == NULL) {
+        return 0;
+    }
+
+    bytesWritten = pWav->onWrite(pWav->pUserData, pData, bytesToWrite);
+    pWav->dataChunkDataSize += bytesWritten;
+
+    return bytesWritten;
+}
+
+
+DRWAV_API drwav_uint64 drwav_write_pcm_frames_le(drwav* pWav, drwav_uint64 framesToWrite, const void* pData)
+{
+    drwav_uint64 bytesToWrite;
+    drwav_uint64 bytesWritten;
+    const drwav_uint8* pRunningData;
+
+    if (pWav == NULL || framesToWrite == 0 || pData == NULL) {
+        return 0;
+    }
+
+    bytesToWrite = ((framesToWrite * pWav->channels * pWav->bitsPerSample) / 8);
+    if (bytesToWrite > DRWAV_SIZE_MAX) {
+        return 0;
+    }
+
+    bytesWritten = 0;
+    pRunningData = (const drwav_uint8*)pData;
+
+    while (bytesToWrite > 0) {
+        size_t bytesJustWritten;
+        drwav_uint64 bytesToWriteThisIteration;
+
+        bytesToWriteThisIteration = bytesToWrite;
+        DRWAV_ASSERT(bytesToWriteThisIteration <= DRWAV_SIZE_MAX);  /* <-- This is checked above. */
+
+        bytesJustWritten = drwav_write_raw(pWav, (size_t)bytesToWriteThisIteration, pRunningData);
+        if (bytesJustWritten == 0) {
+            break;
+        }
+
+        bytesToWrite -= bytesJustWritten;
+        bytesWritten += bytesJustWritten;
+        pRunningData += bytesJustWritten;
+    }
+
+    return (bytesWritten * 8) / pWav->bitsPerSample / pWav->channels;
+}
+
+DRWAV_API drwav_uint64 drwav_write_pcm_frames_be(drwav* pWav, drwav_uint64 framesToWrite, const void* pData)
+{
+    drwav_uint64 bytesToWrite;
+    drwav_uint64 bytesWritten;
+    drwav_uint32 bytesPerSample;
+    const drwav_uint8* pRunningData;
+
+    if (pWav == NULL || framesToWrite == 0 || pData == NULL) {
+        return 0;
+    }
+
+    bytesToWrite = ((framesToWrite * pWav->channels * pWav->bitsPerSample) / 8);
+    if (bytesToWrite > DRWAV_SIZE_MAX) {
+        return 0;
+    }
+
+    bytesWritten = 0;
+    pRunningData = (const drwav_uint8*)pData;
+
+    bytesPerSample = drwav_get_bytes_per_pcm_frame(pWav) / pWav->channels;
+    
+    while (bytesToWrite > 0) {
+        drwav_uint8 temp[4096];
+        drwav_uint32 sampleCount;
+        size_t bytesJustWritten;
+        drwav_uint64 bytesToWriteThisIteration;
+
+        bytesToWriteThisIteration = bytesToWrite;
+        DRWAV_ASSERT(bytesToWriteThisIteration <= DRWAV_SIZE_MAX);  /* <-- This is checked above. */
+
+        /*
+        WAV files are always little-endian. We need to byte swap on big-endian architectures. Since our input buffer is read-only we need
+        to use an intermediary buffer for the conversion.
+        */
+        sampleCount = sizeof(temp)/bytesPerSample;
+
+        if (bytesToWriteThisIteration > ((drwav_uint64)sampleCount)*bytesPerSample) {
+            bytesToWriteThisIteration = ((drwav_uint64)sampleCount)*bytesPerSample;
+        }
+
+        DRWAV_COPY_MEMORY(temp, pRunningData, (size_t)bytesToWriteThisIteration);
+        drwav__bswap_samples(temp, sampleCount, bytesPerSample, pWav->translatedFormatTag);
+
+        bytesJustWritten = drwav_write_raw(pWav, (size_t)bytesToWriteThisIteration, temp);
+        if (bytesJustWritten == 0) {
+            break;
+        }
+
+        bytesToWrite -= bytesJustWritten;
+        bytesWritten += bytesJustWritten;
+        pRunningData += bytesJustWritten;
+    }
+
+    return (bytesWritten * 8) / pWav->bitsPerSample / pWav->channels;
+}
+
+DRWAV_API drwav_uint64 drwav_write_pcm_frames(drwav* pWav, drwav_uint64 framesToWrite, const void* pData)
+{
+    if (drwav__is_little_endian()) {
+        return drwav_write_pcm_frames_le(pWav, framesToWrite, pData);
+    } else {
+        return drwav_write_pcm_frames_be(pWav, framesToWrite, pData);
+    }
+}
+
+
+static drwav_uint64 drwav_read_pcm_frames_s16__msadpcm(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
+{
+    drwav_uint64 totalFramesRead = 0;
+
+    DRWAV_ASSERT(pWav != NULL);
+    DRWAV_ASSERT(framesToRead > 0);
+    DRWAV_ASSERT(pBufferOut != NULL);
+
+    /* TODO: Lots of room for optimization here. */
+
+    while (framesToRead > 0 && pWav->compressed.iCurrentPCMFrame < pWav->totalPCMFrameCount) {
+        /* If there are no cached frames we need to load a new block. */
+        if (pWav->msadpcm.cachedFrameCount == 0 && pWav->msadpcm.bytesRemainingInBlock == 0) {
+            if (pWav->channels == 1) {
+                /* Mono. */
+                drwav_uint8 header[7];
+                if (pWav->onRead(pWav->pUserData, header, sizeof(header)) != sizeof(header)) {
+                    return totalFramesRead;
+                }
+                pWav->msadpcm.bytesRemainingInBlock = pWav->fmt.blockAlign - sizeof(header);
+
+                pWav->msadpcm.predictor[0]     = header[0];
+                pWav->msadpcm.delta[0]         = drwav__bytes_to_s16(header + 1);
+                pWav->msadpcm.prevFrames[0][1] = (drwav_int32)drwav__bytes_to_s16(header + 3);
+                pWav->msadpcm.prevFrames[0][0] = (drwav_int32)drwav__bytes_to_s16(header + 5);
+                pWav->msadpcm.cachedFrames[2]  = pWav->msadpcm.prevFrames[0][0];
+                pWav->msadpcm.cachedFrames[3]  = pWav->msadpcm.prevFrames[0][1];
+                pWav->msadpcm.cachedFrameCount = 2;
+            } else {
+                /* Stereo. */
+                drwav_uint8 header[14];
+                if (pWav->onRead(pWav->pUserData, header, sizeof(header)) != sizeof(header)) {
+                    return totalFramesRead;
+                }
+                pWav->msadpcm.bytesRemainingInBlock = pWav->fmt.blockAlign - sizeof(header);
+
+                pWav->msadpcm.predictor[0] = header[0];
+                pWav->msadpcm.predictor[1] = header[1];
+                pWav->msadpcm.delta[0] = drwav__bytes_to_s16(header + 2);
+                pWav->msadpcm.delta[1] = drwav__bytes_to_s16(header + 4);
+                pWav->msadpcm.prevFrames[0][1] = (drwav_int32)drwav__bytes_to_s16(header + 6);
+                pWav->msadpcm.prevFrames[1][1] = (drwav_int32)drwav__bytes_to_s16(header + 8);
+                pWav->msadpcm.prevFrames[0][0] = (drwav_int32)drwav__bytes_to_s16(header + 10);
+                pWav->msadpcm.prevFrames[1][0] = (drwav_int32)drwav__bytes_to_s16(header + 12);
+
+                pWav->msadpcm.cachedFrames[0] = pWav->msadpcm.prevFrames[0][0];
+                pWav->msadpcm.cachedFrames[1] = pWav->msadpcm.prevFrames[1][0];
+                pWav->msadpcm.cachedFrames[2] = pWav->msadpcm.prevFrames[0][1];
+                pWav->msadpcm.cachedFrames[3] = pWav->msadpcm.prevFrames[1][1];
+                pWav->msadpcm.cachedFrameCount = 2;
+            }
+        }
+
+        /* Output anything that's cached. */
+        while (framesToRead > 0 && pWav->msadpcm.cachedFrameCount > 0 && pWav->compressed.iCurrentPCMFrame < pWav->totalPCMFrameCount) {
+            drwav_uint32 iSample = 0;
+            for (iSample = 0; iSample < pWav->channels; iSample += 1) {
+                pBufferOut[iSample] = (drwav_int16)pWav->msadpcm.cachedFrames[(drwav_countof(pWav->msadpcm.cachedFrames) - (pWav->msadpcm.cachedFrameCount*pWav->channels)) + iSample];
+            }
+
+            pBufferOut      += pWav->channels;
+            framesToRead    -= 1;
+            totalFramesRead += 1;
+            pWav->compressed.iCurrentPCMFrame += 1;
+            pWav->msadpcm.cachedFrameCount -= 1;
+        }
+
+        if (framesToRead == 0) {
+            return totalFramesRead;
+        }
+
+
+        /*
+        If there's nothing left in the cache, just go ahead and load more. If there's nothing left to load in the current block we just continue to the next
+        loop iteration which will trigger the loading of a new block.
+        */
+        if (pWav->msadpcm.cachedFrameCount == 0) {
+            if (pWav->msadpcm.bytesRemainingInBlock == 0) {
+                continue;
+            } else {
+                static drwav_int32 adaptationTable[] = { 
+                    230, 230, 230, 230, 307, 409, 512, 614, 
+                    768, 614, 512, 409, 307, 230, 230, 230 
+                };
+                static drwav_int32 coeff1Table[] = { 256, 512, 0, 192, 240, 460,  392 };
+                static drwav_int32 coeff2Table[] = { 0,  -256, 0, 64,  0,  -208, -232 };
+
+                drwav_uint8 nibbles;
+                drwav_int32 nibble0;
+                drwav_int32 nibble1;
+
+                if (pWav->onRead(pWav->pUserData, &nibbles, 1) != 1) {
+                    return totalFramesRead;
+                }
+                pWav->msadpcm.bytesRemainingInBlock -= 1;
+
+                /* TODO: Optimize away these if statements. */
+                nibble0 = ((nibbles & 0xF0) >> 4); if ((nibbles & 0x80)) { nibble0 |= 0xFFFFFFF0UL; }
+                nibble1 = ((nibbles & 0x0F) >> 0); if ((nibbles & 0x08)) { nibble1 |= 0xFFFFFFF0UL; }
+
+                if (pWav->channels == 1) {
+                    /* Mono. */
+                    drwav_int32 newSample0;
+                    drwav_int32 newSample1;
+
+                    newSample0  = ((pWav->msadpcm.prevFrames[0][1] * coeff1Table[pWav->msadpcm.predictor[0]]) + (pWav->msadpcm.prevFrames[0][0] * coeff2Table[pWav->msadpcm.predictor[0]])) >> 8;
+                    newSample0 += nibble0 * pWav->msadpcm.delta[0];
+                    newSample0  = drwav_clamp(newSample0, -32768, 32767);
+
+                    pWav->msadpcm.delta[0] = (adaptationTable[((nibbles & 0xF0) >> 4)] * pWav->msadpcm.delta[0]) >> 8;
+                    if (pWav->msadpcm.delta[0] < 16) {
+                        pWav->msadpcm.delta[0] = 16;
+                    }
+
+                    pWav->msadpcm.prevFrames[0][0] = pWav->msadpcm.prevFrames[0][1];
+                    pWav->msadpcm.prevFrames[0][1] = newSample0;
+
+
+                    newSample1  = ((pWav->msadpcm.prevFrames[0][1] * coeff1Table[pWav->msadpcm.predictor[0]]) + (pWav->msadpcm.prevFrames[0][0] * coeff2Table[pWav->msadpcm.predictor[0]])) >> 8;
+                    newSample1 += nibble1 * pWav->msadpcm.delta[0];
+                    newSample1  = drwav_clamp(newSample1, -32768, 32767);
+
+                    pWav->msadpcm.delta[0] = (adaptationTable[((nibbles & 0x0F) >> 0)] * pWav->msadpcm.delta[0]) >> 8;
+                    if (pWav->msadpcm.delta[0] < 16) {
+                        pWav->msadpcm.delta[0] = 16;
+                    }
+
+                    pWav->msadpcm.prevFrames[0][0] = pWav->msadpcm.prevFrames[0][1];
+                    pWav->msadpcm.prevFrames[0][1] = newSample1;
+
+
+                    pWav->msadpcm.cachedFrames[2] = newSample0;
+                    pWav->msadpcm.cachedFrames[3] = newSample1;
+                    pWav->msadpcm.cachedFrameCount = 2;
+                } else {
+                    /* Stereo. */
+                    drwav_int32 newSample0;
+                    drwav_int32 newSample1;
+
+                    /* Left. */
+                    newSample0  = ((pWav->msadpcm.prevFrames[0][1] * coeff1Table[pWav->msadpcm.predictor[0]]) + (pWav->msadpcm.prevFrames[0][0] * coeff2Table[pWav->msadpcm.predictor[0]])) >> 8;
+                    newSample0 += nibble0 * pWav->msadpcm.delta[0];
+                    newSample0  = drwav_clamp(newSample0, -32768, 32767);
+
+                    pWav->msadpcm.delta[0] = (adaptationTable[((nibbles & 0xF0) >> 4)] * pWav->msadpcm.delta[0]) >> 8;
+                    if (pWav->msadpcm.delta[0] < 16) {
+                        pWav->msadpcm.delta[0] = 16;
+                    }
+
+                    pWav->msadpcm.prevFrames[0][0] = pWav->msadpcm.prevFrames[0][1];
+                    pWav->msadpcm.prevFrames[0][1] = newSample0;
+
+
+                    /* Right. */
+                    newSample1  = ((pWav->msadpcm.prevFrames[1][1] * coeff1Table[pWav->msadpcm.predictor[1]]) + (pWav->msadpcm.prevFrames[1][0] * coeff2Table[pWav->msadpcm.predictor[1]])) >> 8;
+                    newSample1 += nibble1 * pWav->msadpcm.delta[1];
+                    newSample1  = drwav_clamp(newSample1, -32768, 32767);
+
+                    pWav->msadpcm.delta[1] = (adaptationTable[((nibbles & 0x0F) >> 0)] * pWav->msadpcm.delta[1]) >> 8;
+                    if (pWav->msadpcm.delta[1] < 16) {
+                        pWav->msadpcm.delta[1] = 16;
+                    }
+
+                    pWav->msadpcm.prevFrames[1][0] = pWav->msadpcm.prevFrames[1][1];
+                    pWav->msadpcm.prevFrames[1][1] = newSample1;
+
+                    pWav->msadpcm.cachedFrames[2] = newSample0;
+                    pWav->msadpcm.cachedFrames[3] = newSample1;
+                    pWav->msadpcm.cachedFrameCount = 1;
+                }
+            }
+        }
+    }
+
+    return totalFramesRead;
+}
+
+
+static drwav_uint64 drwav_read_pcm_frames_s16__ima(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
+{
+    drwav_uint64 totalFramesRead = 0;
+
+    DRWAV_ASSERT(pWav != NULL);
+    DRWAV_ASSERT(framesToRead > 0);
+    DRWAV_ASSERT(pBufferOut != NULL);
+
+    /* TODO: Lots of room for optimization here. */
+
+    while (framesToRead > 0 && pWav->compressed.iCurrentPCMFrame < pWav->totalPCMFrameCount) {
+        /* If there are no cached samples we need to load a new block. */
+        if (pWav->ima.cachedFrameCount == 0 && pWav->ima.bytesRemainingInBlock == 0) {
+            if (pWav->channels == 1) {
+                /* Mono. */
+                drwav_uint8 header[4];
+                if (pWav->onRead(pWav->pUserData, header, sizeof(header)) != sizeof(header)) {
+                    return totalFramesRead;
+                }
+                pWav->ima.bytesRemainingInBlock = pWav->fmt.blockAlign - sizeof(header);
+
+                pWav->ima.predictor[0] = drwav__bytes_to_s16(header + 0);
+                pWav->ima.stepIndex[0] = header[2];
+                pWav->ima.cachedFrames[drwav_countof(pWav->ima.cachedFrames) - 1] = pWav->ima.predictor[0];
+                pWav->ima.cachedFrameCount = 1;
+            } else {
+                /* Stereo. */
+                drwav_uint8 header[8];
+                if (pWav->onRead(pWav->pUserData, header, sizeof(header)) != sizeof(header)) {
+                    return totalFramesRead;
+                }
+                pWav->ima.bytesRemainingInBlock = pWav->fmt.blockAlign - sizeof(header);
+
+                pWav->ima.predictor[0] = drwav__bytes_to_s16(header + 0);
+                pWav->ima.stepIndex[0] = header[2];
+                pWav->ima.predictor[1] = drwav__bytes_to_s16(header + 4);
+                pWav->ima.stepIndex[1] = header[6];
+
+                pWav->ima.cachedFrames[drwav_countof(pWav->ima.cachedFrames) - 2] = pWav->ima.predictor[0];
+                pWav->ima.cachedFrames[drwav_countof(pWav->ima.cachedFrames) - 1] = pWav->ima.predictor[1];
+                pWav->ima.cachedFrameCount = 1;
+            }
+        }
+
+        /* Output anything that's cached. */
+        while (framesToRead > 0 && pWav->ima.cachedFrameCount > 0 && pWav->compressed.iCurrentPCMFrame < pWav->totalPCMFrameCount) {
+            drwav_uint32 iSample;
+            for (iSample = 0; iSample < pWav->channels; iSample += 1) {
+                pBufferOut[iSample] = (drwav_int16)pWav->ima.cachedFrames[(drwav_countof(pWav->ima.cachedFrames) - (pWav->ima.cachedFrameCount*pWav->channels)) + iSample];
+            }
+
+            pBufferOut      += pWav->channels;
+            framesToRead    -= 1;
+            totalFramesRead += 1;
+            pWav->compressed.iCurrentPCMFrame += 1;
+            pWav->ima.cachedFrameCount -= 1;
+        }
+
+        if (framesToRead == 0) {
+            return totalFramesRead;
+        }
+
+        /*
+        If there's nothing left in the cache, just go ahead and load more. If there's nothing left to load in the current block we just continue to the next
+        loop iteration which will trigger the loading of a new block.
+        */
+        if (pWav->ima.cachedFrameCount == 0) {
+            if (pWav->ima.bytesRemainingInBlock == 0) {
+                continue;
+            } else {
+                static drwav_int32 indexTable[16] = {
+                    -1, -1, -1, -1, 2, 4, 6, 8,
+                    -1, -1, -1, -1, 2, 4, 6, 8
+                };
+
+                static drwav_int32 stepTable[89] = {
+                    7,     8,     9,     10,    11,    12,    13,    14,    16,    17, 
+                    19,    21,    23,    25,    28,    31,    34,    37,    41,    45, 
+                    50,    55,    60,    66,    73,    80,    88,    97,    107,   118, 
+                    130,   143,   157,   173,   190,   209,   230,   253,   279,   307,
+                    337,   371,   408,   449,   494,   544,   598,   658,   724,   796,
+                    876,   963,   1060,  1166,  1282,  1411,  1552,  1707,  1878,  2066, 
+                    2272,  2499,  2749,  3024,  3327,  3660,  4026,  4428,  4871,  5358,
+                    5894,  6484,  7132,  7845,  8630,  9493,  10442, 11487, 12635, 13899, 
+                    15289, 16818, 18500, 20350, 22385, 24623, 27086, 29794, 32767 
+                };
+
+                drwav_uint32 iChannel;
+
+                /*
+                From what I can tell with stereo streams, it looks like every 4 bytes (8 samples) is for one channel. So it goes 4 bytes for the
+                left channel, 4 bytes for the right channel.
+                */
+                pWav->ima.cachedFrameCount = 8;
+                for (iChannel = 0; iChannel < pWav->channels; ++iChannel) {
+                    drwav_uint32 iByte;
+                    drwav_uint8 nibbles[4];
+                    if (pWav->onRead(pWav->pUserData, &nibbles, 4) != 4) {
+                        pWav->ima.cachedFrameCount = 0;
+                        return totalFramesRead;
+                    }
+                    pWav->ima.bytesRemainingInBlock -= 4;
+
+                    for (iByte = 0; iByte < 4; ++iByte) {
+                        drwav_uint8 nibble0 = ((nibbles[iByte] & 0x0F) >> 0);
+                        drwav_uint8 nibble1 = ((nibbles[iByte] & 0xF0) >> 4);
+
+                        drwav_int32 step      = stepTable[pWav->ima.stepIndex[iChannel]];
+                        drwav_int32 predictor = pWav->ima.predictor[iChannel];
+
+                        drwav_int32      diff  = step >> 3;
+                        if (nibble0 & 1) diff += step >> 2;
+                        if (nibble0 & 2) diff += step >> 1;
+                        if (nibble0 & 4) diff += step;
+                        if (nibble0 & 8) diff  = -diff;
+
+                        predictor = drwav_clamp(predictor + diff, -32768, 32767);
+                        pWav->ima.predictor[iChannel] = predictor;
+                        pWav->ima.stepIndex[iChannel] = drwav_clamp(pWav->ima.stepIndex[iChannel] + indexTable[nibble0], 0, (drwav_int32)drwav_countof(stepTable)-1);
+                        pWav->ima.cachedFrames[(drwav_countof(pWav->ima.cachedFrames) - (pWav->ima.cachedFrameCount*pWav->channels)) + (iByte*2+0)*pWav->channels + iChannel] = predictor;
+
+
+                        step      = stepTable[pWav->ima.stepIndex[iChannel]];
+                        predictor = pWav->ima.predictor[iChannel];
+
+                                         diff  = step >> 3;
+                        if (nibble1 & 1) diff += step >> 2;
+                        if (nibble1 & 2) diff += step >> 1;
+                        if (nibble1 & 4) diff += step;
+                        if (nibble1 & 8) diff  = -diff;
+
+                        predictor = drwav_clamp(predictor + diff, -32768, 32767);
+                        pWav->ima.predictor[iChannel] = predictor;
+                        pWav->ima.stepIndex[iChannel] = drwav_clamp(pWav->ima.stepIndex[iChannel] + indexTable[nibble1], 0, (drwav_int32)drwav_countof(stepTable)-1);
+                        pWav->ima.cachedFrames[(drwav_countof(pWav->ima.cachedFrames) - (pWav->ima.cachedFrameCount*pWav->channels)) + (iByte*2+1)*pWav->channels + iChannel] = predictor;
+                    }
+                }
+            }
+        }
+    }
+
+    return totalFramesRead;
+}
+
+
+#ifndef DR_WAV_NO_CONVERSION_API
+static unsigned short g_drwavAlawTable[256] = {
+    0xEA80, 0xEB80, 0xE880, 0xE980, 0xEE80, 0xEF80, 0xEC80, 0xED80, 0xE280, 0xE380, 0xE080, 0xE180, 0xE680, 0xE780, 0xE480, 0xE580, 
+    0xF540, 0xF5C0, 0xF440, 0xF4C0, 0xF740, 0xF7C0, 0xF640, 0xF6C0, 0xF140, 0xF1C0, 0xF040, 0xF0C0, 0xF340, 0xF3C0, 0xF240, 0xF2C0, 
+    0xAA00, 0xAE00, 0xA200, 0xA600, 0xBA00, 0xBE00, 0xB200, 0xB600, 0x8A00, 0x8E00, 0x8200, 0x8600, 0x9A00, 0x9E00, 0x9200, 0x9600, 
+    0xD500, 0xD700, 0xD100, 0xD300, 0xDD00, 0xDF00, 0xD900, 0xDB00, 0xC500, 0xC700, 0xC100, 0xC300, 0xCD00, 0xCF00, 0xC900, 0xCB00, 
+    0xFEA8, 0xFEB8, 0xFE88, 0xFE98, 0xFEE8, 0xFEF8, 0xFEC8, 0xFED8, 0xFE28, 0xFE38, 0xFE08, 0xFE18, 0xFE68, 0xFE78, 0xFE48, 0xFE58, 
+    0xFFA8, 0xFFB8, 0xFF88, 0xFF98, 0xFFE8, 0xFFF8, 0xFFC8, 0xFFD8, 0xFF28, 0xFF38, 0xFF08, 0xFF18, 0xFF68, 0xFF78, 0xFF48, 0xFF58, 
+    0xFAA0, 0xFAE0, 0xFA20, 0xFA60, 0xFBA0, 0xFBE0, 0xFB20, 0xFB60, 0xF8A0, 0xF8E0, 0xF820, 0xF860, 0xF9A0, 0xF9E0, 0xF920, 0xF960, 
+    0xFD50, 0xFD70, 0xFD10, 0xFD30, 0xFDD0, 0xFDF0, 0xFD90, 0xFDB0, 0xFC50, 0xFC70, 0xFC10, 0xFC30, 0xFCD0, 0xFCF0, 0xFC90, 0xFCB0, 
+    0x1580, 0x1480, 0x1780, 0x1680, 0x1180, 0x1080, 0x1380, 0x1280, 0x1D80, 0x1C80, 0x1F80, 0x1E80, 0x1980, 0x1880, 0x1B80, 0x1A80, 
+    0x0AC0, 0x0A40, 0x0BC0, 0x0B40, 0x08C0, 0x0840, 0x09C0, 0x0940, 0x0EC0, 0x0E40, 0x0FC0, 0x0F40, 0x0CC0, 0x0C40, 0x0DC0, 0x0D40, 
+    0x5600, 0x5200, 0x5E00, 0x5A00, 0x4600, 0x4200, 0x4E00, 0x4A00, 0x7600, 0x7200, 0x7E00, 0x7A00, 0x6600, 0x6200, 0x6E00, 0x6A00, 
+    0x2B00, 0x2900, 0x2F00, 0x2D00, 0x2300, 0x2100, 0x2700, 0x2500, 0x3B00, 0x3900, 0x3F00, 0x3D00, 0x3300, 0x3100, 0x3700, 0x3500, 
+    0x0158, 0x0148, 0x0178, 0x0168, 0x0118, 0x0108, 0x0138, 0x0128, 0x01D8, 0x01C8, 0x01F8, 0x01E8, 0x0198, 0x0188, 0x01B8, 0x01A8, 
+    0x0058, 0x0048, 0x0078, 0x0068, 0x0018, 0x0008, 0x0038, 0x0028, 0x00D8, 0x00C8, 0x00F8, 0x00E8, 0x0098, 0x0088, 0x00B8, 0x00A8, 
+    0x0560, 0x0520, 0x05E0, 0x05A0, 0x0460, 0x0420, 0x04E0, 0x04A0, 0x0760, 0x0720, 0x07E0, 0x07A0, 0x0660, 0x0620, 0x06E0, 0x06A0, 
+    0x02B0, 0x0290, 0x02F0, 0x02D0, 0x0230, 0x0210, 0x0270, 0x0250, 0x03B0, 0x0390, 0x03F0, 0x03D0, 0x0330, 0x0310, 0x0370, 0x0350
+};
+
+static unsigned short g_drwavMulawTable[256] = {
+    0x8284, 0x8684, 0x8A84, 0x8E84, 0x9284, 0x9684, 0x9A84, 0x9E84, 0xA284, 0xA684, 0xAA84, 0xAE84, 0xB284, 0xB684, 0xBA84, 0xBE84, 
+    0xC184, 0xC384, 0xC584, 0xC784, 0xC984, 0xCB84, 0xCD84, 0xCF84, 0xD184, 0xD384, 0xD584, 0xD784, 0xD984, 0xDB84, 0xDD84, 0xDF84, 
+    0xE104, 0xE204, 0xE304, 0xE404, 0xE504, 0xE604, 0xE704, 0xE804, 0xE904, 0xEA04, 0xEB04, 0xEC04, 0xED04, 0xEE04, 0xEF04, 0xF004, 
+    0xF0C4, 0xF144, 0xF1C4, 0xF244, 0xF2C4, 0xF344, 0xF3C4, 0xF444, 0xF4C4, 0xF544, 0xF5C4, 0xF644, 0xF6C4, 0xF744, 0xF7C4, 0xF844, 
+    0xF8A4, 0xF8E4, 0xF924, 0xF964, 0xF9A4, 0xF9E4, 0xFA24, 0xFA64, 0xFAA4, 0xFAE4, 0xFB24, 0xFB64, 0xFBA4, 0xFBE4, 0xFC24, 0xFC64, 
+    0xFC94, 0xFCB4, 0xFCD4, 0xFCF4, 0xFD14, 0xFD34, 0xFD54, 0xFD74, 0xFD94, 0xFDB4, 0xFDD4, 0xFDF4, 0xFE14, 0xFE34, 0xFE54, 0xFE74, 
+    0xFE8C, 0xFE9C, 0xFEAC, 0xFEBC, 0xFECC, 0xFEDC, 0xFEEC, 0xFEFC, 0xFF0C, 0xFF1C, 0xFF2C, 0xFF3C, 0xFF4C, 0xFF5C, 0xFF6C, 0xFF7C, 
+    0xFF88, 0xFF90, 0xFF98, 0xFFA0, 0xFFA8, 0xFFB0, 0xFFB8, 0xFFC0, 0xFFC8, 0xFFD0, 0xFFD8, 0xFFE0, 0xFFE8, 0xFFF0, 0xFFF8, 0x0000, 
+    0x7D7C, 0x797C, 0x757C, 0x717C, 0x6D7C, 0x697C, 0x657C, 0x617C, 0x5D7C, 0x597C, 0x557C, 0x517C, 0x4D7C, 0x497C, 0x457C, 0x417C, 
+    0x3E7C, 0x3C7C, 0x3A7C, 0x387C, 0x367C, 0x347C, 0x327C, 0x307C, 0x2E7C, 0x2C7C, 0x2A7C, 0x287C, 0x267C, 0x247C, 0x227C, 0x207C, 
+    0x1EFC, 0x1DFC, 0x1CFC, 0x1BFC, 0x1AFC, 0x19FC, 0x18FC, 0x17FC, 0x16FC, 0x15FC, 0x14FC, 0x13FC, 0x12FC, 0x11FC, 0x10FC, 0x0FFC, 
+    0x0F3C, 0x0EBC, 0x0E3C, 0x0DBC, 0x0D3C, 0x0CBC, 0x0C3C, 0x0BBC, 0x0B3C, 0x0ABC, 0x0A3C, 0x09BC, 0x093C, 0x08BC, 0x083C, 0x07BC, 
+    0x075C, 0x071C, 0x06DC, 0x069C, 0x065C, 0x061C, 0x05DC, 0x059C, 0x055C, 0x051C, 0x04DC, 0x049C, 0x045C, 0x041C, 0x03DC, 0x039C, 
+    0x036C, 0x034C, 0x032C, 0x030C, 0x02EC, 0x02CC, 0x02AC, 0x028C, 0x026C, 0x024C, 0x022C, 0x020C, 0x01EC, 0x01CC, 0x01AC, 0x018C, 
+    0x0174, 0x0164, 0x0154, 0x0144, 0x0134, 0x0124, 0x0114, 0x0104, 0x00F4, 0x00E4, 0x00D4, 0x00C4, 0x00B4, 0x00A4, 0x0094, 0x0084, 
+    0x0078, 0x0070, 0x0068, 0x0060, 0x0058, 0x0050, 0x0048, 0x0040, 0x0038, 0x0030, 0x0028, 0x0020, 0x0018, 0x0010, 0x0008, 0x0000
+};
+
+static DRWAV_INLINE drwav_int16 drwav__alaw_to_s16(drwav_uint8 sampleIn)
+{
+    return (short)g_drwavAlawTable[sampleIn];
+}
+
+static DRWAV_INLINE drwav_int16 drwav__mulaw_to_s16(drwav_uint8 sampleIn)
+{
+    return (short)g_drwavMulawTable[sampleIn];
+}
+
+
+
+static void drwav__pcm_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t totalSampleCount, unsigned int bytesPerSample)
+{
+    unsigned int i;
+
+    /* Special case for 8-bit sample data because it's treated as unsigned. */
+    if (bytesPerSample == 1) {
+        drwav_u8_to_s16(pOut, pIn, totalSampleCount);
+        return;
+    }
+
+
+    /* Slightly more optimal implementation for common formats. */
+    if (bytesPerSample == 2) {
+        for (i = 0; i < totalSampleCount; ++i) {
+           *pOut++ = ((const drwav_int16*)pIn)[i];
+        }
+        return;
+    }
+    if (bytesPerSample == 3) {
+        drwav_s24_to_s16(pOut, pIn, totalSampleCount);
+        return;
+    }
+    if (bytesPerSample == 4) {
+        drwav_s32_to_s16(pOut, (const drwav_int32*)pIn, totalSampleCount);
+        return;
+    }
+
+
+    /* Anything more than 64 bits per sample is not supported. */
+    if (bytesPerSample > 8) {
+        DRWAV_ZERO_MEMORY(pOut, totalSampleCount * sizeof(*pOut));
+        return;
+    }
+
+
+    /* Generic, slow converter. */
+    for (i = 0; i < totalSampleCount; ++i) {
+        drwav_uint64 sample = 0;
+        unsigned int shift  = (8 - bytesPerSample) * 8;
+
+        unsigned int j;
+        for (j = 0; j < bytesPerSample; j += 1) {
+            DRWAV_ASSERT(j < 8);
+            sample |= (drwav_uint64)(pIn[j]) << shift;
+            shift  += 8;
+        }
+
+        pIn += j;
+        *pOut++ = (drwav_int16)((drwav_int64)sample >> 48);
+    }
+}
+
+static void drwav__ieee_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t totalSampleCount, unsigned int bytesPerSample)
+{
+    if (bytesPerSample == 4) {
+        drwav_f32_to_s16(pOut, (const float*)pIn, totalSampleCount);
+        return;
+    } else if (bytesPerSample == 8) {
+        drwav_f64_to_s16(pOut, (const double*)pIn, totalSampleCount);
+        return;
+    } else {
+        /* Only supporting 32- and 64-bit float. Output silence in all other cases. Contributions welcome for 16-bit float. */
+        DRWAV_ZERO_MEMORY(pOut, totalSampleCount * sizeof(*pOut));
+        return;
+    }
+}
+
+static drwav_uint64 drwav_read_pcm_frames_s16__pcm(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
+{
+    drwav_uint32 bytesPerFrame;
+    drwav_uint64 totalFramesRead;
+    drwav_uint8 sampleData[4096];
+
+    /* Fast path. */
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_PCM && pWav->bitsPerSample == 16) {
+        return drwav_read_pcm_frames(pWav, framesToRead, pBufferOut);
+    }
+    
+    bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+    if (bytesPerFrame == 0) {
+        return 0;
+    }
+
+    totalFramesRead = 0;
+    
+    while (framesToRead > 0) {
+        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
+        if (framesRead == 0) {
+            break;
+        }
+
+        drwav__pcm_to_s16(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels), bytesPerFrame/pWav->channels);
+
+        pBufferOut      += framesRead*pWav->channels;
+        framesToRead    -= framesRead;
+        totalFramesRead += framesRead;
+    }
+
+    return totalFramesRead;
+}
+
+static drwav_uint64 drwav_read_pcm_frames_s16__ieee(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
+{
+    drwav_uint64 totalFramesRead;
+    drwav_uint8 sampleData[4096];
+
+    drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+    if (bytesPerFrame == 0) {
+        return 0;
+    }
+
+    totalFramesRead = 0;
+    
+    while (framesToRead > 0) {
+        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
+        if (framesRead == 0) {
+            break;
+        }
+
+        drwav__ieee_to_s16(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels), bytesPerFrame/pWav->channels);
+
+        pBufferOut      += framesRead*pWav->channels;
+        framesToRead    -= framesRead;
+        totalFramesRead += framesRead;
+    }
+
+    return totalFramesRead;
+}
+
+static drwav_uint64 drwav_read_pcm_frames_s16__alaw(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
+{
+    drwav_uint64 totalFramesRead;
+    drwav_uint8 sampleData[4096];
+
+    drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+    if (bytesPerFrame == 0) {
+        return 0;
+    }
+
+    totalFramesRead = 0;
+    
+    while (framesToRead > 0) {
+        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
+        if (framesRead == 0) {
+            break;
+        }
+
+        drwav_alaw_to_s16(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels));
+
+        pBufferOut      += framesRead*pWav->channels;
+        framesToRead    -= framesRead;
+        totalFramesRead += framesRead;
+    }
+
+    return totalFramesRead;
+}
+
+static drwav_uint64 drwav_read_pcm_frames_s16__mulaw(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
+{
+    drwav_uint64 totalFramesRead;
+    drwav_uint8 sampleData[4096];
+
+    drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+    if (bytesPerFrame == 0) {
+        return 0;
+    }
+
+    totalFramesRead = 0;
+
+    while (framesToRead > 0) {
+        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
+        if (framesRead == 0) {
+            break;
+        }
+
+        drwav_mulaw_to_s16(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels));
+
+        pBufferOut      += framesRead*pWav->channels;
+        framesToRead    -= framesRead;
+        totalFramesRead += framesRead;
+    }
+
+    return totalFramesRead;
+}
+
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_s16(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
+{
+    if (pWav == NULL || framesToRead == 0 || pBufferOut == NULL) {
+        return 0;
+    }
+
+    /* Don't try to read more samples than can potentially fit in the output buffer. */
+    if (framesToRead * pWav->channels * sizeof(drwav_int16) > DRWAV_SIZE_MAX) {
+        framesToRead = DRWAV_SIZE_MAX / sizeof(drwav_int16) / pWav->channels;
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_PCM) {
+        return drwav_read_pcm_frames_s16__pcm(pWav, framesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_IEEE_FLOAT) {
+        return drwav_read_pcm_frames_s16__ieee(pWav, framesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ALAW) {
+        return drwav_read_pcm_frames_s16__alaw(pWav, framesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_MULAW) {
+        return drwav_read_pcm_frames_s16__mulaw(pWav, framesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) {
+        return drwav_read_pcm_frames_s16__msadpcm(pWav, framesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) {
+        return drwav_read_pcm_frames_s16__ima(pWav, framesToRead, pBufferOut);
+    }
+
+    return 0;
+}
+
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_s16le(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
+{
+    drwav_uint64 framesRead = drwav_read_pcm_frames_s16(pWav, framesToRead, pBufferOut);
+    if (!drwav__is_little_endian()) {
+        drwav__bswap_samples_s16(pBufferOut, framesRead*pWav->channels);
+    }
+
+    return framesRead;
+}
+
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_s16be(drwav* pWav, drwav_uint64 framesToRead, drwav_int16* pBufferOut)
+{
+    drwav_uint64 framesRead = drwav_read_pcm_frames_s16(pWav, framesToRead, pBufferOut);
+    if (drwav__is_little_endian()) {
+        drwav__bswap_samples_s16(pBufferOut, framesRead*pWav->channels);
+    }
+
+    return framesRead;
+}
+
+
+DRWAV_API void drwav_u8_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount)
+{
+    int r;
+    size_t i;
+    for (i = 0; i < sampleCount; ++i) {
+        int x = pIn[i];
+        r = x << 8;
+        r = r - 32768;
+        pOut[i] = (short)r;
+    }
+}
+
+DRWAV_API void drwav_s24_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount)
+{
+    int r;
+    size_t i;
+    for (i = 0; i < sampleCount; ++i) {
+        int x = ((int)(((unsigned int)(((const drwav_uint8*)pIn)[i*3+0]) << 8) | ((unsigned int)(((const drwav_uint8*)pIn)[i*3+1]) << 16) | ((unsigned int)(((const drwav_uint8*)pIn)[i*3+2])) << 24)) >> 8;
+        r = x >> 8;
+        pOut[i] = (short)r;
+    }
+}
+
+DRWAV_API void drwav_s32_to_s16(drwav_int16* pOut, const drwav_int32* pIn, size_t sampleCount)
+{
+    int r;
+    size_t i;
+    for (i = 0; i < sampleCount; ++i) {
+        int x = pIn[i];
+        r = x >> 16;
+        pOut[i] = (short)r;
+    }
+}
+
+DRWAV_API void drwav_f32_to_s16(drwav_int16* pOut, const float* pIn, size_t sampleCount)
+{
+    int r;
+    size_t i;
+    for (i = 0; i < sampleCount; ++i) {
+        float x = pIn[i];
+        float c;
+        c = ((x < -1) ? -1 : ((x > 1) ? 1 : x));
+        c = c + 1;
+        r = (int)(c * 32767.5f);
+        r = r - 32768;
+        pOut[i] = (short)r;
+    }
+}
+
+DRWAV_API void drwav_f64_to_s16(drwav_int16* pOut, const double* pIn, size_t sampleCount)
+{
+    int r;
+    size_t i;
+    for (i = 0; i < sampleCount; ++i) {
+        double x = pIn[i];
+        double c;
+        c = ((x < -1) ? -1 : ((x > 1) ? 1 : x));
+        c = c + 1;
+        r = (int)(c * 32767.5);
+        r = r - 32768;
+        pOut[i] = (short)r;
+    }
+}
+
+DRWAV_API void drwav_alaw_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount)
+{
+    size_t i;
+    for (i = 0; i < sampleCount; ++i) {
+        pOut[i] = drwav__alaw_to_s16(pIn[i]);
+    }
+}
+
+DRWAV_API void drwav_mulaw_to_s16(drwav_int16* pOut, const drwav_uint8* pIn, size_t sampleCount)
+{
+    size_t i;
+    for (i = 0; i < sampleCount; ++i) {
+        pOut[i] = drwav__mulaw_to_s16(pIn[i]);
+    }
+}
+
+
+
+static void drwav__pcm_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount, unsigned int bytesPerSample)
+{
+    unsigned int i;
+
+    /* Special case for 8-bit sample data because it's treated as unsigned. */
+    if (bytesPerSample == 1) {
+        drwav_u8_to_f32(pOut, pIn, sampleCount);
+        return;
+    }
+
+    /* Slightly more optimal implementation for common formats. */
+    if (bytesPerSample == 2) {
+        drwav_s16_to_f32(pOut, (const drwav_int16*)pIn, sampleCount);
+        return;
+    }
+    if (bytesPerSample == 3) {
+        drwav_s24_to_f32(pOut, pIn, sampleCount);
+        return;
+    }
+    if (bytesPerSample == 4) {
+        drwav_s32_to_f32(pOut, (const drwav_int32*)pIn, sampleCount);
+        return;
+    }
+
+
+    /* Anything more than 64 bits per sample is not supported. */
+    if (bytesPerSample > 8) {
+        DRWAV_ZERO_MEMORY(pOut, sampleCount * sizeof(*pOut));
+        return;
+    }
+
+
+    /* Generic, slow converter. */
+    for (i = 0; i < sampleCount; ++i) {
+        drwav_uint64 sample = 0;
+        unsigned int shift  = (8 - bytesPerSample) * 8;
+
+        unsigned int j;
+        for (j = 0; j < bytesPerSample; j += 1) {
+            DRWAV_ASSERT(j < 8);
+            sample |= (drwav_uint64)(pIn[j]) << shift;
+            shift  += 8;
+        }
+
+        pIn += j;
+        *pOut++ = (float)((drwav_int64)sample / 9223372036854775807.0);
+    }
+}
+
+static void drwav__ieee_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount, unsigned int bytesPerSample)
+{
+    if (bytesPerSample == 4) {
+        unsigned int i;
+        for (i = 0; i < sampleCount; ++i) {
+            *pOut++ = ((const float*)pIn)[i];
+        }
+        return;
+    } else if (bytesPerSample == 8) {
+        drwav_f64_to_f32(pOut, (const double*)pIn, sampleCount);
+        return;
+    } else {
+        /* Only supporting 32- and 64-bit float. Output silence in all other cases. Contributions welcome for 16-bit float. */
+        DRWAV_ZERO_MEMORY(pOut, sampleCount * sizeof(*pOut));
+        return;
+    }
+}
+
+
+static drwav_uint64 drwav_read_pcm_frames_f32__pcm(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut)
+{
+    drwav_uint64 totalFramesRead;
+    drwav_uint8 sampleData[4096];
+
+    drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+    if (bytesPerFrame == 0) {
+        return 0;
+    }
+
+    totalFramesRead = 0;
+
+    while (framesToRead > 0) {
+        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
+        if (framesRead == 0) {
+            break;
+        }
+
+        drwav__pcm_to_f32(pBufferOut, sampleData, (size_t)framesRead*pWav->channels, bytesPerFrame/pWav->channels);
+
+        pBufferOut      += framesRead*pWav->channels;
+        framesToRead    -= framesRead;
+        totalFramesRead += framesRead;
+    }
+
+    return totalFramesRead;
+}
+
+static drwav_uint64 drwav_read_pcm_frames_f32__msadpcm(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut)
+{
+    /*
+    We're just going to borrow the implementation from the drwav_read_s16() since ADPCM is a little bit more complicated than other formats and I don't
+    want to duplicate that code.
+    */
+    drwav_uint64 totalFramesRead = 0;
+    drwav_int16 samples16[2048];
+    while (framesToRead > 0) {
+        drwav_uint64 framesRead = drwav_read_pcm_frames_s16(pWav, drwav_min(framesToRead, drwav_countof(samples16)/pWav->channels), samples16);
+        if (framesRead == 0) {
+            break;
+        }
+
+        drwav_s16_to_f32(pBufferOut, samples16, (size_t)(framesRead*pWav->channels));   /* <-- Safe cast because we're clamping to 2048. */
+
+        pBufferOut      += framesRead*pWav->channels;
+        framesToRead    -= framesRead;
+        totalFramesRead += framesRead;
+    }
+
+    return totalFramesRead;
+}
+
+static drwav_uint64 drwav_read_pcm_frames_f32__ima(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut)
+{
+    /*
+    We're just going to borrow the implementation from the drwav_read_s16() since IMA-ADPCM is a little bit more complicated than other formats and I don't
+    want to duplicate that code.
+    */
+    drwav_uint64 totalFramesRead = 0;
+    drwav_int16 samples16[2048];
+    while (framesToRead > 0) {
+        drwav_uint64 framesRead = drwav_read_pcm_frames_s16(pWav, drwav_min(framesToRead, drwav_countof(samples16)/pWav->channels), samples16);
+        if (framesRead == 0) {
+            break;
+        }
+
+        drwav_s16_to_f32(pBufferOut, samples16, (size_t)(framesRead*pWav->channels));   /* <-- Safe cast because we're clamping to 2048. */
+
+        pBufferOut      += framesRead*pWav->channels;
+        framesToRead    -= framesRead;
+        totalFramesRead += framesRead;
+    }
+
+    return totalFramesRead;
+}
+
+static drwav_uint64 drwav_read_pcm_frames_f32__ieee(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut)
+{
+    drwav_uint64 totalFramesRead;
+    drwav_uint8 sampleData[4096];
+    drwav_uint32 bytesPerFrame;
+
+    /* Fast path. */
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_IEEE_FLOAT && pWav->bitsPerSample == 32) {
+        return drwav_read_pcm_frames(pWav, framesToRead, pBufferOut);
+    }
+    
+    bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+    if (bytesPerFrame == 0) {
+        return 0;
+    }
+
+    totalFramesRead = 0;
+
+    while (framesToRead > 0) {
+        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
+        if (framesRead == 0) {
+            break;
+        }
+
+        drwav__ieee_to_f32(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels), bytesPerFrame/pWav->channels);
+
+        pBufferOut      += framesRead*pWav->channels;
+        framesToRead    -= framesRead;
+        totalFramesRead += framesRead;
+    }
+
+    return totalFramesRead;
+}
+
+static drwav_uint64 drwav_read_pcm_frames_f32__alaw(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut)
+{
+    drwav_uint64 totalFramesRead;
+    drwav_uint8 sampleData[4096];
+    drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+    if (bytesPerFrame == 0) {
+        return 0;
+    }
+
+    totalFramesRead = 0;
+
+    while (framesToRead > 0) {
+        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
+        if (framesRead == 0) {
+            break;
+        }
+
+        drwav_alaw_to_f32(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels));
+
+        pBufferOut      += framesRead*pWav->channels;
+        framesToRead    -= framesRead;
+        totalFramesRead += framesRead;
+    }
+
+    return totalFramesRead;
+}
+
+static drwav_uint64 drwav_read_pcm_frames_f32__mulaw(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut)
+{
+    drwav_uint64 totalFramesRead;
+    drwav_uint8 sampleData[4096];
+
+    drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+    if (bytesPerFrame == 0) {
+        return 0;
+    }
+
+    totalFramesRead = 0;
+
+    while (framesToRead > 0) {
+        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
+        if (framesRead == 0) {
+            break;
+        }
+
+        drwav_mulaw_to_f32(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels));
+
+        pBufferOut      += framesRead*pWav->channels;
+        framesToRead    -= framesRead;
+        totalFramesRead += framesRead;
+    }
+
+    return totalFramesRead;
+}
+
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_f32(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut)
+{
+    if (pWav == NULL || framesToRead == 0 || pBufferOut == NULL) {
+        return 0;
+    }
+
+    /* Don't try to read more samples than can potentially fit in the output buffer. */
+    if (framesToRead * pWav->channels * sizeof(float) > DRWAV_SIZE_MAX) {
+        framesToRead = DRWAV_SIZE_MAX / sizeof(float) / pWav->channels;
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_PCM) {
+        return drwav_read_pcm_frames_f32__pcm(pWav, framesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) {
+        return drwav_read_pcm_frames_f32__msadpcm(pWav, framesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_IEEE_FLOAT) {
+        return drwav_read_pcm_frames_f32__ieee(pWav, framesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ALAW) {
+        return drwav_read_pcm_frames_f32__alaw(pWav, framesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_MULAW) {
+        return drwav_read_pcm_frames_f32__mulaw(pWav, framesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) {
+        return drwav_read_pcm_frames_f32__ima(pWav, framesToRead, pBufferOut);
+    }
+
+    return 0;
+}
+
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_f32le(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut)
+{
+    drwav_uint64 framesRead = drwav_read_pcm_frames_f32(pWav, framesToRead, pBufferOut);
+    if (!drwav__is_little_endian()) {
+        drwav__bswap_samples_f32(pBufferOut, framesRead*pWav->channels);
+    }
+
+    return framesRead;
+}
+
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_f32be(drwav* pWav, drwav_uint64 framesToRead, float* pBufferOut)
+{
+    drwav_uint64 framesRead = drwav_read_pcm_frames_f32(pWav, framesToRead, pBufferOut);
+    if (drwav__is_little_endian()) {
+        drwav__bswap_samples_f32(pBufferOut, framesRead*pWav->channels);
+    }
+
+    return framesRead;
+}
+
+
+DRWAV_API void drwav_u8_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount)
+{
+    size_t i;
+
+    if (pOut == NULL || pIn == NULL) {
+        return;
+    }
+
+#ifdef DR_WAV_LIBSNDFILE_COMPAT
+    /*
+    It appears libsndfile uses slightly different logic for the u8 -> f32 conversion to dr_wav, which in my opinion is incorrect. It appears
+    libsndfile performs the conversion something like "f32 = (u8 / 256) * 2 - 1", however I think it should be "f32 = (u8 / 255) * 2 - 1" (note
+    the divisor of 256 vs 255). I use libsndfile as a benchmark for testing, so I'm therefore leaving this block here just for my automated
+    correctness testing. This is disabled by default.
+    */
+    for (i = 0; i < sampleCount; ++i) {
+        *pOut++ = (pIn[i] / 256.0f) * 2 - 1;
+    }
+#else
+    for (i = 0; i < sampleCount; ++i) {
+        float x = pIn[i];
+        x = x * 0.00784313725490196078f;    /* 0..255 to 0..2 */
+        x = x - 1;                          /* 0..2 to -1..1 */
+
+        *pOut++ = x;
+    }
+#endif
+}
+
+DRWAV_API void drwav_s16_to_f32(float* pOut, const drwav_int16* pIn, size_t sampleCount)
+{
+    size_t i;
+
+    if (pOut == NULL || pIn == NULL) {
+        return;
+    }
+
+    for (i = 0; i < sampleCount; ++i) {
+        *pOut++ = pIn[i] * 0.000030517578125f;
+    }
+}
+
+DRWAV_API void drwav_s24_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount)
+{
+    size_t i;
+
+    if (pOut == NULL || pIn == NULL) {
+        return;
+    }
+
+    for (i = 0; i < sampleCount; ++i) {
+        double x = (double)(((drwav_int32)(((drwav_uint32)(pIn[i*3+0]) << 8) | ((drwav_uint32)(pIn[i*3+1]) << 16) | ((drwav_uint32)(pIn[i*3+2])) << 24)) >> 8);
+        *pOut++ = (float)(x * 0.00000011920928955078125);
+    }
+}
+
+DRWAV_API void drwav_s32_to_f32(float* pOut, const drwav_int32* pIn, size_t sampleCount)
+{
+    size_t i;
+    if (pOut == NULL || pIn == NULL) {
+        return;
+    }
+
+    for (i = 0; i < sampleCount; ++i) {
+        *pOut++ = (float)(pIn[i] / 2147483648.0);
+    }
+}
+
+DRWAV_API void drwav_f64_to_f32(float* pOut, const double* pIn, size_t sampleCount)
+{
+    size_t i;
+
+    if (pOut == NULL || pIn == NULL) {
+        return;
+    }
+
+    for (i = 0; i < sampleCount; ++i) {
+        *pOut++ = (float)pIn[i];
+    }
+}
+
+DRWAV_API void drwav_alaw_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount)
+{
+    size_t i;
+
+    if (pOut == NULL || pIn == NULL) {
+        return;
+    }
+
+    for (i = 0; i < sampleCount; ++i) {
+        *pOut++ = drwav__alaw_to_s16(pIn[i]) / 32768.0f;
+    }
+}
+
+DRWAV_API void drwav_mulaw_to_f32(float* pOut, const drwav_uint8* pIn, size_t sampleCount)
+{
+    size_t i;
+
+    if (pOut == NULL || pIn == NULL) {
+        return;
+    }
+
+    for (i = 0; i < sampleCount; ++i) {
+        *pOut++ = drwav__mulaw_to_s16(pIn[i]) / 32768.0f;
+    }
+}
+
+
+
+static void drwav__pcm_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t totalSampleCount, unsigned int bytesPerSample)
+{
+    unsigned int i;
+
+    /* Special case for 8-bit sample data because it's treated as unsigned. */
+    if (bytesPerSample == 1) {
+        drwav_u8_to_s32(pOut, pIn, totalSampleCount);
+        return;
+    }
+
+    /* Slightly more optimal implementation for common formats. */
+    if (bytesPerSample == 2) {
+        drwav_s16_to_s32(pOut, (const drwav_int16*)pIn, totalSampleCount);
+        return;
+    }
+    if (bytesPerSample == 3) {
+        drwav_s24_to_s32(pOut, pIn, totalSampleCount);
+        return;
+    }
+    if (bytesPerSample == 4) {
+        for (i = 0; i < totalSampleCount; ++i) {
+           *pOut++ = ((const drwav_int32*)pIn)[i];
+        }
+        return;
+    }
+
+
+    /* Anything more than 64 bits per sample is not supported. */
+    if (bytesPerSample > 8) {
+        DRWAV_ZERO_MEMORY(pOut, totalSampleCount * sizeof(*pOut));
+        return;
+    }
+
+
+    /* Generic, slow converter. */
+    for (i = 0; i < totalSampleCount; ++i) {
+        drwav_uint64 sample = 0;
+        unsigned int shift  = (8 - bytesPerSample) * 8;
+
+        unsigned int j;
+        for (j = 0; j < bytesPerSample; j += 1) {
+            DRWAV_ASSERT(j < 8);
+            sample |= (drwav_uint64)(pIn[j]) << shift;
+            shift  += 8;
+        }
+
+        pIn += j;
+        *pOut++ = (drwav_int32)((drwav_int64)sample >> 32);
+    }
+}
+
+static void drwav__ieee_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t totalSampleCount, unsigned int bytesPerSample)
+{
+    if (bytesPerSample == 4) {
+        drwav_f32_to_s32(pOut, (const float*)pIn, totalSampleCount);
+        return;
+    } else if (bytesPerSample == 8) {
+        drwav_f64_to_s32(pOut, (const double*)pIn, totalSampleCount);
+        return;
+    } else {
+        /* Only supporting 32- and 64-bit float. Output silence in all other cases. Contributions welcome for 16-bit float. */
+        DRWAV_ZERO_MEMORY(pOut, totalSampleCount * sizeof(*pOut));
+        return;
+    }
+}
+
+
+static drwav_uint64 drwav_read_pcm_frames_s32__pcm(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
+{
+    drwav_uint64 totalFramesRead;
+    drwav_uint8 sampleData[4096];
+    drwav_uint32 bytesPerFrame;
+
+    /* Fast path. */
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_PCM && pWav->bitsPerSample == 32) {
+        return drwav_read_pcm_frames(pWav, framesToRead, pBufferOut);
+    }
+    
+    bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+    if (bytesPerFrame == 0) {
+        return 0;
+    }
+
+    totalFramesRead = 0;
+
+    while (framesToRead > 0) {
+        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
+        if (framesRead == 0) {
+            break;
+        }
+
+        drwav__pcm_to_s32(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels), bytesPerFrame/pWav->channels);
+
+        pBufferOut      += framesRead*pWav->channels;
+        framesToRead    -= framesRead;
+        totalFramesRead += framesRead;
+    }
+
+    return totalFramesRead;
+}
+
+static drwav_uint64 drwav_read_pcm_frames_s32__msadpcm(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
+{
+    /*
+    We're just going to borrow the implementation from the drwav_read_s16() since ADPCM is a little bit more complicated than other formats and I don't
+    want to duplicate that code.
+    */
+    drwav_uint64 totalFramesRead = 0;
+    drwav_int16 samples16[2048];
+    while (framesToRead > 0) {
+        drwav_uint64 framesRead = drwav_read_pcm_frames_s16(pWav, drwav_min(framesToRead, drwav_countof(samples16)/pWav->channels), samples16);
+        if (framesRead == 0) {
+            break;
+        }
+
+        drwav_s16_to_s32(pBufferOut, samples16, (size_t)(framesRead*pWav->channels));   /* <-- Safe cast because we're clamping to 2048. */
+
+        pBufferOut      += framesRead*pWav->channels;
+        framesToRead    -= framesRead;
+        totalFramesRead += framesRead;
+    }
+
+    return totalFramesRead;
+}
+
+static drwav_uint64 drwav_read_pcm_frames_s32__ima(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
+{
+    /*
+    We're just going to borrow the implementation from the drwav_read_s16() since IMA-ADPCM is a little bit more complicated than other formats and I don't
+    want to duplicate that code.
+    */
+    drwav_uint64 totalFramesRead = 0;
+    drwav_int16 samples16[2048];
+    while (framesToRead > 0) {
+        drwav_uint64 framesRead = drwav_read_pcm_frames_s16(pWav, drwav_min(framesToRead, drwav_countof(samples16)/pWav->channels), samples16);
+        if (framesRead == 0) {
+            break;
+        }
+
+        drwav_s16_to_s32(pBufferOut, samples16, (size_t)(framesRead*pWav->channels));   /* <-- Safe cast because we're clamping to 2048. */
+
+        pBufferOut      += framesRead*pWav->channels;
+        framesToRead    -= framesRead;
+        totalFramesRead += framesRead;
+    }
+
+    return totalFramesRead;
+}
+
+static drwav_uint64 drwav_read_pcm_frames_s32__ieee(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
+{
+    drwav_uint64 totalFramesRead;
+    drwav_uint8 sampleData[4096];
+
+    drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+    if (bytesPerFrame == 0) {
+        return 0;
+    }
+
+    totalFramesRead = 0;
+
+    while (framesToRead > 0) {
+        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
+        if (framesRead == 0) {
+            break;
+        }
+
+        drwav__ieee_to_s32(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels), bytesPerFrame/pWav->channels);
+
+        pBufferOut      += framesRead*pWav->channels;
+        framesToRead    -= framesRead;
+        totalFramesRead += framesRead;
+    }
+
+    return totalFramesRead;
+}
+
+static drwav_uint64 drwav_read_pcm_frames_s32__alaw(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
+{
+    drwav_uint64 totalFramesRead;
+    drwav_uint8 sampleData[4096];
+
+    drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+    if (bytesPerFrame == 0) {
+        return 0;
+    }
+
+    totalFramesRead = 0;
+
+    while (framesToRead > 0) {
+        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
+        if (framesRead == 0) {
+            break;
+        }
+
+        drwav_alaw_to_s32(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels));
+
+        pBufferOut      += framesRead*pWav->channels;
+        framesToRead    -= framesRead;
+        totalFramesRead += framesRead;
+    }
+
+    return totalFramesRead;
+}
+
+static drwav_uint64 drwav_read_pcm_frames_s32__mulaw(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
+{
+    drwav_uint64 totalFramesRead;
+    drwav_uint8 sampleData[4096];
+
+    drwav_uint32 bytesPerFrame = drwav_get_bytes_per_pcm_frame(pWav);
+    if (bytesPerFrame == 0) {
+        return 0;
+    }
+
+    totalFramesRead = 0;
+
+    while (framesToRead > 0) {
+        drwav_uint64 framesRead = drwav_read_pcm_frames(pWav, drwav_min(framesToRead, sizeof(sampleData)/bytesPerFrame), sampleData);
+        if (framesRead == 0) {
+            break;
+        }
+
+        drwav_mulaw_to_s32(pBufferOut, sampleData, (size_t)(framesRead*pWav->channels));
+
+        pBufferOut      += framesRead*pWav->channels;
+        framesToRead    -= framesRead;
+        totalFramesRead += framesRead;
+    }
+
+    return totalFramesRead;
+}
+
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_s32(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
+{
+    if (pWav == NULL || framesToRead == 0 || pBufferOut == NULL) {
+        return 0;
+    }
+
+    /* Don't try to read more samples than can potentially fit in the output buffer. */
+    if (framesToRead * pWav->channels * sizeof(drwav_int32) > DRWAV_SIZE_MAX) {
+        framesToRead = DRWAV_SIZE_MAX / sizeof(drwav_int32) / pWav->channels;
+    }
+
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_PCM) {
+        return drwav_read_pcm_frames_s32__pcm(pWav, framesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ADPCM) {
+        return drwav_read_pcm_frames_s32__msadpcm(pWav, framesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_IEEE_FLOAT) {
+        return drwav_read_pcm_frames_s32__ieee(pWav, framesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_ALAW) {
+        return drwav_read_pcm_frames_s32__alaw(pWav, framesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_MULAW) {
+        return drwav_read_pcm_frames_s32__mulaw(pWav, framesToRead, pBufferOut);
+    }
+
+    if (pWav->translatedFormatTag == DR_WAVE_FORMAT_DVI_ADPCM) {
+        return drwav_read_pcm_frames_s32__ima(pWav, framesToRead, pBufferOut);
+    }
+
+    return 0;
+}
+
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_s32le(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
+{
+    drwav_uint64 framesRead = drwav_read_pcm_frames_s32(pWav, framesToRead, pBufferOut);
+    if (!drwav__is_little_endian()) {
+        drwav__bswap_samples_s32(pBufferOut, framesRead*pWav->channels);
+    }
+
+    return framesRead;
+}
+
+DRWAV_API drwav_uint64 drwav_read_pcm_frames_s32be(drwav* pWav, drwav_uint64 framesToRead, drwav_int32* pBufferOut)
+{
+    drwav_uint64 framesRead = drwav_read_pcm_frames_s32(pWav, framesToRead, pBufferOut);
+    if (drwav__is_little_endian()) {
+        drwav__bswap_samples_s32(pBufferOut, framesRead*pWav->channels);
+    }
+
+    return framesRead;
+}
+
+
+DRWAV_API void drwav_u8_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount)
+{
+    size_t i;
+
+    if (pOut == NULL || pIn == NULL) {
+        return;
+    }
+
+    for (i = 0; i < sampleCount; ++i) {
+        *pOut++ = ((int)pIn[i] - 128) << 24;
+    }
+}
+
+DRWAV_API void drwav_s16_to_s32(drwav_int32* pOut, const drwav_int16* pIn, size_t sampleCount)
+{
+    size_t i;
+
+    if (pOut == NULL || pIn == NULL) {
+        return;
+    }
+
+    for (i = 0; i < sampleCount; ++i) {
+        *pOut++ = pIn[i] << 16;
+    }
+}
+
+DRWAV_API void drwav_s24_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount)
+{
+    size_t i;
+
+    if (pOut == NULL || pIn == NULL) {
+        return;
+    }
+
+    for (i = 0; i < sampleCount; ++i) {
+        unsigned int s0 = pIn[i*3 + 0];
+        unsigned int s1 = pIn[i*3 + 1];
+        unsigned int s2 = pIn[i*3 + 2];
+
+        drwav_int32 sample32 = (drwav_int32)((s0 << 8) | (s1 << 16) | (s2 << 24));
+        *pOut++ = sample32;
+    }
+}
+
+DRWAV_API void drwav_f32_to_s32(drwav_int32* pOut, const float* pIn, size_t sampleCount)
+{
+    size_t i;
+
+    if (pOut == NULL || pIn == NULL) {
+        return;
+    }
+
+    for (i = 0; i < sampleCount; ++i) {
+        *pOut++ = (drwav_int32)(2147483648.0 * pIn[i]);
+    }
+}
+
+DRWAV_API void drwav_f64_to_s32(drwav_int32* pOut, const double* pIn, size_t sampleCount)
+{
+    size_t i;
+
+    if (pOut == NULL || pIn == NULL) {
+        return;
+    }
+
+    for (i = 0; i < sampleCount; ++i) {
+        *pOut++ = (drwav_int32)(2147483648.0 * pIn[i]);
+    }
+}
+
+DRWAV_API void drwav_alaw_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount)
+{
+    size_t i;
+
+    if (pOut == NULL || pIn == NULL) {
+        return;
+    }
+
+    for (i = 0; i < sampleCount; ++i) {
+        *pOut++ = ((drwav_int32)drwav__alaw_to_s16(pIn[i])) << 16;
+    }
+}
+
+DRWAV_API void drwav_mulaw_to_s32(drwav_int32* pOut, const drwav_uint8* pIn, size_t sampleCount)
+{
+    size_t i;
+
+    if (pOut == NULL || pIn == NULL) {
+        return;
+    }
+
+    for (i= 0; i < sampleCount; ++i) {
+        *pOut++ = ((drwav_int32)drwav__mulaw_to_s16(pIn[i])) << 16;
+    }
+}
+
+
+
+static drwav_int16* drwav__read_pcm_frames_and_close_s16(drwav* pWav, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalFrameCount)
+{
+    drwav_uint64 sampleDataSize;
+    drwav_int16* pSampleData;
+    drwav_uint64 framesRead;
+
+    DRWAV_ASSERT(pWav != NULL);
+
+    sampleDataSize = pWav->totalPCMFrameCount * pWav->channels * sizeof(drwav_int16);
+    if (sampleDataSize > DRWAV_SIZE_MAX) {
+        drwav_uninit(pWav);
+        return NULL;    /* File's too big. */
+    }
+
+    pSampleData = (drwav_int16*)drwav__malloc_from_callbacks((size_t)sampleDataSize, &pWav->allocationCallbacks); /* <-- Safe cast due to the check above. */
+    if (pSampleData == NULL) {
+        drwav_uninit(pWav);
+        return NULL;    /* Failed to allocate memory. */
+    }
+
+    framesRead = drwav_read_pcm_frames_s16(pWav, (size_t)pWav->totalPCMFrameCount, pSampleData);
+    if (framesRead != pWav->totalPCMFrameCount) {
+        drwav__free_from_callbacks(pSampleData, &pWav->allocationCallbacks);
+        drwav_uninit(pWav);
+        return NULL;    /* There was an error reading the samples. */
+    }
+
+    drwav_uninit(pWav);
+
+    if (sampleRate) {
+        *sampleRate = pWav->sampleRate;
+    }
+    if (channels) {
+        *channels = pWav->channels;
+    }
+    if (totalFrameCount) {
+        *totalFrameCount = pWav->totalPCMFrameCount;
+    }
+
+    return pSampleData;
+}
+
+static float* drwav__read_pcm_frames_and_close_f32(drwav* pWav, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalFrameCount)
+{
+    drwav_uint64 sampleDataSize;
+    float* pSampleData;
+    drwav_uint64 framesRead;
+
+    DRWAV_ASSERT(pWav != NULL);
+
+    sampleDataSize = pWav->totalPCMFrameCount * pWav->channels * sizeof(float);
+    if (sampleDataSize > DRWAV_SIZE_MAX) {
+        drwav_uninit(pWav);
+        return NULL;    /* File's too big. */
+    }
+
+    pSampleData = (float*)drwav__malloc_from_callbacks((size_t)sampleDataSize, &pWav->allocationCallbacks); /* <-- Safe cast due to the check above. */
+    if (pSampleData == NULL) {
+        drwav_uninit(pWav);
+        return NULL;    /* Failed to allocate memory. */
+    }
+
+    framesRead = drwav_read_pcm_frames_f32(pWav, (size_t)pWav->totalPCMFrameCount, pSampleData);
+    if (framesRead != pWav->totalPCMFrameCount) {
+        drwav__free_from_callbacks(pSampleData, &pWav->allocationCallbacks);
+        drwav_uninit(pWav);
+        return NULL;    /* There was an error reading the samples. */
+    }
+
+    drwav_uninit(pWav);
+
+    if (sampleRate) {
+        *sampleRate = pWav->sampleRate;
+    }
+    if (channels) {
+        *channels = pWav->channels;
+    }
+    if (totalFrameCount) {
+        *totalFrameCount = pWav->totalPCMFrameCount;
+    }
+
+    return pSampleData;
+}
+
+static drwav_int32* drwav__read_pcm_frames_and_close_s32(drwav* pWav, unsigned int* channels, unsigned int* sampleRate, drwav_uint64* totalFrameCount)
+{
+    drwav_uint64 sampleDataSize;
+    drwav_int32* pSampleData;
+    drwav_uint64 framesRead;
+
+    DRWAV_ASSERT(pWav != NULL);
+
+    sampleDataSize = pWav->totalPCMFrameCount * pWav->channels * sizeof(drwav_int32);
+    if (sampleDataSize > DRWAV_SIZE_MAX) {
+        drwav_uninit(pWav);
+        return NULL;    /* File's too big. */
+    }
+
+    pSampleData = (drwav_int32*)drwav__malloc_from_callbacks((size_t)sampleDataSize, &pWav->allocationCallbacks); /* <-- Safe cast due to the check above. */
+    if (pSampleData == NULL) {
+        drwav_uninit(pWav);
+        return NULL;    /* Failed to allocate memory. */
+    }
+
+    framesRead = drwav_read_pcm_frames_s32(pWav, (size_t)pWav->totalPCMFrameCount, pSampleData);
+    if (framesRead != pWav->totalPCMFrameCount) {
+        drwav__free_from_callbacks(pSampleData, &pWav->allocationCallbacks);
+        drwav_uninit(pWav);
+        return NULL;    /* There was an error reading the samples. */
+    }
+
+    drwav_uninit(pWav);
+
+    if (sampleRate) {
+        *sampleRate = pWav->sampleRate;
+    }
+    if (channels) {
+        *channels = pWav->channels;
+    }
+    if (totalFrameCount) {
+        *totalFrameCount = pWav->totalPCMFrameCount;
+    }
+
+    return pSampleData;
+}
+
+
+
+DRWAV_API drwav_int16* drwav_open_and_read_pcm_frames_s16(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    drwav wav;
+
+    if (channelsOut) {
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalFrameCountOut) {
+        *totalFrameCountOut = 0;
+    }
+
+    if (!drwav_init(&wav, onRead, onSeek, pUserData, pAllocationCallbacks)) {
+        return NULL;
+    }
+
+    return drwav__read_pcm_frames_and_close_s16(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
+}
+
+DRWAV_API float* drwav_open_and_read_pcm_frames_f32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    drwav wav;
+
+    if (channelsOut) {
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalFrameCountOut) {
+        *totalFrameCountOut = 0;
+    }
+
+    if (!drwav_init(&wav, onRead, onSeek, pUserData, pAllocationCallbacks)) {
+        return NULL;
+    }
+
+    return drwav__read_pcm_frames_and_close_f32(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
+}
+
+DRWAV_API drwav_int32* drwav_open_and_read_pcm_frames_s32(drwav_read_proc onRead, drwav_seek_proc onSeek, void* pUserData, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    drwav wav;
+
+    if (channelsOut) {
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalFrameCountOut) {
+        *totalFrameCountOut = 0;
+    }
+
+    if (!drwav_init(&wav, onRead, onSeek, pUserData, pAllocationCallbacks)) {
+        return NULL;
+    }
+
+    return drwav__read_pcm_frames_and_close_s32(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
+}
+
+#ifndef DR_WAV_NO_STDIO
+DRWAV_API drwav_int16* drwav_open_file_and_read_pcm_frames_s16(const char* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    drwav wav;
+
+    if (channelsOut) {
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalFrameCountOut) {
+        *totalFrameCountOut = 0;
+    }
+
+    if (!drwav_init_file(&wav, filename, pAllocationCallbacks)) {
+        return NULL;
+    }
+
+    return drwav__read_pcm_frames_and_close_s16(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
+}
+
+DRWAV_API float* drwav_open_file_and_read_pcm_frames_f32(const char* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    drwav wav;
+
+    if (channelsOut) {
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalFrameCountOut) {
+        *totalFrameCountOut = 0;
+    }
+
+    if (!drwav_init_file(&wav, filename, pAllocationCallbacks)) {
+        return NULL;
+    }
+
+    return drwav__read_pcm_frames_and_close_f32(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
+}
+
+DRWAV_API drwav_int32* drwav_open_file_and_read_pcm_frames_s32(const char* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    drwav wav;
+
+    if (channelsOut) {
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalFrameCountOut) {
+        *totalFrameCountOut = 0;
+    }
+
+    if (!drwav_init_file(&wav, filename, pAllocationCallbacks)) {
+        return NULL;
+    }
+
+    return drwav__read_pcm_frames_and_close_s32(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
+}
+
+
+DRWAV_API drwav_int16* drwav_open_file_and_read_pcm_frames_s16_w(const wchar_t* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    drwav wav;
+
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (channelsOut) {
+        *channelsOut = 0;
+    }
+    if (totalFrameCountOut) {
+        *totalFrameCountOut = 0;
+    }
+
+    if (!drwav_init_file_w(&wav, filename, pAllocationCallbacks)) {
+        return NULL;
+    }
+
+    return drwav__read_pcm_frames_and_close_s16(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
+}
+
+DRWAV_API float* drwav_open_file_and_read_pcm_frames_f32_w(const wchar_t* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    drwav wav;
+
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (channelsOut) {
+        *channelsOut = 0;
+    }
+    if (totalFrameCountOut) {
+        *totalFrameCountOut = 0;
+    }
+
+    if (!drwav_init_file_w(&wav, filename, pAllocationCallbacks)) {
+        return NULL;
+    }
+
+    return drwav__read_pcm_frames_and_close_f32(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
+}
+
+DRWAV_API drwav_int32* drwav_open_file_and_read_pcm_frames_s32_w(const wchar_t* filename, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    drwav wav;
+
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (channelsOut) {
+        *channelsOut = 0;
+    }
+    if (totalFrameCountOut) {
+        *totalFrameCountOut = 0;
+    }
+
+    if (!drwav_init_file_w(&wav, filename, pAllocationCallbacks)) {
+        return NULL;
+    }
+
+    return drwav__read_pcm_frames_and_close_s32(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
+}
+#endif
+
+DRWAV_API drwav_int16* drwav_open_memory_and_read_pcm_frames_s16(const void* data, size_t dataSize, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    drwav wav;
+
+    if (channelsOut) {
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalFrameCountOut) {
+        *totalFrameCountOut = 0;
+    }
+
+    if (!drwav_init_memory(&wav, data, dataSize, pAllocationCallbacks)) {
+        return NULL;
+    }
+
+    return drwav__read_pcm_frames_and_close_s16(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
+}
+
+DRWAV_API float* drwav_open_memory_and_read_pcm_frames_f32(const void* data, size_t dataSize, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    drwav wav;
+
+    if (channelsOut) {
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalFrameCountOut) {
+        *totalFrameCountOut = 0;
+    }
+
+    if (!drwav_init_memory(&wav, data, dataSize, pAllocationCallbacks)) {
+        return NULL;
+    }
+
+    return drwav__read_pcm_frames_and_close_f32(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
+}
+
+DRWAV_API drwav_int32* drwav_open_memory_and_read_pcm_frames_s32(const void* data, size_t dataSize, unsigned int* channelsOut, unsigned int* sampleRateOut, drwav_uint64* totalFrameCountOut, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    drwav wav;
+
+    if (channelsOut) {
+        *channelsOut = 0;
+    }
+    if (sampleRateOut) {
+        *sampleRateOut = 0;
+    }
+    if (totalFrameCountOut) {
+        *totalFrameCountOut = 0;
+    }
+
+    if (!drwav_init_memory(&wav, data, dataSize, pAllocationCallbacks)) {
+        return NULL;
+    }
+
+    return drwav__read_pcm_frames_and_close_s32(&wav, channelsOut, sampleRateOut, totalFrameCountOut);
+}
+#endif  /* DR_WAV_NO_CONVERSION_API */
+
+
+DRWAV_API void drwav_free(void* p, const drwav_allocation_callbacks* pAllocationCallbacks)
+{
+    if (pAllocationCallbacks != NULL) {
+        drwav__free_from_callbacks(p, pAllocationCallbacks);
+    } else {
+        drwav__free_default(p, NULL);
+    }
+}
+
+DRWAV_API drwav_uint16 drwav_bytes_to_u16(const drwav_uint8* data)
+{
+    return drwav__bytes_to_u16(data);
+}
+
+DRWAV_API drwav_int16 drwav_bytes_to_s16(const drwav_uint8* data)
+{
+    return drwav__bytes_to_s16(data);
+}
+
+DRWAV_API drwav_uint32 drwav_bytes_to_u32(const drwav_uint8* data)
+{
+    return drwav__bytes_to_u32(data);
+}
+
+DRWAV_API drwav_int32 drwav_bytes_to_s32(const drwav_uint8* data)
+{
+    return drwav__bytes_to_s32(data);
+}
+
+DRWAV_API drwav_uint64 drwav_bytes_to_u64(const drwav_uint8* data)
+{
+    return drwav__bytes_to_u64(data);
+}
+
+DRWAV_API drwav_int64 drwav_bytes_to_s64(const drwav_uint8* data)
+{
+    return drwav__bytes_to_s64(data);
+}
+
+
+DRWAV_API drwav_bool32 drwav_guid_equal(const drwav_uint8 a[16], const drwav_uint8 b[16])
+{
+    return drwav__guid_equal(a, b);
+}
+
+DRWAV_API drwav_bool32 drwav_fourcc_equal(const drwav_uint8* a, const char* b)
+{
+    return drwav__fourcc_equal(a, b);
+}
+
+#endif  /* DR_WAV_IMPLEMENTATION */
+
+/*
+RELEASE NOTES - v0.11.0
+=======================
+Version 0.11.0 has breaking API changes.
+
+Improved Client-Defined Memory Allocation
+-----------------------------------------
+The main change with this release is the addition of a more flexible way of implementing custom memory allocation routines. The
+existing system of DRWAV_MALLOC, DRWAV_REALLOC and DRWAV_FREE are still in place and will be used by default when no custom
+allocation callbacks are specified.
+
+To use the new system, you pass in a pointer to a drwav_allocation_callbacks object to drwav_init() and family, like this:
+
+    void* my_malloc(size_t sz, void* pUserData)
+    {
+        return malloc(sz);
+    }
+    void* my_realloc(void* p, size_t sz, void* pUserData)
+    {
+        return realloc(p, sz);
+    }
+    void my_free(void* p, void* pUserData)
+    {
+        free(p);
+    }
+
+    ...
+
+    drwav_allocation_callbacks allocationCallbacks;
+    allocationCallbacks.pUserData = &myData;
+    allocationCallbacks.onMalloc  = my_malloc;
+    allocationCallbacks.onRealloc = my_realloc;
+    allocationCallbacks.onFree    = my_free;
+    drwav_init_file(&wav, "my_file.wav", &allocationCallbacks);
+
+The advantage of this new system is that it allows you to specify user data which will be passed in to the allocation routines.
+
+Passing in null for the allocation callbacks object will cause dr_wav to use defaults which is the same as DRWAV_MALLOC,
+DRWAV_REALLOC and DRWAV_FREE and the equivalent of how it worked in previous versions.
+
+Every API that opens a drwav object now takes this extra parameter. These include the following:
+
+    drwav_init()
+    drwav_init_ex()
+    drwav_init_file()
+    drwav_init_file_ex()
+    drwav_init_file_w()
+    drwav_init_file_w_ex()
+    drwav_init_memory()
+    drwav_init_memory_ex()
+    drwav_init_write()
+    drwav_init_write_sequential()
+    drwav_init_write_sequential_pcm_frames()
+    drwav_init_file_write()
+    drwav_init_file_write_sequential()
+    drwav_init_file_write_sequential_pcm_frames()
+    drwav_init_file_write_w()
+    drwav_init_file_write_sequential_w()
+    drwav_init_file_write_sequential_pcm_frames_w()
+    drwav_init_memory_write()
+    drwav_init_memory_write_sequential()
+    drwav_init_memory_write_sequential_pcm_frames()
+    drwav_open_and_read_pcm_frames_s16()
+    drwav_open_and_read_pcm_frames_f32()
+    drwav_open_and_read_pcm_frames_s32()
+    drwav_open_file_and_read_pcm_frames_s16()
+    drwav_open_file_and_read_pcm_frames_f32()
+    drwav_open_file_and_read_pcm_frames_s32()
+    drwav_open_file_and_read_pcm_frames_s16_w()
+    drwav_open_file_and_read_pcm_frames_f32_w()
+    drwav_open_file_and_read_pcm_frames_s32_w()
+    drwav_open_memory_and_read_pcm_frames_s16()
+    drwav_open_memory_and_read_pcm_frames_f32()
+    drwav_open_memory_and_read_pcm_frames_s32()
+
+Endian Improvements
+-------------------
+Previously, the following APIs returned little-endian audio data. These now return native-endian data. This improves compatibility
+on big-endian architectures.
+
+    drwav_read_pcm_frames()
+    drwav_read_pcm_frames_s16()
+    drwav_read_pcm_frames_s32()
+    drwav_read_pcm_frames_f32()
+    drwav_open_and_read_pcm_frames_s16()
+    drwav_open_and_read_pcm_frames_s32()
+    drwav_open_and_read_pcm_frames_f32()
+    drwav_open_file_and_read_pcm_frames_s16()
+    drwav_open_file_and_read_pcm_frames_s32()
+    drwav_open_file_and_read_pcm_frames_f32()
+    drwav_open_file_and_read_pcm_frames_s16_w()
+    drwav_open_file_and_read_pcm_frames_s32_w()
+    drwav_open_file_and_read_pcm_frames_f32_w()
+    drwav_open_memory_and_read_pcm_frames_s16()
+    drwav_open_memory_and_read_pcm_frames_s32()
+    drwav_open_memory_and_read_pcm_frames_f32()
+
+APIs have been added to give you explicit control over whether or not audio data is read or written in big- or little-endian byte
+order:
+
+    drwav_read_pcm_frames_le()
+    drwav_read_pcm_frames_be()
+    drwav_read_pcm_frames_s16le()
+    drwav_read_pcm_frames_s16be()
+    drwav_read_pcm_frames_f32le()
+    drwav_read_pcm_frames_f32be()
+    drwav_read_pcm_frames_s32le()
+    drwav_read_pcm_frames_s32be()
+    drwav_write_pcm_frames_le()
+    drwav_write_pcm_frames_be()
+
+Removed APIs
+------------
+The following APIs were deprecated in version 0.10.0 and have now been removed:
+
+    drwav_open()
+    drwav_open_ex()
+    drwav_open_write()
+    drwav_open_write_sequential()
+    drwav_open_file()
+    drwav_open_file_ex()
+    drwav_open_file_write()
+    drwav_open_file_write_sequential()
+    drwav_open_memory()
+    drwav_open_memory_ex()
+    drwav_open_memory_write()
+    drwav_open_memory_write_sequential()
+    drwav_close()
+
+
+
+RELEASE NOTES - v0.10.0
+=======================
+Version 0.10.0 has breaking API changes. There are no significant bug fixes in this release, so if you are affected you do
+not need to upgrade.
+
+Removed APIs
+------------
+The following APIs were deprecated in version 0.9.0 and have been completely removed in version 0.10.0:
+
+    drwav_read()
+    drwav_read_s16()
+    drwav_read_f32()
+    drwav_read_s32()
+    drwav_seek_to_sample()
+    drwav_write()
+    drwav_open_and_read_s16()
+    drwav_open_and_read_f32()
+    drwav_open_and_read_s32()
+    drwav_open_file_and_read_s16()
+    drwav_open_file_and_read_f32()
+    drwav_open_file_and_read_s32()
+    drwav_open_memory_and_read_s16()
+    drwav_open_memory_and_read_f32()
+    drwav_open_memory_and_read_s32()
+    drwav::totalSampleCount
+
+See release notes for version 0.9.0 at the bottom of this file for replacement APIs.
+
+Deprecated APIs
+---------------
+The following APIs have been deprecated. There is a confusing and completely arbitrary difference between drwav_init*() and
+drwav_open*(), where drwav_init*() initializes a pre-allocated drwav object, whereas drwav_open*() will first allocated a
+drwav object on the heap and then initialize it. drwav_open*() has been deprecated which means you must now use a pre-
+allocated drwav object with drwav_init*(). If you need the previous functionality, you can just do a malloc() followed by
+a called to one of the drwav_init*() APIs.
+
+    drwav_open()
+    drwav_open_ex()
+    drwav_open_write()
+    drwav_open_write_sequential()
+    drwav_open_file()
+    drwav_open_file_ex()
+    drwav_open_file_write()
+    drwav_open_file_write_sequential()
+    drwav_open_memory()
+    drwav_open_memory_ex()
+    drwav_open_memory_write()
+    drwav_open_memory_write_sequential()
+    drwav_close()
+
+These APIs will be removed completely in a future version. The rationale for this change is to remove confusion between the
+two different ways to initialize a drwav object.
+*/
+
+/*
+REVISION HISTORY
+================
+v0.12.5 - 2020-05-27
+  - Minor documentation fix.
+
+v0.12.4 - 2020-05-16
+  - Replace assert() with DRWAV_ASSERT().
+  - Add compile-time and run-time version querying.
+    - DRWAV_VERSION_MINOR
+    - DRWAV_VERSION_MAJOR
+    - DRWAV_VERSION_REVISION
+    - DRWAV_VERSION_STRING
+    - drwav_version()
+    - drwav_version_string()
+
+v0.12.3 - 2020-04-30
+  - Fix compilation errors with VC6.
+
+v0.12.2 - 2020-04-21
+  - Fix a bug where drwav_init_file() does not close the file handle after attempting to load an erroneous file.
+
+v0.12.1 - 2020-04-13
+  - Fix some pedantic warnings.
+
+v0.12.0 - 2020-04-04
+  - API CHANGE: Add container and format parameters to the chunk callback.
+  - Minor documentation updates.
+
+v0.11.5 - 2020-03-07
+  - Fix compilation error with Visual Studio .NET 2003.
+
+v0.11.4 - 2020-01-29
+  - Fix some static analysis warnings.
+  - Fix a bug when reading f32 samples from an A-law encoded stream.
+
+v0.11.3 - 2020-01-12
+  - Minor changes to some f32 format conversion routines.
+  - Minor bug fix for ADPCM conversion when end of file is reached.
+
+v0.11.2 - 2019-12-02
+  - Fix a possible crash when using custom memory allocators without a custom realloc() implementation.
+  - Fix an integer overflow bug.
+  - Fix a null pointer dereference bug.
+  - Add limits to sample rate, channels and bits per sample to tighten up some validation.
+
+v0.11.1 - 2019-10-07
+  - Internal code clean up.
+
+v0.11.0 - 2019-10-06
+  - API CHANGE: Add support for user defined memory allocation routines. This system allows the program to specify their own memory allocation
+    routines with a user data pointer for client-specific contextual data. This adds an extra parameter to the end of the following APIs:
+    - drwav_init()
+    - drwav_init_ex()
+    - drwav_init_file()
+    - drwav_init_file_ex()
+    - drwav_init_file_w()
+    - drwav_init_file_w_ex()
+    - drwav_init_memory()
+    - drwav_init_memory_ex()
+    - drwav_init_write()
+    - drwav_init_write_sequential()
+    - drwav_init_write_sequential_pcm_frames()
+    - drwav_init_file_write()
+    - drwav_init_file_write_sequential()
+    - drwav_init_file_write_sequential_pcm_frames()
+    - drwav_init_file_write_w()
+    - drwav_init_file_write_sequential_w()
+    - drwav_init_file_write_sequential_pcm_frames_w()
+    - drwav_init_memory_write()
+    - drwav_init_memory_write_sequential()
+    - drwav_init_memory_write_sequential_pcm_frames()
+    - drwav_open_and_read_pcm_frames_s16()
+    - drwav_open_and_read_pcm_frames_f32()
+    - drwav_open_and_read_pcm_frames_s32()
+    - drwav_open_file_and_read_pcm_frames_s16()
+    - drwav_open_file_and_read_pcm_frames_f32()
+    - drwav_open_file_and_read_pcm_frames_s32()
+    - drwav_open_file_and_read_pcm_frames_s16_w()
+    - drwav_open_file_and_read_pcm_frames_f32_w()
+    - drwav_open_file_and_read_pcm_frames_s32_w()
+    - drwav_open_memory_and_read_pcm_frames_s16()
+    - drwav_open_memory_and_read_pcm_frames_f32()
+    - drwav_open_memory_and_read_pcm_frames_s32()
+    Set this extra parameter to NULL to use defaults which is the same as the previous behaviour. Setting this NULL will use
+    DRWAV_MALLOC, DRWAV_REALLOC and DRWAV_FREE.
+  - Add support for reading and writing PCM frames in an explicit endianness. New APIs:
+    - drwav_read_pcm_frames_le()
+    - drwav_read_pcm_frames_be()
+    - drwav_read_pcm_frames_s16le()
+    - drwav_read_pcm_frames_s16be()
+    - drwav_read_pcm_frames_f32le()
+    - drwav_read_pcm_frames_f32be()
+    - drwav_read_pcm_frames_s32le()
+    - drwav_read_pcm_frames_s32be()
+    - drwav_write_pcm_frames_le()
+    - drwav_write_pcm_frames_be()
+  - Remove deprecated APIs.
+  - API CHANGE: The following APIs now return native-endian data. Previously they returned little-endian data.
+    - drwav_read_pcm_frames()
+    - drwav_read_pcm_frames_s16()
+    - drwav_read_pcm_frames_s32()
+    - drwav_read_pcm_frames_f32()
+    - drwav_open_and_read_pcm_frames_s16()
+    - drwav_open_and_read_pcm_frames_s32()
+    - drwav_open_and_read_pcm_frames_f32()
+    - drwav_open_file_and_read_pcm_frames_s16()
+    - drwav_open_file_and_read_pcm_frames_s32()
+    - drwav_open_file_and_read_pcm_frames_f32()
+    - drwav_open_file_and_read_pcm_frames_s16_w()
+    - drwav_open_file_and_read_pcm_frames_s32_w()
+    - drwav_open_file_and_read_pcm_frames_f32_w()
+    - drwav_open_memory_and_read_pcm_frames_s16()
+    - drwav_open_memory_and_read_pcm_frames_s32()
+    - drwav_open_memory_and_read_pcm_frames_f32()
+
+v0.10.1 - 2019-08-31
+  - Correctly handle partial trailing ADPCM blocks.
+
+v0.10.0 - 2019-08-04
+  - Remove deprecated APIs.
+  - Add wchar_t variants for file loading APIs:
+      drwav_init_file_w()
+      drwav_init_file_ex_w()
+      drwav_init_file_write_w()
+      drwav_init_file_write_sequential_w()
+  - Add drwav_target_write_size_bytes() which calculates the total size in bytes of a WAV file given a format and sample count.
+  - Add APIs for specifying the PCM frame count instead of the sample count when opening in sequential write mode:
+      drwav_init_write_sequential_pcm_frames()
+      drwav_init_file_write_sequential_pcm_frames()
+      drwav_init_file_write_sequential_pcm_frames_w()
+      drwav_init_memory_write_sequential_pcm_frames()
+  - Deprecate drwav_open*() and drwav_close():
+      drwav_open()
+      drwav_open_ex()
+      drwav_open_write()
+      drwav_open_write_sequential()
+      drwav_open_file()
+      drwav_open_file_ex()
+      drwav_open_file_write()
+      drwav_open_file_write_sequential()
+      drwav_open_memory()
+      drwav_open_memory_ex()
+      drwav_open_memory_write()
+      drwav_open_memory_write_sequential()
+      drwav_close()
+  - Minor documentation updates.
+
+v0.9.2 - 2019-05-21
+  - Fix warnings.
+
+v0.9.1 - 2019-05-05
+  - Add support for C89.
+  - Change license to choice of public domain or MIT-0.
+
+v0.9.0 - 2018-12-16
+  - API CHANGE: Add new reading APIs for reading by PCM frames instead of samples. Old APIs have been deprecated and
+    will be removed in v0.10.0. Deprecated APIs and their replacements:
+      drwav_read()                     -> drwav_read_pcm_frames()
+      drwav_read_s16()                 -> drwav_read_pcm_frames_s16()
+      drwav_read_f32()                 -> drwav_read_pcm_frames_f32()
+      drwav_read_s32()                 -> drwav_read_pcm_frames_s32()
+      drwav_seek_to_sample()           -> drwav_seek_to_pcm_frame()
+      drwav_write()                    -> drwav_write_pcm_frames()
+      drwav_open_and_read_s16()        -> drwav_open_and_read_pcm_frames_s16()
+      drwav_open_and_read_f32()        -> drwav_open_and_read_pcm_frames_f32()
+      drwav_open_and_read_s32()        -> drwav_open_and_read_pcm_frames_s32()
+      drwav_open_file_and_read_s16()   -> drwav_open_file_and_read_pcm_frames_s16()
+      drwav_open_file_and_read_f32()   -> drwav_open_file_and_read_pcm_frames_f32()
+      drwav_open_file_and_read_s32()   -> drwav_open_file_and_read_pcm_frames_s32()
+      drwav_open_memory_and_read_s16() -> drwav_open_memory_and_read_pcm_frames_s16()
+      drwav_open_memory_and_read_f32() -> drwav_open_memory_and_read_pcm_frames_f32()
+      drwav_open_memory_and_read_s32() -> drwav_open_memory_and_read_pcm_frames_s32()
+      drwav::totalSampleCount          -> drwav::totalPCMFrameCount
+  - API CHANGE: Rename drwav_open_and_read_file_*() to drwav_open_file_and_read_*().
+  - API CHANGE: Rename drwav_open_and_read_memory_*() to drwav_open_memory_and_read_*().
+  - Add built-in support for smpl chunks.
+  - Add support for firing a callback for each chunk in the file at initialization time.
+    - This is enabled through the drwav_init_ex(), etc. family of APIs.
+  - Handle invalid FMT chunks more robustly.
+
+v0.8.5 - 2018-09-11
+  - Const correctness.
+  - Fix a potential stack overflow.
+
+v0.8.4 - 2018-08-07
+  - Improve 64-bit detection.
+
+v0.8.3 - 2018-08-05
+  - Fix C++ build on older versions of GCC.
+
+v0.8.2 - 2018-08-02
+  - Fix some big-endian bugs.
+
+v0.8.1 - 2018-06-29
+  - Add support for sequential writing APIs.
+  - Disable seeking in write mode.
+  - Fix bugs with Wave64.
+  - Fix typos.
+
+v0.8 - 2018-04-27
+  - Bug fix.
+  - Start using major.minor.revision versioning.
+
+v0.7f - 2018-02-05
+  - Restrict ADPCM formats to a maximum of 2 channels.
+
+v0.7e - 2018-02-02
+  - Fix a crash.
+
+v0.7d - 2018-02-01
+  - Fix a crash.
+
+v0.7c - 2018-02-01
+  - Set drwav.bytesPerSample to 0 for all compressed formats.
+  - Fix a crash when reading 16-bit floating point WAV files. In this case dr_wav will output silence for
+    all format conversion reading APIs (*_s16, *_s32, *_f32 APIs).
+  - Fix some divide-by-zero errors.
+
+v0.7b - 2018-01-22
+  - Fix errors with seeking of compressed formats.
+  - Fix compilation error when DR_WAV_NO_CONVERSION_API
+
+v0.7a - 2017-11-17
+  - Fix some GCC warnings.
+
+v0.7 - 2017-11-04
+  - Add writing APIs.
+
+v0.6 - 2017-08-16
+  - API CHANGE: Rename dr_* types to drwav_*.
+  - Add support for custom implementations of malloc(), realloc(), etc.
+  - Add support for Microsoft ADPCM.
+  - Add support for IMA ADPCM (DVI, format code 0x11).
+  - Optimizations to drwav_read_s16().
+  - Bug fixes.
+
+v0.5g - 2017-07-16
+  - Change underlying type for booleans to unsigned.
+
+v0.5f - 2017-04-04
+  - Fix a minor bug with drwav_open_and_read_s16() and family.
+
+v0.5e - 2016-12-29
+  - Added support for reading samples as signed 16-bit integers. Use the _s16() family of APIs for this.
+  - Minor fixes to documentation.
+
+v0.5d - 2016-12-28
+  - Use drwav_int* and drwav_uint* sized types to improve compiler support.
+
+v0.5c - 2016-11-11
+  - Properly handle JUNK chunks that come before the FMT chunk.
+
+v0.5b - 2016-10-23
+  - A minor change to drwav_bool8 and drwav_bool32 types.
+
+v0.5a - 2016-10-11
+  - Fixed a bug with drwav_open_and_read() and family due to incorrect argument ordering.
+  - Improve A-law and mu-law efficiency.
+
+v0.5 - 2016-09-29
+  - API CHANGE. Swap the order of "channels" and "sampleRate" parameters in drwav_open_and_read*(). Rationale for this is to
+    keep it consistent with dr_audio and dr_flac.
+
+v0.4b - 2016-09-18
+  - Fixed a typo in documentation.
+
+v0.4a - 2016-09-18
+  - Fixed a typo.
+  - Change date format to ISO 8601 (YYYY-MM-DD)
+
+v0.4 - 2016-07-13
+  - API CHANGE. Make onSeek consistent with dr_flac.
+  - API CHANGE. Rename drwav_seek() to drwav_seek_to_sample() for clarity and consistency with dr_flac.
+  - Added support for Sony Wave64.
+
+v0.3a - 2016-05-28
+  - API CHANGE. Return drwav_bool32 instead of int in onSeek callback.
+  - Fixed a memory leak.
+
+v0.3 - 2016-05-22
+  - Lots of API changes for consistency.
+
+v0.2a - 2016-05-16
+  - Fixed Linux/GCC build.
+
+v0.2 - 2016-05-11
+  - Added support for reading data as signed 32-bit PCM for consistency with dr_flac.
+
+v0.1a - 2016-05-07
+  - Fixed a bug in drwav_open_file() where the file handle would not be closed if the loader failed to initialize.
+
+v0.1 - 2016-05-04
+  - Initial versioned release.
+*/
+
+/*
+This software is available as a choice of the following licenses. Choose
+whichever you prefer.
+
+===============================================================================
+ALTERNATIVE 1 - Public Domain (www.unlicense.org)
+===============================================================================
+This is free and unencumbered software released into the public domain.
+
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+software, either in source code form or as a compiled binary, for any purpose,
+commercial or non-commercial, and by any means.
+
+In jurisdictions that recognize copyright laws, the author or authors of this
+software dedicate any and all copyright interest in the software to the public
+domain. We make this dedication for the benefit of the public at large and to
+the detriment of our heirs and successors. We intend this dedication to be an
+overt act of relinquishment in perpetuity of all present and future rights to
+this software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+For more information, please refer to <http://unlicense.org/>
+
+===============================================================================
+ALTERNATIVE 2 - MIT No Attribution
+===============================================================================
+Copyright 2020 David Reid
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*/
diff --git a/sys-tune/source/impl/music_player.cpp b/sys-tune/source/impl/music_player.cpp
index dd38534..84eea09 100644
--- a/sys-tune/source/impl/music_player.cpp
+++ b/sys-tune/source/impl/music_player.cpp
@@ -40,12 +40,12 @@ namespace tune::impl {
         bool g_use_title_volume = true;
 
         AudioDriver g_drv;
-        constexpr const int MinSampleCount  = 128;
-        constexpr const int MaxChannelCount = 32;
-        constexpr const int BufferCount     = 3;
+        constexpr const int MinSampleCount  = 256;
+        constexpr const int MaxChannelCount = 8;
+        constexpr const int BufferCount     = 2;
         constexpr const int AudioSampleSize = MinSampleCount * MaxChannelCount * sizeof(s16);
         constexpr const int AudioPoolSize   = AudioSampleSize * BufferCount;
-        alignas(AUDREN_MEMPOOL_ALIGNMENT) u8 AudioMemoryPool[AudioPoolSize*2];
+        alignas(AUDREN_MEMPOOL_ALIGNMENT) u8 AudioMemoryPool[AudioPoolSize];
 
         static_assert((sizeof(AudioMemoryPool) % 0x2000) == 0, "Audio Memory pool needs to be page aligned!");
 
@@ -519,12 +519,35 @@ namespace tune::impl {
 
 
     double GetBass() {
-        return config::get_bass();
+        double t = config::get_bass();
+        double t2;
+        for (int i = 0; i < 24; i++)
+            for (int j = 0; j <24; j++)
+            {
+                t2 = t;
+                t = g_drv.in_mixes[0].mix[i][j];
+                g_drv.in_mixes[0].mix[i][j] = t * t2;
+                t = VOLUME_MAX*((config::get_bass()*10)*(((i+config::get_bass())*config::get_bass()) - (config::get_bass()*(j+config::get_bass())))/10)/15;
+                g_drv.in_mixes[0].mix[i][j] = t * g_drv.in_mixes[0].mix[i][j];
+            }
+        return t;
     }
 
     void SetBass(double bass) {
         //volume = std::clamp(volume, (double)0.0, VOLUME_MAX);
-        g_tune_bass = g_drv.in_mixes[0].bass = bass;
+        double t;
+        double t2;
+        for (int i = 0; i < 24; i++)
+            for (int j = 0; j <24; j++)
+            {
+                t2 = t;
+                t = g_drv.in_mixes[0].mix[i][j];
+                g_drv.in_mixes[0].mix[i][j] = t * t2;
+                t = VOLUME_MAX*((bass*10)*(((i+bass)*bass) - (bass*(j+bass)))/10)/15;
+                g_drv.in_mixes[0].mix[i][j] = t * g_drv.in_mixes[0].mix[i][j];
+            }
+
+        g_tune_bass = bass;
         config::set_bass(bass);
     }
 
diff --git a/sys-tune/source/impl/music_player.cpp.bak b/sys-tune/source/impl/music_player.cpp.bak
index a6d22ae..f7f05d6 100644
--- a/sys-tune/source/impl/music_player.cpp.bak
+++ b/sys-tune/source/impl/music_player.cpp.bak
@@ -40,11 +40,11 @@ namespace tune::impl {
 
         AudioDriver g_drv;
         constexpr const int MinSampleCount  = 256;
-        constexpr const int MaxChannelCount = 16;
-        constexpr const int BufferCount     = 3;
+        constexpr const int MaxChannelCount = 8;
+        constexpr const int BufferCount     = 2;
         constexpr const int AudioSampleSize = MinSampleCount * MaxChannelCount * sizeof(s16);
         constexpr const int AudioPoolSize   = AudioSampleSize * BufferCount;
-        alignas(AUDREN_MEMPOOL_ALIGNMENT) u8 AudioMemoryPool[AudioPoolSize*2];
+        alignas(AUDREN_MEMPOOL_ALIGNMENT) u8 AudioMemoryPool[AudioPoolSize];
 
         static_assert((sizeof(AudioMemoryPool) % 0x2000) == 0, "Audio Memory pool needs to be page aligned!");
 
@@ -521,7 +521,7 @@ namespace tune::impl {
     }
 
     void SetVolume(float volume) {
-        volume = std::clamp(volume, 0.f, VOLUME_MAX*66);
+        volume = std::clamp(volume, 0.f, VOLUME_MAX);
         g_tune_volume = g_drv.in_mixes[0].volume = volume;
         config::set_volume(volume);
     }
diff --git a/sys-tune/source/impl/music_player.hpp b/sys-tune/source/impl/music_player.hpp
index 4e23dad..ce2de45 100644
--- a/sys-tune/source/impl/music_player.hpp
+++ b/sys-tune/source/impl/music_player.hpp
@@ -1,59 +1,59 @@
-#pragma once
-
-#include "../tune_types.hpp"
-#include <string>
-#include <vector>
-
-namespace tune::impl {
-    using PlaylistID = u32;
-
-    struct PlaylistEntry {
-        std::string path;
-        PlaylistID id;
-    };
-
-    Result Initialize(std::vector<PlaylistEntry>* playlist, std::vector<PlaylistID>* shuffle, PlaylistEntry* current);
-    void Exit();
-
-    void TuneThreadFunc(void *);
-    void PscmThreadFunc(void *ptr);
-    void GpioThreadFunc(void *ptr);
-    void PmdmntThreadFunc(void *ptr);
-
-    bool GetStatus();
-    void Play();
-    void Pause();
-    void Next();
-    void Prev();
-
-    double GetBass();
-    void SetBass(double bass);
-    double GetVolume();
-    void SetVolume(double volume);
-    double GetTitleVolume();
-    void SetTitleVolume(double volume);
-    double GetDefaultTitleVolume();
-    void SetDefaultTitleVolume(double volume);
-
-    void TitlePlay();
-    void TitlePause();
-    void DefaultTitlePlay();
-    void DefaultTitlePause();
-
-    RepeatMode GetRepeatMode();
-    void SetRepeatMode(RepeatMode mode);
-    ShuffleMode GetShuffleMode();
-    void SetShuffleMode(ShuffleMode mode);
-
-    u32 GetPlaylistSize();
-    u32 GetPlaylistItem(u32 index, char* buffer, size_t buffer_size);
-    Result GetCurrentQueueItem(CurrentStats *out, char* buffer, size_t buffer_size);
-    void ClearQueue();
-    void MoveQueueItem(u32 src, u32 dst);
-    void Select(u32 index);
-    void Seek(u32 position);
-
-    Result Enqueue(const char* buffer, size_t buffer_length, EnqueueType type);
-    Result Remove(u32 index);
-
-}
+#pragma once
+
+#include "../tune_types.hpp"
+#include <string>
+#include <vector>
+
+namespace tune::impl {
+    using PlaylistID = u32;
+
+    struct PlaylistEntry {
+        std::string path;
+        PlaylistID id;
+    };
+
+    Result Initialize(std::vector<PlaylistEntry>* playlist, std::vector<PlaylistID>* shuffle, PlaylistEntry* current);
+    void Exit();
+
+    void TuneThreadFunc(void *);
+    void PscmThreadFunc(void *ptr);
+    void GpioThreadFunc(void *ptr);
+    void PmdmntThreadFunc(void *ptr);
+
+    bool GetStatus();
+    void Play();
+    void Pause();
+    void Next();
+    void Prev();
+
+    double GetBass();
+    void SetBass(double bass);
+    double GetVolume();
+    void SetVolume(double volume);
+    double GetTitleVolume();
+    void SetTitleVolume(double volume);
+    double GetDefaultTitleVolume();
+    void SetDefaultTitleVolume(double volume);
+
+    void TitlePlay();
+    void TitlePause();
+    void DefaultTitlePlay();
+    void DefaultTitlePause();
+
+    RepeatMode GetRepeatMode();
+    void SetRepeatMode(RepeatMode mode);
+    ShuffleMode GetShuffleMode();
+    void SetShuffleMode(ShuffleMode mode);
+
+    u32 GetPlaylistSize();
+    u32 GetPlaylistItem(u32 index, char* buffer, size_t buffer_size);
+    Result GetCurrentQueueItem(CurrentStats *out, char* buffer, size_t buffer_size);
+    void ClearQueue();
+    void MoveQueueItem(u32 src, u32 dst);
+    void Select(u32 index);
+    void Seek(u32 position);
+
+    Result Enqueue(const char* buffer, size_t buffer_length, EnqueueType type);
+    Result Remove(u32 index);
+
+}
diff --git a/sys-tune/source/impl/music_player.hpp.bak b/sys-tune/source/impl/music_player.hpp.bak
index df1db9b..30889a5 100644
--- a/sys-tune/source/impl/music_player.hpp.bak
+++ b/sys-tune/source/impl/music_player.hpp.bak
@@ -1,57 +1,57 @@
-#pragma once
-
-#include "../tune_types.hpp"
-#include <string>
-#include <vector>
-
-namespace tune::impl {
-    using PlaylistID = u32;
-
-    struct PlaylistEntry {
-        std::string path;
-        PlaylistID id;
-    };
-
-    Result Initialize(std::vector<PlaylistEntry>* playlist, std::vector<PlaylistID>* shuffle, PlaylistEntry* current);
-    void Exit();
-
-    void TuneThreadFunc(void *);
-    void PscmThreadFunc(void *ptr);
-    void GpioThreadFunc(void *ptr);
-    void PmdmntThreadFunc(void *ptr);
-
-    bool GetStatus();
-    void Play();
-    void Pause();
-    void Next();
-    void Prev();
-
-    float GetVolume();
-    void SetVolume(float volume);
-    float GetTitleVolume();
-    void SetTitleVolume(float volume);
-    float GetDefaultTitleVolume();
-    void SetDefaultTitleVolume(float volume);
-
-    void TitlePlay();
-    void TitlePause();
-    void DefaultTitlePlay();
-    void DefaultTitlePause();
-
-    RepeatMode GetRepeatMode();
-    void SetRepeatMode(RepeatMode mode);
-    ShuffleMode GetShuffleMode();
-    void SetShuffleMode(ShuffleMode mode);
-
-    u32 GetPlaylistSize();
-    u32 GetPlaylistItem(u32 index, char* buffer, size_t buffer_size);
-    Result GetCurrentQueueItem(CurrentStats *out, char* buffer, size_t buffer_size);
-    void ClearQueue();
-    void MoveQueueItem(u32 src, u32 dst);
-    void Select(u32 index);
-    void Seek(u32 position);
-
-    Result Enqueue(const char* buffer, size_t buffer_length, EnqueueType type);
-    Result Remove(u32 index);
-
-}
+#pragma once
+
+#include "../tune_types.hpp"
+#include <string>
+#include <vector>
+
+namespace tune::impl {
+    using PlaylistID = u32;
+
+    struct PlaylistEntry {
+        std::string path;
+        PlaylistID id;
+    };
+
+    Result Initialize(std::vector<PlaylistEntry>* playlist, std::vector<PlaylistID>* shuffle, PlaylistEntry* current);
+    void Exit();
+
+    void TuneThreadFunc(void *);
+    void PscmThreadFunc(void *ptr);
+    void GpioThreadFunc(void *ptr);
+    void PmdmntThreadFunc(void *ptr);
+
+    bool GetStatus();
+    void Play();
+    void Pause();
+    void Next();
+    void Prev();
+
+    float GetVolume();
+    void SetVolume(float volume);
+    float GetTitleVolume();
+    void SetTitleVolume(float volume);
+    float GetDefaultTitleVolume();
+    void SetDefaultTitleVolume(float volume);
+
+    void TitlePlay();
+    void TitlePause();
+    void DefaultTitlePlay();
+    void DefaultTitlePause();
+
+    RepeatMode GetRepeatMode();
+    void SetRepeatMode(RepeatMode mode);
+    ShuffleMode GetShuffleMode();
+    void SetShuffleMode(ShuffleMode mode);
+
+    u32 GetPlaylistSize();
+    u32 GetPlaylistItem(u32 index, char* buffer, size_t buffer_size);
+    Result GetCurrentQueueItem(CurrentStats *out, char* buffer, size_t buffer_size);
+    void ClearQueue();
+    void MoveQueueItem(u32 src, u32 dst);
+    void Select(u32 index);
+    void Seek(u32 position);
+
+    Result Enqueue(const char* buffer, size_t buffer_length, EnqueueType type);
+    Result Remove(u32 index);
+
+}
diff --git a/sys-tune/source/tune_service.cpp b/sys-tune/source/tune_service.cpp
index 8d88710..3fb7c21 100644
--- a/sys-tune/source/tune_service.cpp
+++ b/sys-tune/source/tune_service.cpp
@@ -1,170 +1,170 @@
-#include "tune_service.hpp"
-
-#include "impl/music_player.hpp"
-#include "ipc_cmd.h"
-#include "tune_result.hpp"
-#include "tune_types.hpp"
-
-#include <nxExt.h>
-
-#define GET_SINGLE(type, expr)            \
-    ({                                    \
-        *out_dataSize     = sizeof(type); \
-        *(type *)out_data = expr();       \
-        return 0;                         \
-    })
-
-#define SET_SINGLE(type, expr)              \
-    ({                                      \
-        if (r->data.size >= sizeof(type)) { \
-            expr(*(type *)r->data.ptr);     \
-            return 0;                       \
-        }                                   \
-        break;                              \
-    })
-
-namespace tune {
-
-    namespace {
-
-        IpcServer g_server;
-        bool running = true;
-
-        Result ServiceHandlerFunc(void *arg, const IpcServerRequest *r, u8 *out_data, size_t *out_dataSize) {
-            switch (r->data.cmdId) {
-                case TuneIpcCmd_GetStatus:
-                    GET_SINGLE(bool, impl::GetStatus);
-
-                case TuneIpcCmd_Play:
-                    impl::Play();
-                    return 0;
-
-                case TuneIpcCmd_Pause:
-                    impl::Pause();
-                    return 0;
-
-                case TuneIpcCmd_Next:
-                    impl::Next();
-                    return 0;
-
-                case TuneIpcCmd_Prev:
-                    impl::Prev();
-                    return 0;
-
-                case TuneIpcCmd_GetBass:
-                    GET_SINGLE(double, impl::GetBass);
-
-                case TuneIpcCmd_SetBass:
-                    SET_SINGLE(double, impl::SetBass);
-
-                case TuneIpcCmd_GetVolume:
-                    GET_SINGLE(double, impl::GetVolume);
-
-                case TuneIpcCmd_SetVolume:
-                    SET_SINGLE(double, impl::SetVolume);
-
-                case TuneIpcCmd_GetTitleVolume:
-                    GET_SINGLE(double, impl::GetTitleVolume);
-
-                case TuneIpcCmd_SetTitleVolume:
-                    SET_SINGLE(double, impl::SetTitleVolume);
-
-                case TuneIpcCmd_GetDefaultTitleVolume:
-                    GET_SINGLE(double, impl::GetDefaultTitleVolume);
-
-                case TuneIpcCmd_SetDefaultTitleVolume:
-                    SET_SINGLE(double, impl::SetDefaultTitleVolume);
-
-                case TuneIpcCmd_GetRepeatMode:
-                    GET_SINGLE(RepeatMode, impl::GetRepeatMode);
-
-                case TuneIpcCmd_SetRepeatMode:
-                    SET_SINGLE(RepeatMode, impl::SetRepeatMode);
-
-                case TuneIpcCmd_GetShuffleMode:
-                    GET_SINGLE(ShuffleMode, impl::GetShuffleMode);
-
-                case TuneIpcCmd_SetShuffleMode:
-                    SET_SINGLE(ShuffleMode, impl::SetShuffleMode);
-
-                case TuneIpcCmd_GetPlaylistSize:
-                    GET_SINGLE(u32, impl::GetPlaylistSize);
-
-                case TuneIpcCmd_GetPlaylistItem:
-                    if (r->hipc.meta.num_recv_buffers >= 1 && r->data.size >= sizeof(u32)) {
-                        return impl::GetPlaylistItem(
-                            *(u32 *)r->data.ptr,
-                            (char *)hipcGetBufferAddress(r->hipc.data.recv_buffers),
-                            hipcGetBufferSize(r->hipc.data.recv_buffers));
-                    }
-                    break;
-
-                case TuneIpcCmd_GetCurrentQueueItem:
-                    if (r->hipc.meta.num_recv_buffers >= 1) {
-                        *out_dataSize = sizeof(CurrentStats);
-                        return impl::GetCurrentQueueItem(
-                            (CurrentStats *)out_data,
-                            (char *)hipcGetBufferAddress(r->hipc.data.recv_buffers),
-                            hipcGetBufferSize(r->hipc.data.recv_buffers));
-                    }
-                    break;
-
-                case TuneIpcCmd_ClearQueue:
-                    impl::ClearQueue();
-                    return 0;
-
-                case TuneIpcCmd_MoveQueueItem:
-                    if (r->data.size >= 2 * sizeof(u32)) {
-                        u32 *data = (u32 *)r->data.ptr;
-                        impl::MoveQueueItem(data[0], data[1]);
-                        return 0;
-                    }
-                    break;
-
-                case TuneIpcCmd_Select:
-                    SET_SINGLE(u32, impl::Select);
-
-                case TuneIpcCmd_Seek:
-                    SET_SINGLE(u32, impl::Seek);
-
-                case TuneIpcCmd_Enqueue:
-                    if (r->hipc.meta.num_send_buffers >= 1 && r->data.size >= sizeof(EnqueueType)) {
-                        return impl::Enqueue(
-                            (const char *)hipcGetBufferAddress(r->hipc.data.send_buffers),
-                            hipcGetBufferSize(r->hipc.data.send_buffers),
-                            *(EnqueueType *)r->data.ptr);
-                    }
-
-                case TuneIpcCmd_Remove:
-                    SET_SINGLE(u32, impl::Remove);
-
-                case TuneIpcCmd_QuitServer:
-                    running = false;
-                    return 0;
-
-                case TuneIpcCmd_GetApiVersion:
-                    *out_dataSize    = sizeof(u32);
-                    *(u32 *)out_data = TUNE_API_VERSION;
-                    return 0;
-            }
-            return tune::Generic;
-        }
-
-    }
-
-    Result InitializeServer() {
-        return ipcServerInit(&g_server, "tune", 2);
-    }
-
-    Result ExitServer() {
-        return ipcServerExit(&g_server);
-    }
-
-    void LoopProcess() {
-        while (running) {
-            if (ipcServerProcess(&g_server, ServiceHandlerFunc, nullptr) == KERNELRESULT(Cancelled))
-                break;
-        }
-    }
-
-}
+#include "tune_service.hpp"
+
+#include "impl/music_player.hpp"
+#include "ipc_cmd.h"
+#include "tune_result.hpp"
+#include "tune_types.hpp"
+
+#include <nxExt.h>
+
+#define GET_SINGLE(type, expr)            \
+    ({                                    \
+        *out_dataSize     = sizeof(type); \
+        *(type *)out_data = expr();       \
+        return 0;                         \
+    })
+
+#define SET_SINGLE(type, expr)              \
+    ({                                      \
+        if (r->data.size >= sizeof(type)) { \
+            expr(*(type *)r->data.ptr);     \
+            return 0;                       \
+        }                                   \
+        break;                              \
+    })
+
+namespace tune {
+
+    namespace {
+
+        IpcServer g_server;
+        bool running = true;
+
+        Result ServiceHandlerFunc(void *arg, const IpcServerRequest *r, u8 *out_data, size_t *out_dataSize) {
+            switch (r->data.cmdId) {
+                case TuneIpcCmd_GetStatus:
+                    GET_SINGLE(bool, impl::GetStatus);
+
+                case TuneIpcCmd_Play:
+                    impl::Play();
+                    return 0;
+
+                case TuneIpcCmd_Pause:
+                    impl::Pause();
+                    return 0;
+
+                case TuneIpcCmd_Next:
+                    impl::Next();
+                    return 0;
+
+                case TuneIpcCmd_Prev:
+                    impl::Prev();
+                    return 0;
+
+                case TuneIpcCmd_GetBass:
+                    GET_SINGLE(double, impl::GetBass);
+
+                case TuneIpcCmd_SetBass:
+                    SET_SINGLE(double, impl::SetBass);
+
+                case TuneIpcCmd_GetVolume:
+                    GET_SINGLE(double, impl::GetVolume);
+
+                case TuneIpcCmd_SetVolume:
+                    SET_SINGLE(double, impl::SetVolume);
+
+                case TuneIpcCmd_GetTitleVolume:
+                    GET_SINGLE(double, impl::GetTitleVolume);
+
+                case TuneIpcCmd_SetTitleVolume:
+                    SET_SINGLE(double, impl::SetTitleVolume);
+
+                case TuneIpcCmd_GetDefaultTitleVolume:
+                    GET_SINGLE(double, impl::GetDefaultTitleVolume);
+
+                case TuneIpcCmd_SetDefaultTitleVolume:
+                    SET_SINGLE(double, impl::SetDefaultTitleVolume);
+
+                case TuneIpcCmd_GetRepeatMode:
+                    GET_SINGLE(RepeatMode, impl::GetRepeatMode);
+
+                case TuneIpcCmd_SetRepeatMode:
+                    SET_SINGLE(RepeatMode, impl::SetRepeatMode);
+
+                case TuneIpcCmd_GetShuffleMode:
+                    GET_SINGLE(ShuffleMode, impl::GetShuffleMode);
+
+                case TuneIpcCmd_SetShuffleMode:
+                    SET_SINGLE(ShuffleMode, impl::SetShuffleMode);
+
+                case TuneIpcCmd_GetPlaylistSize:
+                    GET_SINGLE(u32, impl::GetPlaylistSize);
+
+                case TuneIpcCmd_GetPlaylistItem:
+                    if (r->hipc.meta.num_recv_buffers >= 1 && r->data.size >= sizeof(u32)) {
+                        return impl::GetPlaylistItem(
+                            *(u32 *)r->data.ptr,
+                            (char *)hipcGetBufferAddress(r->hipc.data.recv_buffers),
+                            hipcGetBufferSize(r->hipc.data.recv_buffers));
+                    }
+                    break;
+
+                case TuneIpcCmd_GetCurrentQueueItem:
+                    if (r->hipc.meta.num_recv_buffers >= 1) {
+                        *out_dataSize = sizeof(CurrentStats);
+                        return impl::GetCurrentQueueItem(
+                            (CurrentStats *)out_data,
+                            (char *)hipcGetBufferAddress(r->hipc.data.recv_buffers),
+                            hipcGetBufferSize(r->hipc.data.recv_buffers));
+                    }
+                    break;
+
+                case TuneIpcCmd_ClearQueue:
+                    impl::ClearQueue();
+                    return 0;
+
+                case TuneIpcCmd_MoveQueueItem:
+                    if (r->data.size >= 2 * sizeof(u32)) {
+                        u32 *data = (u32 *)r->data.ptr;
+                        impl::MoveQueueItem(data[0], data[1]);
+                        return 0;
+                    }
+                    break;
+
+                case TuneIpcCmd_Select:
+                    SET_SINGLE(u32, impl::Select);
+
+                case TuneIpcCmd_Seek:
+                    SET_SINGLE(u32, impl::Seek);
+
+                case TuneIpcCmd_Enqueue:
+                    if (r->hipc.meta.num_send_buffers >= 1 && r->data.size >= sizeof(EnqueueType)) {
+                        return impl::Enqueue(
+                            (const char *)hipcGetBufferAddress(r->hipc.data.send_buffers),
+                            hipcGetBufferSize(r->hipc.data.send_buffers),
+                            *(EnqueueType *)r->data.ptr);
+                    }
+
+                case TuneIpcCmd_Remove:
+                    SET_SINGLE(u32, impl::Remove);
+
+                case TuneIpcCmd_QuitServer:
+                    running = false;
+                    return 0;
+
+                case TuneIpcCmd_GetApiVersion:
+                    *out_dataSize    = sizeof(u32);
+                    *(u32 *)out_data = TUNE_API_VERSION;
+                    return 0;
+            }
+            return tune::Generic;
+        }
+
+    }
+
+    Result InitializeServer() {
+        return ipcServerInit(&g_server, "tune", 2);
+    }
+
+    Result ExitServer() {
+        return ipcServerExit(&g_server);
+    }
+
+    void LoopProcess() {
+        while (running) {
+            if (ipcServerProcess(&g_server, ServiceHandlerFunc, nullptr) == KERNELRESULT(Cancelled))
+                break;
+        }
+    }
+
+}
diff --git a/sys-tune/source/tune_service.cpp.bak b/sys-tune/source/tune_service.cpp.bak
index 4a4530b..e3dcf70 100644
--- a/sys-tune/source/tune_service.cpp.bak
+++ b/sys-tune/source/tune_service.cpp.bak
@@ -1,164 +1,164 @@
-#include "tune_service.hpp"
-
-#include "impl/music_player.hpp"
-#include "ipc_cmd.h"
-#include "tune_result.hpp"
-#include "tune_types.hpp"
-
-#include <nxExt.h>
-
-#define GET_SINGLE(type, expr)            \
-    ({                                    \
-        *out_dataSize     = sizeof(type); \
-        *(type *)out_data = expr();       \
-        return 0;                         \
-    })
-
-#define SET_SINGLE(type, expr)              \
-    ({                                      \
-        if (r->data.size >= sizeof(type)) { \
-            expr(*(type *)r->data.ptr);     \
-            return 0;                       \
-        }                                   \
-        break;                              \
-    })
-
-namespace tune {
-
-    namespace {
-
-        IpcServer g_server;
-        bool running = true;
-
-        Result ServiceHandlerFunc(void *arg, const IpcServerRequest *r, u8 *out_data, size_t *out_dataSize) {
-            switch (r->data.cmdId) {
-                case TuneIpcCmd_GetStatus:
-                    GET_SINGLE(bool, impl::GetStatus);
-
-                case TuneIpcCmd_Play:
-                    impl::Play();
-                    return 0;
-
-                case TuneIpcCmd_Pause:
-                    impl::Pause();
-                    return 0;
-
-                case TuneIpcCmd_Next:
-                    impl::Next();
-                    return 0;
-
-                case TuneIpcCmd_Prev:
-                    impl::Prev();
-                    return 0;
-
-                case TuneIpcCmd_GetVolume:
-                    GET_SINGLE(float, impl::GetVolume);
-
-                case TuneIpcCmd_SetVolume:
-                    SET_SINGLE(float, impl::SetVolume);
-
-                case TuneIpcCmd_GetTitleVolume:
-                    GET_SINGLE(float, impl::GetTitleVolume);
-
-                case TuneIpcCmd_SetTitleVolume:
-                    SET_SINGLE(float, impl::SetTitleVolume);
-
-                case TuneIpcCmd_GetDefaultTitleVolume:
-                    GET_SINGLE(float, impl::GetDefaultTitleVolume);
-
-                case TuneIpcCmd_SetDefaultTitleVolume:
-                    SET_SINGLE(float, impl::SetDefaultTitleVolume);
-
-                case TuneIpcCmd_GetRepeatMode:
-                    GET_SINGLE(RepeatMode, impl::GetRepeatMode);
-
-                case TuneIpcCmd_SetRepeatMode:
-                    SET_SINGLE(RepeatMode, impl::SetRepeatMode);
-
-                case TuneIpcCmd_GetShuffleMode:
-                    GET_SINGLE(ShuffleMode, impl::GetShuffleMode);
-
-                case TuneIpcCmd_SetShuffleMode:
-                    SET_SINGLE(ShuffleMode, impl::SetShuffleMode);
-
-                case TuneIpcCmd_GetPlaylistSize:
-                    GET_SINGLE(u32, impl::GetPlaylistSize);
-
-                case TuneIpcCmd_GetPlaylistItem:
-                    if (r->hipc.meta.num_recv_buffers >= 1 && r->data.size >= sizeof(u32)) {
-                        return impl::GetPlaylistItem(
-                            *(u32 *)r->data.ptr,
-                            (char *)hipcGetBufferAddress(r->hipc.data.recv_buffers),
-                            hipcGetBufferSize(r->hipc.data.recv_buffers));
-                    }
-                    break;
-
-                case TuneIpcCmd_GetCurrentQueueItem:
-                    if (r->hipc.meta.num_recv_buffers >= 1) {
-                        *out_dataSize = sizeof(CurrentStats);
-                        return impl::GetCurrentQueueItem(
-                            (CurrentStats *)out_data,
-                            (char *)hipcGetBufferAddress(r->hipc.data.recv_buffers),
-                            hipcGetBufferSize(r->hipc.data.recv_buffers));
-                    }
-                    break;
-
-                case TuneIpcCmd_ClearQueue:
-                    impl::ClearQueue();
-                    return 0;
-
-                case TuneIpcCmd_MoveQueueItem:
-                    if (r->data.size >= 2 * sizeof(u32)) {
-                        u32 *data = (u32 *)r->data.ptr;
-                        impl::MoveQueueItem(data[0], data[1]);
-                        return 0;
-                    }
-                    break;
-
-                case TuneIpcCmd_Select:
-                    SET_SINGLE(u32, impl::Select);
-
-                case TuneIpcCmd_Seek:
-                    SET_SINGLE(u32, impl::Seek);
-
-                case TuneIpcCmd_Enqueue:
-                    if (r->hipc.meta.num_send_buffers >= 1 && r->data.size >= sizeof(EnqueueType)) {
-                        return impl::Enqueue(
-                            (const char *)hipcGetBufferAddress(r->hipc.data.send_buffers),
-                            hipcGetBufferSize(r->hipc.data.send_buffers),
-                            *(EnqueueType *)r->data.ptr);
-                    }
-
-                case TuneIpcCmd_Remove:
-                    SET_SINGLE(u32, impl::Remove);
-
-                case TuneIpcCmd_QuitServer:
-                    running = false;
-                    return 0;
-
-                case TuneIpcCmd_GetApiVersion:
-                    *out_dataSize    = sizeof(u32);
-                    *(u32 *)out_data = TUNE_API_VERSION;
-                    return 0;
-            }
-            return tune::Generic;
-        }
-
-    }
-
-    Result InitializeServer() {
-        return ipcServerInit(&g_server, "tune", 2);
-    }
-
-    Result ExitServer() {
-        return ipcServerExit(&g_server);
-    }
-
-    void LoopProcess() {
-        while (running) {
-            if (ipcServerProcess(&g_server, ServiceHandlerFunc, nullptr) == KERNELRESULT(Cancelled))
-                break;
-        }
-    }
-
-}
+#include "tune_service.hpp"
+
+#include "impl/music_player.hpp"
+#include "ipc_cmd.h"
+#include "tune_result.hpp"
+#include "tune_types.hpp"
+
+#include <nxExt.h>
+
+#define GET_SINGLE(type, expr)            \
+    ({                                    \
+        *out_dataSize     = sizeof(type); \
+        *(type *)out_data = expr();       \
+        return 0;                         \
+    })
+
+#define SET_SINGLE(type, expr)              \
+    ({                                      \
+        if (r->data.size >= sizeof(type)) { \
+            expr(*(type *)r->data.ptr);     \
+            return 0;                       \
+        }                                   \
+        break;                              \
+    })
+
+namespace tune {
+
+    namespace {
+
+        IpcServer g_server;
+        bool running = true;
+
+        Result ServiceHandlerFunc(void *arg, const IpcServerRequest *r, u8 *out_data, size_t *out_dataSize) {
+            switch (r->data.cmdId) {
+                case TuneIpcCmd_GetStatus:
+                    GET_SINGLE(bool, impl::GetStatus);
+
+                case TuneIpcCmd_Play:
+                    impl::Play();
+                    return 0;
+
+                case TuneIpcCmd_Pause:
+                    impl::Pause();
+                    return 0;
+
+                case TuneIpcCmd_Next:
+                    impl::Next();
+                    return 0;
+
+                case TuneIpcCmd_Prev:
+                    impl::Prev();
+                    return 0;
+
+                case TuneIpcCmd_GetVolume:
+                    GET_SINGLE(float, impl::GetVolume);
+
+                case TuneIpcCmd_SetVolume:
+                    SET_SINGLE(float, impl::SetVolume);
+
+                case TuneIpcCmd_GetTitleVolume:
+                    GET_SINGLE(float, impl::GetTitleVolume);
+
+                case TuneIpcCmd_SetTitleVolume:
+                    SET_SINGLE(float, impl::SetTitleVolume);
+
+                case TuneIpcCmd_GetDefaultTitleVolume:
+                    GET_SINGLE(float, impl::GetDefaultTitleVolume);
+
+                case TuneIpcCmd_SetDefaultTitleVolume:
+                    SET_SINGLE(float, impl::SetDefaultTitleVolume);
+
+                case TuneIpcCmd_GetRepeatMode:
+                    GET_SINGLE(RepeatMode, impl::GetRepeatMode);
+
+                case TuneIpcCmd_SetRepeatMode:
+                    SET_SINGLE(RepeatMode, impl::SetRepeatMode);
+
+                case TuneIpcCmd_GetShuffleMode:
+                    GET_SINGLE(ShuffleMode, impl::GetShuffleMode);
+
+                case TuneIpcCmd_SetShuffleMode:
+                    SET_SINGLE(ShuffleMode, impl::SetShuffleMode);
+
+                case TuneIpcCmd_GetPlaylistSize:
+                    GET_SINGLE(u32, impl::GetPlaylistSize);
+
+                case TuneIpcCmd_GetPlaylistItem:
+                    if (r->hipc.meta.num_recv_buffers >= 1 && r->data.size >= sizeof(u32)) {
+                        return impl::GetPlaylistItem(
+                            *(u32 *)r->data.ptr,
+                            (char *)hipcGetBufferAddress(r->hipc.data.recv_buffers),
+                            hipcGetBufferSize(r->hipc.data.recv_buffers));
+                    }
+                    break;
+
+                case TuneIpcCmd_GetCurrentQueueItem:
+                    if (r->hipc.meta.num_recv_buffers >= 1) {
+                        *out_dataSize = sizeof(CurrentStats);
+                        return impl::GetCurrentQueueItem(
+                            (CurrentStats *)out_data,
+                            (char *)hipcGetBufferAddress(r->hipc.data.recv_buffers),
+                            hipcGetBufferSize(r->hipc.data.recv_buffers));
+                    }
+                    break;
+
+                case TuneIpcCmd_ClearQueue:
+                    impl::ClearQueue();
+                    return 0;
+
+                case TuneIpcCmd_MoveQueueItem:
+                    if (r->data.size >= 2 * sizeof(u32)) {
+                        u32 *data = (u32 *)r->data.ptr;
+                        impl::MoveQueueItem(data[0], data[1]);
+                        return 0;
+                    }
+                    break;
+
+                case TuneIpcCmd_Select:
+                    SET_SINGLE(u32, impl::Select);
+
+                case TuneIpcCmd_Seek:
+                    SET_SINGLE(u32, impl::Seek);
+
+                case TuneIpcCmd_Enqueue:
+                    if (r->hipc.meta.num_send_buffers >= 1 && r->data.size >= sizeof(EnqueueType)) {
+                        return impl::Enqueue(
+                            (const char *)hipcGetBufferAddress(r->hipc.data.send_buffers),
+                            hipcGetBufferSize(r->hipc.data.send_buffers),
+                            *(EnqueueType *)r->data.ptr);
+                    }
+
+                case TuneIpcCmd_Remove:
+                    SET_SINGLE(u32, impl::Remove);
+
+                case TuneIpcCmd_QuitServer:
+                    running = false;
+                    return 0;
+
+                case TuneIpcCmd_GetApiVersion:
+                    *out_dataSize    = sizeof(u32);
+                    *(u32 *)out_data = TUNE_API_VERSION;
+                    return 0;
+            }
+            return tune::Generic;
+        }
+
+    }
+
+    Result InitializeServer() {
+        return ipcServerInit(&g_server, "tune", 2);
+    }
+
+    Result ExitServer() {
+        return ipcServerExit(&g_server);
+    }
+
+    void LoopProcess() {
+        while (running) {
+            if (ipcServerProcess(&g_server, ServiceHandlerFunc, nullptr) == KERNELRESULT(Cancelled))
+                break;
+        }
+    }
+
+}
diff --git a/sys-tune/sys-tune.elf b/sys-tune/sys-tune.elf
new file mode 100644
index 0000000..ce8166a
Binary files /dev/null and b/sys-tune/sys-tune.elf differ
diff --git a/sys-tune/sys-tune.npdm b/sys-tune/sys-tune.npdm
new file mode 100644
index 0000000..89698ce
Binary files /dev/null and b/sys-tune/sys-tune.npdm differ
diff --git a/sys-tune/sys-tune.nso b/sys-tune/sys-tune.nso
new file mode 100644
index 0000000..1ba42fc
Binary files /dev/null and b/sys-tune/sys-tune.nso differ
diff --git a/sys-tune/sys-tune.nsp b/sys-tune/sys-tune.nsp
new file mode 100644
index 0000000..7bbb765
Binary files /dev/null and b/sys-tune/sys-tune.nsp differ
diff --git a/wah.sh b/wah.sh
index f380999..8bb68c7 100644
--- a/wah.sh
+++ b/wah.sh
@@ -1 +1 @@
-make clean; make wah; make dist;
+make clean; make wah; make dist;