feat: archive full dolphin-emu/Sys, add DSP/font/IPL paths to pack

dolphin-emu/Sys/ folder (2562 files) from libretro/dolphin Data/Sys. retroarch.yml: DSP firmware (dsp_coef.bin, dsp_rom.bin), fonts (font_western.bin, font_japanese.bin) at dolphin-emu/Sys/GC/ paths. ref: DolphinLibretro/Boot.cpp:72-73, HW/DSPLLE/DSPHost.cpp, HW/EXI/EXI_DeviceIPL.cpp. pack: 452 files, 0 missing.
2026-06-13 02:25:28 -05:00 · 2026-03-18 15:16:20 +01:00
parent e5681c4ae8
commit bb307aa250
2563 changed files with 123852 additions and 55 deletions
@@ -0,0 +1,54 @@
+// Snaps one colour channel up to the nearest of six fixed levels
+// (0.1, 0.2, 0.4, 0.6, 0.8, 1.0).
+float PosterizeChannel(float value)
+{
+	if (value < 0.1)
+		return 0.1;
+	if (value < 0.20)
+		return 0.20;
+	if (value < 0.40)
+		return 0.40;
+	if (value < 0.60)
+		return 0.60;
+	if (value < 0.80)
+		return 0.80;
+	return 1.0;
+}
+
+void main()
+{
+	// Edge length of one mosaic cell, in source pixels. Raise it for bigger blocks.
+	float cellSize = 3.0;
+
+	// Snap the sampling position to the start of the cell that contains it.
+	float2 cellIndex = floor(GetCoordinates() * GetResolution() / cellSize);
+	float2 snapped = cellIndex * cellSize * GetInvResolution();
+
+	float4 texel = SampleLocation(snapped);
+
+	// Each channel is quantized independently; alpha is passed through.
+	SetOutput(float4(PosterizeChannel(texel.r), PosterizeChannel(texel.g), PosterizeChannel(texel.b), texel.a));
+}
@@ -0,0 +1,79 @@
+// Snaps one colour channel up to the nearest of ten fixed levels
+// (0.06, 0.13, 0.26, 0.33, 0.46, 0.60, 0.73, 0.80, 0.93, 1.0).
+float PosterizeChannel(float value)
+{
+	if (value < 0.06)
+		return 0.06;
+	if (value < 0.13)
+		return 0.13;
+	if (value < 0.26)
+		return 0.26;
+	if (value < 0.33)
+		return 0.33;
+	if (value < 0.46)
+		return 0.46;
+	if (value < 0.60)
+		return 0.60;
+	if (value < 0.73)
+		return 0.73;
+	if (value < 0.80)
+		return 0.80;
+	if (value < 0.93)
+		return 0.93;
+	return 1.0;
+}
+
+void main()
+{
+	// Edge length of one mosaic cell, in source pixels. Raise it for bigger blocks.
+	float cellSize = 2.0;
+
+	// Snap the sampling position to the start of the cell that contains it.
+	float2 cellIndex = floor(GetCoordinates() * GetResolution() / cellSize);
+	float2 snapped = cellIndex * cellSize * GetInvResolution();
+
+	float4 texel = SampleLocation(snapped);
+
+	// Each channel is quantized independently; alpha is passed through.
+	SetOutput(float4(PosterizeChannel(texel.r), PosterizeChannel(texel.g), PosterizeChannel(texel.b), texel.a));
+}
@@ -0,0 +1,20 @@
+// Anaglyph Amber-Blue shader based on Dubois algorithm
+// Constants taken from the screenshot:
+// "https://www.flickr.com/photos/e_dubois/5230654930/"
+// Eric Dubois
+
+void main()
+{
+	// The two stereo layers that get mixed into one anaglyph image.
+	float4 layer0 = SampleLayer(0);
+	float4 layer1 = SampleLayer(1);
+
+	// Weights applied to layer 0 for the output red, green and blue channel.
+	float3 w0_red   = float3( 1.062,-0.205, 0.299);
+	float3 w0_green = float3(-0.026, 0.908, 0.068);
+	float3 w0_blue  = float3(-0.038,-0.173, 0.022);
+
+	// Weights applied to layer 1 for the output red, green and blue channel.
+	float3 w1_red   = float3(-0.016,-0.123,-0.017);
+	float3 w1_green = float3( 0.006, 0.062, 0.017);
+	float3 w1_blue  = float3(-0.094,-0.185, 0.991);
+
+	float out_red   = dot(w0_red, layer0.rgb) + dot(w1_red, layer1.rgb);
+	float out_green = dot(w0_green, layer0.rgb) + dot(w1_green, layer1.rgb);
+	float out_blue  = dot(w0_blue, layer0.rgb) + dot(w1_blue, layer1.rgb);
+
+	// Alpha is taken from layer 0.
+	SetOutput(float4(out_red, out_green, out_blue, layer0.a));
+}
@@ -0,0 +1,20 @@
+// Anaglyph Green-Magenta shader based on Dubois algorithm
+// Constants taken from the screenshot:
+// "https://www.flickr.com/photos/e_dubois/5132528166/"
+// Eric Dubois
+
+void main()
+{
+	// The two stereo layers that get mixed into one anaglyph image.
+	float4 layer0 = SampleLayer(0);
+	float4 layer1 = SampleLayer(1);
+
+	// Weights applied to layer 0 for the output red, green and blue channel.
+	float3 w0_red   = float3(-0.062,-0.158,-0.039);
+	float3 w0_green = float3( 0.284, 0.668, 0.143);
+	float3 w0_blue  = float3(-0.015,-0.027, 0.021);
+
+	// Weights applied to layer 1 for the output red, green and blue channel.
+	float3 w1_red   = float3( 0.529, 0.705, 0.024);
+	float3 w1_green = float3(-0.016,-0.015, 0.065);
+	float3 w1_blue  = float3( 0.009, 0.075, 0.937);
+
+	float out_red   = dot(w0_red, layer0.rgb) + dot(w1_red, layer1.rgb);
+	float out_green = dot(w0_green, layer0.rgb) + dot(w1_green, layer1.rgb);
+	float out_blue  = dot(w0_blue, layer0.rgb) + dot(w1_blue, layer1.rgb);
+
+	// Alpha is taken from layer 0.
+	SetOutput(float4(out_red, out_green, out_blue, layer0.a));
+}
@@ -0,0 +1,21 @@
+// Anaglyph Red-Cyan shader based on Dubois algorithm
+// Constants taken from the paper:
+// "Conversion of a Stereo Pair to Anaglyph with
+// the Least-Squares Projection Method"
+// Eric Dubois, March 2009
+
+void main()
+{
+	// The two stereo layers that get mixed into one anaglyph image.
+	float4 layer0 = SampleLayer(0);
+	float4 layer1 = SampleLayer(1);
+
+	// Weights applied to layer 0 for the output red, green and blue channel.
+	float3 w0_red   = float3( 0.437, 0.449, 0.164);
+	float3 w0_green = float3(-0.062,-0.062,-0.024);
+	float3 w0_blue  = float3(-0.048,-0.050,-0.017);
+
+	// Weights applied to layer 1 for the output red, green and blue channel.
+	float3 w1_red   = float3(-0.011,-0.032,-0.007);
+	float3 w1_green = float3( 0.377, 0.761, 0.009);
+	float3 w1_blue  = float3(-0.026,-0.093, 1.234);
+
+	float out_red   = dot(w0_red, layer0.rgb) + dot(w1_red, layer1.rgb);
+	float out_green = dot(w0_green, layer0.rgb) + dot(w1_green, layer1.rgb);
+	float out_blue  = dot(w0_blue, layer0.rgb) + dot(w1_blue, layer1.rgb);
+
+	// Alpha is taken from layer 0.
+	SetOutput(float4(out_red, out_green, out_blue, layer0.a));
+}
@@ -0,0 +1,8 @@
+// Anaglyph Red-Cyan shader without compensation
+
+void main()
+{
+	// Red and alpha come from layer 0, green and blue from layer 1.
+	float4 first = SampleLayer(0);
+	float4 second = SampleLayer(1);
+	SetOutput(float4(first.r, second.g, second.b, first.a));
+}
@@ -0,0 +1,10 @@
+// Anaglyph Red-Cyan grayscale shader
+
+void main()
+{
+	float4 first = SampleLayer(0);
+	float4 second = SampleLayer(1);
+
+	// Plain (unweighted) average of the three colour channels of each layer.
+	float gray_first = (first.r + first.g + first.b) / 3.0;
+	float gray_second = (second.r + second.g + second.b) / 3.0;
+
+	// Layer 0 drives red, layer 1 drives green and blue; alpha follows layer 0.
+	SetOutput(float4(gray_first, gray_second, gray_second, first.a));
+}
@@ -0,0 +1,12 @@
+// Anaglyph Red-Cyan luma grayscale shader
+// Info: https://web.archive.org/web/20040101053504/http://www.oreillynet.com:80/cs/user/view/cs_msg/8691
+
+void main()
+{
+	// Per-channel weights used to collapse a colour into a single gray value.
+	float3 luma_weights = float3(0.222, 0.707, 0.071);
+
+	float4 first = SampleLayer(0);
+	float4 second = SampleLayer(1);
+
+	float gray_first = dot(first.rgb, luma_weights);
+	float gray_second = dot(second.rgb, luma_weights);
+
+	// Layer 0 drives red, layer 1 drives green and blue; alpha follows layer 0.
+	SetOutput(float4(gray_first, gray_second, gray_second, first.a));
+}
@@ -0,0 +1,69 @@
+// Based on https://github.com/Filoppi/PumboAutoHDR
+
+/*
+[configuration]
+
+[OptionRangeFloat]
+GUIName = HDR Display Max Nits
+OptionName = HDR_DISPLAY_MAX_NITS
+MinValue = 80
+MaxValue = 2000
+StepAmount = 1
+DefaultValue = 400
+
+[OptionRangeFloat]
+GUIName = Shoulder Start Alpha
+OptionName = AUTO_HDR_SHOULDER_START_ALPHA
+MinValue = 0
+MaxValue = 1
+StepAmount = 0.01
+DefaultValue = 0
+
+[OptionRangeFloat]
+GUIName = Shoulder Pow
+OptionName = AUTO_HDR_SHOULDER_POW
+MinValue = 1
+MaxValue = 10
+StepAmount = 0.05
+DefaultValue = 2.5
+
+[/configuration]
+*/
+
+// Weighted sum of the RGB channels, used by main() as the brightness estimate.
+float luminance(float3 color)
+{
+	float3 channel_weights = float3(0.2126f, 0.7152f, 0.0722f);
+	return dot(color, channel_weights);
+}
+
+void main()
+{
+	float4 color = Sample();
+
+	// The effect only applies to linear HDR output; anything else is passed through untouched.
+	bool hdr_linear = OptionEnabled(hdr_output) && OptionEnabled(linear_space_output);
+	if (!hdr_linear)
+	{
+		SetOutput(color);
+		return;
+	}
+
+	// Ratio between paper white and SDR white. Dividing by it brings the colour back to
+	// the 0-1 SDR range; it is multiplied back in before output.
+	const float paper_white_scale = hdr_paper_white_nits / hdr_sdr_white_nits;
+	color.rgb /= paper_white_scale;
+
+	// Brightness of the SDR colour, measured as luminance rather than a channel average.
+	float sdr_ratio = luminance(color.rgb);
+
+	// Brightest white the expansion may reach, relative to SDR white (never below 1).
+	const float peak_white = max(HDR_DISPLAY_MAX_NITS / paper_white_scale, hdr_sdr_white_nits) / hdr_sdr_white_nits;
+
+	bool in_shoulder = sdr_ratio > AUTO_HDR_SHOULDER_START_ALPHA && AUTO_HDR_SHOULDER_START_ALPHA < 1.0;
+	if (in_shoulder)
+	{
+		// 0 at the shoulder start, 1 once the luminance reaches (or exceeds) 1.
+		const float remaining = max(1.0 - sdr_ratio, 0.0) / (1.0 - AUTO_HDR_SHOULDER_START_ALPHA);
+		const float shoulder = 1.0 - remaining;
+
+		// Extra brightness added on top of the SDR value, shaped by the shoulder power.
+		const float extra = pow(shoulder, AUTO_HDR_SHOULDER_POW) * (peak_white - 1.0);
+		const float boosted = sdr_ratio + extra;
+		color.rgb *= boosted / sdr_ratio;
+	}
+
+	color.rgb *= paper_white_scale;
+
+	SetOutput(color);
+}
@@ -0,0 +1,67 @@
+//			DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
+//					Version 2, December 2004
+
+// Copyright (C) 2013 mudlord
+
+// Everyone is permitted to copy and distribute verbatim or modified
+// copies of this license document, and changing it is allowed as long
+// as the name is changed.
+
+//			DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
+//	TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+// 0. You just DO WHAT THE FUCK YOU WANT TO.
+
+#define FXAA_REDUCE_MIN		(1.0/ 128.0)
+#define FXAA_REDUCE_MUL		(1.0 / 8.0)
+#define FXAA_SPAN_MAX		8.0
+
+// Edge-directed blur of the pixel at fragCoord (given in pixel units; it is converted to
+// normalized coordinates with GetInvResolution()). Returns the filtered colour with alpha 1.
+float4 applyFXAA(float2 fragCoord)
+{
+	float2 texel = GetInvResolution();
+	float2 center = fragCoord * texel;
+
+	// Centre tap and its four diagonal neighbours.
+	float3 colNW = SampleLocation((fragCoord + float2(-1.0, -1.0)) * texel).xyz;
+	float3 colNE = SampleLocation((fragCoord + float2(1.0, -1.0)) * texel).xyz;
+	float3 colSW = SampleLocation((fragCoord + float2(-1.0, 1.0)) * texel).xyz;
+	float3 colSE = SampleLocation((fragCoord + float2(1.0, 1.0)) * texel).xyz;
+	float3 colM  = SampleLocation(center).xyz;
+
+	// Brightness of every tap, plus the local brightness range.
+	float3 weights = float3(0.299, 0.587, 0.114);
+	float yNW = dot(colNW, weights);
+	float yNE = dot(colNE, weights);
+	float ySW = dot(colSW, weights);
+	float ySE = dot(colSE, weights);
+	float yM  = dot(colM,  weights);
+	float yLow  = min(yM, min(min(yNW, yNE), min(ySW, ySE)));
+	float yHigh = max(yM, max(max(yNW, yNE), max(ySW, ySE)));
+
+	// Blur direction derived from the brightness differences between the diagonal taps.
+	float2 dir = float2(-((yNW + yNE) - (ySW + ySE)),
+						((yNW + ySW) - (yNE + ySE)));
+
+	// Damping term that keeps the scale factor below finite.
+	float dirReduce = max((yNW + yNE + ySW + ySE) *
+						(0.25 * FXAA_REDUCE_MUL), FXAA_REDUCE_MIN);
+
+	// Scale by the smaller component and clamp the span to +-FXAA_SPAN_MAX pixels.
+	float scale = 1.0 / (min(abs(dir.x), abs(dir.y)) + dirReduce);
+	float2 spanHi = float2(FXAA_SPAN_MAX, FXAA_SPAN_MAX);
+	float2 spanLo = float2(-FXAA_SPAN_MAX, -FXAA_SPAN_MAX);
+	dir = min(spanHi, max(spanLo, dir * scale)) * texel;
+
+	// Narrow blur: two taps close to the centre along the direction.
+	float3 narrow = 0.5 * (
+		SampleLocation(center + dir * (1.0 / 3.0 - 0.5)).xyz +
+		SampleLocation(center + dir * (2.0 / 3.0 - 0.5)).xyz);
+	// Wide blur: the narrow result mixed with two taps at the ends of the span.
+	float3 wide = narrow * 0.5 + 0.25 * (
+		SampleLocation(center + dir * -0.5).xyz +
+		SampleLocation(center + dir * 0.5).xyz);
+
+	// Use the wide blur only while its brightness stays within the local range.
+	float yWide = dot(wide, weights);
+	bool outOfRange = (yWide < yLow) || (yWide > yHigh);
+	return outOfRange ? float4(narrow, 1.0) : float4(wide, 1.0);
+}
+
+void main()
+{
+	// applyFXAA expects pixel units, so scale the normalized coordinates first.
+	float2 pixelCoord = GetCoordinates() * GetResolution();
+	SetOutput(applyFXAA(pixelCoord));
+}
@@ -0,0 +1,7 @@
+// Passive (horizontal rows) shader
+
+void main()
+{
+	// Even window rows show layer 0, odd rows show layer 1.
+	int row = int(GetWindowResolution().y * GetCoordinates().y);
+	SetOutput(SampleLayer(row % 2));
+}
@@ -0,0 +1,129 @@
+/*
+[configuration]
+
+[OptionRangeFloat]
+GUIName = Amplification
+OptionName = AMPLIFICATION
+MinValue = 1.0
+MaxValue = 6.0
+StepAmount = 0.25
+DefaultValue = 2.5
+
+[/configuration]
+*/
+
+// ICtCP Colorspace as defined by Dolby here:
+// https://professional.dolby.com/siteassets/pdfs/ictcp_dolbywhitepaper_v071.pdf
+
+/***** Transfer Function *****/
+
+// Constants of the HLG curve implemented by HLG_f / HLG_inv_f below.
+// b and c are derived from a, so only a is a free literal.
+const float a = 0.17883277;
+const float b = 1.0 - 4.0 * a;
+const float c = 0.5 - a * log(4.0 * a);
+
+// Forward HLG curve for one component: 0 for negative input, sqrt(3x) below 1/12,
+// and the logarithmic segment a * log(12x - b) + c from 1/12 upwards.
+float HLG_f(float x)
+{
+    if (x < 0.0)
+        return 0.0;
+
+    return (x < 1.0 / 12.0) ? sqrt(3.0 * x) : a * log(12.0 * x - b) + c;
+}
+
+// Inverse of HLG_f for one component: 0 for negative input, x^2 / 3 below 1/2,
+// and the exponential segment (exp((x - c) / a) + b) / 12 from 1/2 upwards.
+float HLG_inv_f(float x)
+{
+    if (x < 0.0)
+        return 0.0;
+
+    return (x < 1.0 / 2.0) ? x * x / 3.0 : (exp((x - c) / a) + b) / 12.0;
+}
+
+// Applies HLG_f to x, y and z; w is passed through unchanged.
+float4 HLG(float4 lms)
+{
+    float l = HLG_f(lms.x);
+    float m = HLG_f(lms.y);
+    float s = HLG_f(lms.z);
+    return float4(l, m, s, lms.w);
+}
+
+// Applies HLG_inv_f to x, y and z; w is passed through unchanged.
+float4 HLG_inv(float4 lms)
+{
+    float l = HLG_inv_f(lms.x);
+    float m = HLG_inv_f(lms.y);
+    float s = HLG_inv_f(lms.z);
+    return float4(l, m, s, lms.w);
+}
+
+/***** Linear <--> ICtCp *****/
+
+// Linear RGB -> LMS. GLSL matrix constructors take their values column by column, so
+// each source row below is one matrix COLUMN (the contribution of R, G, B and w in turn).
+// The integer weights are scaled by 1/4096; the 4096 on the diagonal leaves w unchanged.
+const mat4 RGBtoLMS = mat4(
+                          1688.0, 683.0, 99.0, 0.0,
+                          2146.0, 2951.0, 309.0, 0.0,
+                          262.0, 462.0, 3688.0, 0.0,
+                          0.0, 0.0, 0.0, 4096.0)
+    / 4096.0;
+
+// Encoded LMS -> ICtCp, same column-by-column layout and 1/4096 scaling as above
+// (the source rows are the contributions of L, M, S and w in turn).
+const mat4 LMStoICtCp = mat4(
+                            +2048.0, +3625.0, +9500.0, 0.0,
+                            +2048.0, -7465.0, -9212.0, 0.0,
+                            +0.0, +3840.0, -288.0, 0.0,
+                            +0.0, +0.0, +0.0, 4096.0)
+    / 4096.0;
+
+// Linear RGB -> ICtCp: RGB to LMS, HLG-encode the LMS components, then LMS to ICtCp.
+float4 LinearRGBToICtCP(float4 c)
+{
+    float4 lms_linear = RGBtoLMS * c;
+    float4 lms_encoded = HLG(lms_linear);
+    return LMStoICtCp * lms_encoded;
+}
+
+/***** ICtCp <--> Linear *****/
+
+// Reverse transforms, obtained by inverting the forward matrices rather than hard-coding them.
+mat4 ICtCptoLMS = inverse(LMStoICtCp);
+mat4 LMStoRGB = inverse(RGBtoLMS);
+
+// ICtCp -> linear RGB: ICtCp to encoded LMS, undo the HLG encoding, then LMS to RGB.
+float4 ICtCpToLinearRGB(float4 c)
+{
+    float4 lms_encoded = ICtCptoLMS * c;
+    float4 lms_linear = HLG_inv(lms_encoded);
+    return LMStoRGB * lms_linear;
+}
+
+// Brightens the image in ICtCp space when rendering linear HDR output; in any other
+// output mode the sampled colour is written out unchanged.
+void main()
+{
+    float4 color = Sample();
+
+    // Nothing to do here, we are in SDR
+    if (!OptionEnabled(hdr_output) || !OptionEnabled(linear_space_output)) {
+        SetOutput(color);
+        return;
+    }
+
+    // Renormalize Color to be in [0.0 - 1.0] SDR Space. We will revert this later.
+    const float hdr_paper_white = hdr_paper_white_nits / hdr_sdr_white_nits;
+    color.rgb /= hdr_paper_white;
+
+    // Convert Color to Perceptual Color Space. This will allow us to do perceptual
+    // scaling while also being able to use the luminance channel.
+    float4 ictcp_color = LinearRGBToICtCP(color);
+
+    // Scale the color in perceptual space depending on the perceived luminance.
+    //
+    // At low luminances, ~0.0, pow(AMPLIFICATION, ~0.0) ~= 1.0, so the
+    // color will appear to be unchanged. This is important as we don't want to
+    // over expose dark colors which would not have otherwise been seen.
+    //
+    // At high luminances, ~1.0, pow(AMPLIFICATION, ~1.0) ~= AMPLIFICATION,
+    // which is equivalent to scaling the color by AMPLIFICATION. This is
+    // important as we want to get the most out of the display, and we want to
+    // get bright colors to hit their target brightness.
+    //
+    // For more information, see this desmos demonstrating this scaling process:
+    // https://www.desmos.com/calculator/syjyrjsj5c
+    //
+    // NOTE(review): the base actually used below is HLG_f(AMPLIFICATION), not the raw
+    // AMPLIFICATION the text above talks about, and the exponent is the length of the
+    // whole (I, Ct, Cp) vector rather than the I component alone - confirm this is intended.
+    // NOTE(review): ictcp_color is a float4, so the multiplication also scales w.
+    float exposure = length(ictcp_color.xyz);
+    ictcp_color *= pow(HLG_f(AMPLIFICATION), exposure);
+
+    // Convert back to Linear RGB and output the color to the display.
+    // We use hdr_paper_white to renormalize the color to the comfortable
+    // SDR viewing range.
+    // NOTE(review): the scalar multiplies all four components, including w.
+    SetOutput(hdr_paper_white * ICtCpToLinearRGB(ictcp_color));
+}
@@ -0,0 +1,16 @@
+void main()
+{
+	float4 src = Sample();
+
+	// Red and blue are both set to 0.5 when both inputs exceed 0.15, otherwise to 0.
+	bool tinted = src.r > 0.15 && src.b > 0.15;
+	float redBlue = tinted ? 0.5 : 0.0;
+
+	// Green is the larger of the input green and the sum of input red and blue.
+	float green = max(src.r + src.b, src.g);
+
+	// Output alpha is always 1.
+	SetOutput(float4(redBlue, green, redBlue, 1.0));
+}
@@ -0,0 +1,4 @@
+void main()
+{
+	// Difference between the two opposite diagonal neighbours, amplified eightfold.
+	float4 diagPlus = SampleOffset(int2(1, 1));
+	float4 diagMinus = SampleOffset(int2(-1, -1));
+	SetOutput((diagPlus - diagMinus) * 8.0);
+}
@@ -0,0 +1,6 @@
+void main()
+{
+	float4 diagPlus = SampleOffset(int2( 1,  1));
+	float4 diagMinus = SampleOffset(int2(-1, -1));
+
+	// The (+1, +1) neighbour is squared and boosted by 1.3 before the opposite
+	// neighbour is subtracted; the difference is then amplified eightfold.
+	float4 boosted = diagPlus*diagPlus*1.3;
+	SetOutput(( boosted - diagMinus ) * 8.0);
+}
@@ -0,0 +1,485 @@
+/*
+[configuration]
+
+[OptionBool]
+GUIName = Use target window resolution
+OptionName = USE_WINDOW_RES
+DefaultValue = true
+
+[OptionBool]
+GUIName = Debug: Calculate only one character per subgroup
+OptionName = DEBUG_ONLY_ONE_CHAR
+DefaultValue = false
+
+[/configuration]
+*/
+
+// Number of font characters the CalcChar* search loops iterate over.
+const uint MAX_CHARS = 96u;                     // max 96, must be a multiple of 32
+// Selects CalcCharFallback in main() when the subgroup path is not taken.
+const bool HAVE_FULL_FEATURE_FALLBACK = false;  // terribly slow, can easily softlock the GPU
+// Characters evaluated per outer-loop step in CalcCharFallback.
+const uint UNROLL_FALLBACK = 4;
+// Characters evaluated per lane and outer-loop step in CalcCharSIMD.
+const uint UNROLL_SIMD = 3;  // max MAX_CHARS / 32
+
+// #undef SUPPORTS_SUBGROUP_REDUCTION
+
+#ifdef API_VULKAN
+// By default, subgroupBroadcast only supports compile time constants as index.
+// However we need an uniform instead. This is always supported in OpenGL,
+// but in Vulkan only in SPIR-V >= 1.5.
+// So fall back to subgroupShuffle on Vulkan instead.
+#define subgroupBroadcast subgroupShuffle
+#endif
+
+/*
+The header-only font
+We have 96 (ASCII) characters, each of them is 12 pixels high and 8 pixels wide.
+To store the boolean value per pixel, 96 bits per character is needed.
+So three 32 bit integers are used per character.
+This takes in total roughly 1 kB of constant buffer.
+The first character must be all-one for the optimized implementation below.
+*/
+// Size of one character cell in pixels, and the number of characters in the font.
+const uint char_width = 8;
+const uint char_height = 12;
+const uint char_count = 96;
+// Pixels per character (8 * 12 = 96), i.e. three 32-bit words in rasters.
+const uint char_pixels = char_width * char_height;
+// Cell size as a float vector, for converting between pixel and character coordinates.
+const float2 char_dim = float2(char_width, char_height);
+
+// Font bitmaps, one entry per character. Pixel p of a character (p = x + char_width * y,
+// as computed in GetFinalPixel) is bit (p % 32) of word (p / 32); see SampleFont.
+// Entry 0 has every bit set and the last entry (95) has none.
+const uint rasters[char_count][(char_pixels + 31) / 32] = {
+    {0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}, {0x18181818, 0x00181818, 0x00181800},
+    {0x6c6c6c6c, 0x00000000, 0x00000000}, {0x66660000, 0xff6666ff, 0x00006666},
+    {0x1bff7e18, 0xd8f87e1f, 0x00187eff}, {0x6edb1b0e, 0x760c1830, 0x0070d8db},
+    {0x3333361c, 0x1b0e0e1b, 0x00fe63f3}, {0x18383070, 0x00000000, 0x00000000},
+    {0x0c0c1830, 0x0c0c0c0c, 0x0030180c}, {0x3030180c, 0x30303030, 0x000c1830},
+    {0x5a990000, 0x5a3cff3c, 0x00000099}, {0x18180000, 0x18ffff18, 0x00001818},
+    {0x00000000, 0x38000000, 0x000c1838}, {0x00000000, 0x00ffff00, 0x00000000},
+    {0x00000000, 0x00000000, 0x00001c1c}, {0x6060c0c0, 0x18183030, 0x06060c0c},
+    {0xe3c3663c, 0xc7cfdbf3, 0x003c66c3}, {0x181e1c18, 0x18181818, 0x007e1818},
+    {0x60c0e77e, 0x060c1830, 0x00ff0303}, {0xc0c0e77e, 0xc0e07ee0, 0x007ee7c0},
+    {0x363c3830, 0x3030ff33, 0x00303030}, {0x030303ff, 0xc0e07f03, 0x007ee7c0},
+    {0x0303e77e, 0xc3e37f03, 0x007ee7c3}, {0xc0c0c0ff, 0x0c183060, 0x000c0c0c},
+    {0xc3c3e77e, 0xc3e77ee7, 0x007ee7c3}, {0xc3c3e77e, 0xc0c0fee7, 0x007ee7c0},
+    {0x00000000, 0x00001c1c, 0x00001c1c}, {0x38000000, 0x38000038, 0x000c1838},
+    {0x0c183060, 0x0c060306, 0x00603018}, {0x00000000, 0xff00ffff, 0x000000ff},
+    {0x30180c06, 0x3060c060, 0x00060c18}, {0xc0c3c37e, 0x18183060, 0x00180000},
+    {0x7e000000, 0xdbcbbbc3, 0x00fc06f3}, {0xc3663c18, 0xc3ffc3c3, 0x00c3c3c3},
+    {0xc3c3e37f, 0xc3e37fe3, 0x007fe3c3}, {0x0303e77e, 0x03030303, 0x007ee703},
+    {0xc3e3733f, 0xc3c3c3c3, 0x003f73e3}, {0x030303ff, 0x03033f03, 0x00ff0303},
+    {0x030303ff, 0x0303033f, 0x00030303}, {0x0303e77e, 0xc3f30303, 0x007ee7c3},
+    {0xc3c3c3c3, 0xc3c3ffc3, 0x00c3c3c3}, {0x1818187e, 0x18181818, 0x007e1818},
+    {0x60606060, 0x60606060, 0x003e7763}, {0x1b3363c3, 0x1b0f070f, 0x00c36333},
+    {0x03030303, 0x03030303, 0x00ff0303}, {0xffffe7c3, 0xc3c3c3db, 0x00c3c3c3},
+    {0xcfcfc7c7, 0xf3fbdbdf, 0x00e3e3f3}, {0xc3c3e77e, 0xc3c3c3c3, 0x007ee7c3},
+    {0xc3c3e37f, 0x03037fe3, 0x00030303}, {0xc3c3663c, 0xdbc3c3c3, 0x00fc76fb},
+    {0xc3c3e37f, 0x1b0f7fe3, 0x00c36333}, {0x0303e77e, 0xc0e07e07, 0x007ee7c0},
+    {0x181818ff, 0x18181818, 0x00181818}, {0xc3c3c3c3, 0xc3c3c3c3, 0x007ee7c3},
+    {0xc3c3c3c3, 0x6666c3c3, 0x00183c3c}, {0xc3c3c3c3, 0xffdbdbc3, 0x00c3e7ff},
+    {0x3c6666c3, 0x3c3c183c, 0x00c36666}, {0x3c6666c3, 0x1818183c, 0x00181818},
+    {0x60c0c0ff, 0x060c7e30, 0x00ff0303}, {0x0c0c0c3c, 0x0c0c0c0c, 0x003c0c0c},
+    {0x0c0c0606, 0x30301818, 0xc0c06060}, {0x3030303c, 0x30303030, 0x003c3030},
+    {0xc3663c18, 0x00000000, 0x00000000}, {0x00000000, 0x00000000, 0xff000000},
+    {0x181c0c0e, 0x00000000, 0x00000000}, {0x00000000, 0xfec0c37e, 0x00fec3c3},
+    {0x03030303, 0xc3c37f03, 0x007fc3c3}, {0x00000000, 0x0303c37e, 0x007ec303},
+    {0xc0c0c0c0, 0xc3c3fec0, 0x00fec3c3}, {0x00000000, 0x7fc3c37e, 0x00fe0303},
+    {0x0c0ccc78, 0x0c0c3f0c, 0x000c0c0c}, {0x00000000, 0xc3c3c37e, 0xc3c0c0fe},
+    {0x03030303, 0xc3c3c37f, 0x00c3c3c3}, {0x00001800, 0x18181818, 0x00181818},
+    {0x00003000, 0x30303030, 0x36303030}, {0x03030303, 0x0f1b3363, 0x0063331f},
+    {0x1818181e, 0x18181818, 0x007e1818}, {0x00000000, 0xdbdbdb7f, 0x00dbdbdb},
+    {0x00000000, 0x6363633f, 0x00636363}, {0x00000000, 0x6363633e, 0x003e6363},
+    {0x00000000, 0xc3c3c37f, 0x03037fc3}, {0x00000000, 0xc3c3c3fe, 0xc0c0fec3},
+    {0x00000000, 0x0303077f, 0x00030303}, {0x00000000, 0x7e0303fe, 0x007fc0c0},
+    {0x0c0c0c00, 0x0c0c0c3f, 0x00386c0c}, {0x00000000, 0x63636363, 0x007e6363},
+    {0x00000000, 0x6666c3c3, 0x00183c3c}, {0x00000000, 0xdbc3c3c3, 0x00c3e7ff},
+    {0x00000000, 0x183c66c3, 0x00c3663c}, {0x00000000, 0x3c6666c3, 0x06060c18},
+    {0x00000000, 0x183060ff, 0x00ff060c}, {0x181818f0, 0x181c0f1c, 0x00f01818},
+    {0x18181818, 0x18181818, 0x18181818}, {0x1818180f, 0x1838f038, 0x000f1818},
+    {0x06000000, 0x0060f18f, 0x00000000}, {0x00000000, 0x00000000, 0x00000000}};
+
+// Precalculated sum of all pixels per character, i.e. the number of set bits in the
+// matching rasters entry (96 for the all-one character 0, 0 for the empty character 95).
+// CalcCharRes reads this instead of summing the font samples at run time.
+const uint raster_active_pixels[char_count] = {
+    96, 18, 16, 40, 56, 42, 46, 10, 22, 22, 32, 28, 10, 16, 6,  24, 52, 29, 36, 44, 35, 42, 50, 28,
+    58, 51, 12, 16, 22, 32, 22, 26, 41, 46, 57, 38, 52, 38, 32, 46, 48, 30, 31, 43, 28, 56, 64, 52,
+    42, 52, 52, 44, 28, 48, 42, 58, 42, 32, 38, 26, 24, 26, 14, 8,  10, 34, 40, 26, 40, 32, 30, 33,
+    39, 16, 20, 37, 28, 43, 30, 30, 34, 34, 20, 28, 27, 30, 26, 36, 26, 24, 26, 30, 24, 30, 14, 0};
+
+// Get one sample of the font: (pixel index, character index).
+// Returns 1.0 when the pixel is set in the character's bitmap and 0.0 otherwise.
+float SampleFont(uint2 pos)
+{
+  // pos.x selects the 32-bit word and the bit inside it, pos.y selects the character.
+  uint word = rasters[pos.y][pos.x / 32];
+  uint bit = pos.x % 32;
+  return (word >> bit) & uint(1);
+}
+
+// Get one sample of the framebuffer: (character position in screen space, pixel index).
+// Returns the RGB value at the centre of that pixel of the character cell.
+float3 SampleTex(uint2 char_pos, uint pixel)
+{
+  // Column and row of the pixel inside the cell.
+  float2 in_cell = float2(pixel % char_width, pixel / char_width);
+
+  // Cell origin plus in-cell offset, moved by half a pixel to hit the pixel centre.
+  float2 tex_pos = char_pos * char_dim + in_cell + 0.5;
+
+  // Normalize with the window or the internal resolution, depending on the option.
+  float2 inv_resolution =
+      OptionEnabled(USE_WINDOW_RES) ? GetInvWindowResolution() : GetInvResolution();
+  return SampleLocation(tex_pos * inv_resolution).xyz;
+}
+
+// Result of fitting one character to one character cell; filled in by CalcCharRes
+// and consumed by GetFinalPixel.
+struct CharResults
+{
+  float3 fg;  // font color
+  float3 bg;  // background color
+  float err;  // MSE of this configuration
+  uint c;     // character index
+};
+
+// Calculate the font and background color and the MSE for a given character.
+//   c:  character index (into rasters / raster_active_pixels)
+//   t:  sum of all texture samples of the character cell
+//   ft: sum of (font sample * texture sample) over the cell for character c
+// Returns the least-squares colors and the error with the constant tt term dropped.
+// Uniform glyphs (every pixel set, or no pixel set) make the system singular; they are
+// reported with the maximum error, so a strict "res.err < best.err" search that starts
+// from the same maximum never selects them.
+CharResults CalcCharRes(uint c, float3 t, float3 ft)
+{
+  CharResults o;
+  o.c = c;
+  // Give the colors a defined value up front so the early return below does not hand
+  // back uninitialized members.
+  o.fg = float3(0.0, 0.0, 0.0);
+  o.bg = float3(0.0, 0.0, 0.0);
+
+  // Inputs:
+  // tt: sum of all texture samples squared
+  // t: sum of all texture samples
+  // ff: sum of all font samples squared
+  // f: sum of all font samples
+  // ft: sum of all font samples * texture samples
+
+  // The font is either 1.0 or 0.0, so ff == f
+  // As the font is constant, this is pre-calculated
+  float f = raster_active_pixels[c];
+  float ff = f;
+
+  // With ff == f the denominator used below is f * (f - char_pixels). It is zero for
+  // the all-one glyph (f == char_pixels) and for the empty glyph (f == 0), so the
+  // calculation is not defined for either. Return max err instead of dividing by zero.
+  if (f == char_pixels || f == 0.0)
+  {
+    o.err = char_pixels * char_pixels;
+    return o;
+  }
+
+  // tt is only used as constant offset for the error, define it as zero
+  float3 tt = float3(0.0, 0.0, 0.0);
+
+  // The idea is to find the perfect char with the perfect background color
+  // and the perfect font color. As this is an equation with three unknowns,
+  // we can't just try all chars and color combinations.
+
+  // As criterion how "perfect" the selection is, we compare the "mean
+  // squared error" of the resulting colors of all chars. So, now the big
+  // issue: how to calculate the MSE without knowing the two colors ...
+
+  // In the next steps, "a" is the font color, "b" is the background color,
+  // "f" is the font value at this pixel, "t" is the texture value
+
+  // So the square error of one pixel is:
+  // e = ( t - a⋅f - b⋅(1-f) ) ^ 2
+
+  // In longer:
+  // e = a^2⋅f^2 - 2⋅a⋅b⋅f^2 + 2⋅a⋅b⋅f - 2⋅a⋅f⋅t + b^2⋅f^2 - 2⋅b^2⋅f + b^2 +
+  // 2⋅b⋅f⋅t - 2⋅b⋅t + t^2
+
+  // The sum of all errors is: (as shortcut, ff,f,ft,t,tt are now the sums
+  // like declared above, sum(1) is the count of pixels)
+  // sum(e) = a^2⋅ff - 2⋅a⋅b⋅ff + 2⋅a⋅b⋅f - 2⋅a⋅ft + b^2⋅ff - 2⋅b^2⋅f +
+  // b^2⋅sum(1) + 2⋅b⋅ft - 2⋅b⋅t + tt
+
+  // tt is only used as a constant offset, so its value has no effect on a,b or
+  // on the relative error. So it can be completely dropped.
+
+  // To find the minimum, we have to derive this by "a" and "b":
+  // d/da sum(e) = 2⋅a⋅ff + 2⋅b⋅f - 2⋅b⋅ff - 2⋅ft
+  // d/db sum(e) = 2⋅a⋅f - 2⋅a⋅ff - 4⋅b⋅f + 2⋅b⋅ff + 2⋅b⋅sum(1) + 2⋅ft - 2⋅t
+
+  // So, both equations must be zero at minimum and there is only one
+  // solution.
+
+  // Shared denominator of both solutions; non-zero thanks to the guard above.
+  float denom = f * f - ff * float(char_pixels);
+  float3 a = (ft * (f - float(char_pixels)) + t * (f - ff)) / denom;
+  float3 b = (ft * f - t * ff) / denom;
+
+  // sum(e) evaluated at the solution, added up over the three color channels.
+  float3 e = a * a * ff + 2.0 * a * b * (f - ff) - 2.0 * a * ft +
+             b * b * (-2.0 * f + ff + float(char_pixels)) + 2.0 * b * ft - 2.0 * b * t + tt;
+  o.err = dot(e, float3(1.0, 1.0, 1.0));
+
+  o.fg = a;
+  o.bg = b;
+
+  return o;
+}
+
+// Get the color of the pixel of this invocation based on the character details.
+//   char_out: character index (c) plus the font and background colors to use.
+// Returns char_out.fg where the character's bitmap is set at this pixel and
+// char_out.bg where it is clear.
+float3 GetFinalPixel(CharResults char_out)
+{
+  float2 resolution = OptionEnabled(USE_WINDOW_RES) ? GetWindowResolution() : GetResolution();
+
+  // Character cell that contains this pixel, and the pixel's offset inside that cell.
+  uint2 char_pos = uint2(floor(GetCoordinates() * resolution / char_dim));
+  uint2 pixel_offset = uint2(floor(GetCoordinates() * resolution) - char_pos * char_dim);
+
+  // SampleFont takes (pixel index, character index) as a uint2. Both values are already
+  // unsigned here, so build the uint2 directly instead of going through a signed int2.
+  uint pixel_index = pixel_offset.x + char_width * pixel_offset.y;
+  float font = SampleFont(uint2(pixel_index, char_out.c));
+
+  return char_out.fg * font + char_out.bg * (1.0 - font);
+}
+
+/*
+  This shader performs some kind of brute force evaluation, which character fits best.
+
+  for c in characters:
+    for p in pixels:
+      ft += font(c,p) * texture(p)
+    res = CalcCharRes(ft)
+  min(res.err)
+
+  Terrible in performance, only for reference.
+  */
+// Reference implementation: tries every character for the cell at char_pos, one
+// texture and one font sample per pixel, and returns the candidate with the smallest error.
+CharResults CalcCharTrivial(uint2 char_pos)
+{
+  // Sum of all texture samples of the cell; assigned during the c == 0 pass below.
+  float3 t;
+  CharResults char_out;
+  // Start from the maximum error so that any better candidate replaces it.
+  char_out.err = char_pixels * char_pixels;
+  for (uint c = 0; c < MAX_CHARS; c += 1)
+  {
+    // ft accumulates font * texture over all pixels of the cell for character c.
+    float3 ft = float3(0.0, 0.0, 0.0);
+    for (uint pixel = 0; pixel < char_pixels; pixel += 1)
+    {
+      float3 tex = SampleTex(char_pos, pixel);
+      float font = SampleFont(uint2(pixel, c));
+      ft += font * tex;
+    }
+    // Character 0 has every pixel set, so its ft is the plain texture sum.
+    if (c == 0)
+      t = ft;
+    CharResults res = CalcCharRes(c, t, ft);
+    if (res.err < char_out.err)
+      char_out = res;
+  }
+  return char_out;
+}
+
+/*
+  However for better performance, some characters are tested at once. This saves some expensive
+  texture() calls. Also split the loop over the pixels in groups of 32 for only fetching the uint32
+  of the font once.
+*/
+// Non-subgroup search for the best character of the cell at char_pos. Evaluates
+// UNROLL_FALLBACK characters per texture sample and returns the one with the smallest error.
+CharResults CalcCharFallback(uint2 char_pos)
+{
+  // Sum of all texture samples of the cell; assigned during the c == 0 pass below.
+  float3 t;
+  CharResults char_out;
+  // Start from the maximum error so that any better candidate replaces it.
+  char_out.err = char_pixels * char_pixels;
+  for (uint c = 0; c < MAX_CHARS; c += UNROLL_FALLBACK)
+  {
+    // Declare ft
+    float3 ft[UNROLL_FALLBACK];
+    for (uint i = 0; i < UNROLL_FALLBACK; i++)
+      ft[i] = float3(0.0, 0.0, 0.0);
+
+    // Split `for p : pixels` in groups of 32. This makes accessing the texture (bit in uint32)
+    // easier.
+    for (uint pixel = 0; pixel < char_pixels; pixel += 32)
+    {
+      // One 32-pixel font word per character of the current group.
+      uint font_i[UNROLL_FALLBACK];
+      for (uint i = 0; i < UNROLL_FALLBACK; i++)
+        font_i[i] = rasters[c + i][pixel / 32];
+
+      for (uint pixel_offset = 0; pixel_offset < 32; pixel_offset += 1)
+      {
+        // Sampled once and reused for every character of the group.
+        float3 tex = SampleTex(char_pos, pixel + pixel_offset);
+
+        // Inner kernel of `ft += font * tex`. Most time is spent in here.
+        for (uint i = 0; i < UNROLL_FALLBACK; i++)
+        {
+          float font = (font_i[i] >> pixel_offset) & uint(1);
+
+          ft[i] += font * tex;
+        }
+      }
+    }
+    if (c == 0)
+    {
+      // First char has font := 1, so t = ft. Cache this value for the next iterations.
+      t = ft[0];
+    }
+
+    // Check if this character fits better than the last one.
+    for (uint i = 0; i < UNROLL_FALLBACK; i++)
+    {
+      CharResults res = CalcCharRes(c + i, t, ft[i]);
+      if (res.err < char_out.err)
+        char_out = res;
+    }
+  }
+
+  return char_out;
+}
+
+/*
+  SIMD optimized version with subgroup intrinsics
+  - distribute all characters over the lanes and check for them in parallel
+  - distribute the uniform texture access and broadcast each back to each lane
+*/
+// Subgroup search for the best character of the cell at char_pos, using the first
+// simd_width lanes. Without SUPPORTS_SUBGROUP_REDUCTION the body is compiled out and the
+// returned struct only has err set (to the maximum error).
+CharResults CalcCharSIMD(uint2 char_pos, uint simd_width)
+{
+  // Font color, bg color, character, error -- of character with minimum error
+  CharResults char_out;
+  char_out.err = char_pixels * char_pixels;
+  // Sum of all texture samples of the cell; assigned during the c == 0 pass below.
+  float3 t;
+#ifdef SUPPORTS_SUBGROUP_REDUCTION
+
+  // Hack: Work in hard-coded fixed SIMD mode
+  if (gl_SubgroupInvocationID < simd_width)
+  {
+    // Loop over all characters
+    for (uint c = 0; c < MAX_CHARS; c += UNROLL_SIMD * simd_width)
+    {
+      // registers for "sum of font * texture"
+      float3 ft[UNROLL_SIMD];
+      for (uint i = 0; i < UNROLL_SIMD; i++)
+        ft[i] = float3(0.0, 0.0, 0.0);
+
+      for (uint pixel = 0; pixel < char_pixels; pixel += 32)
+      {
+        // Preload the font uint32 for the next 32 pixels
+        // Each lane handles UNROLL_SIMD consecutive characters of the current batch.
+        uint font_i[UNROLL_SIMD];
+        for (uint i = 0; i < UNROLL_SIMD; i++)
+          font_i[i] = rasters[c + UNROLL_SIMD * gl_SubgroupInvocationID + i][pixel / 32];
+
+        for (uint pixel_offset = 0; pixel_offset < 32; pixel_offset += simd_width)
+        {
+          // Copy one full warp of textures into registers and shuffle them around for later usage.
+          // This avoids one memory transaction per tested pixel & character.
+          float3 tex_simd = SampleTex(char_pos, pixel + pixel_offset + gl_SubgroupInvocationID);
+
+          for (uint k = 0; k < simd_width; k += 1)
+          {
+            // Texture sample that lane k fetched, i.e. pixel (pixel + pixel_offset + k).
+            float3 tex = subgroupBroadcast(tex_simd, k);
+
+            // Note: As pixel iterates based on power-of-two gl_SubgroupSize,
+            // the const memory access to rasters is CSE'd and the inner loop
+            // after unrolling only contains: testing one bit + shuffle +
+            // conditional add
+            for (uint i = 0; i < UNROLL_SIMD; i++)
+            {
+              // `%` binds tighter than `+`: the shift amount is k + (pixel_offset % 32).
+              float font = (font_i[i] >> (k + pixel_offset % 32)) & uint(1);
+              ft[i] += font * tex;
+            }
+          }
+        }
+      }
+      if (c == 0)
+      {
+        // font[0] is a hardcoded 1 font, so t = ft
+        t = subgroupBroadcast(ft[0], 0);
+      }
+
+      // Keep the best of this lane's own characters.
+      for (uint i = 0; i < UNROLL_SIMD; i++)
+      {
+        CharResults res = CalcCharRes(c + UNROLL_SIMD * gl_SubgroupInvocationID + i, t, ft[i]);
+        if (res.err < char_out.err)
+          char_out = res;
+      }
+    }
+  }
+
+  // Broadcast to get the best character of all threads
+  // The lowest-numbered lane holding the minimum error supplies the result.
+  float err_min = subgroupMin(char_out.err);
+  uint smallest = subgroupBallotFindLSB(subgroupBallot(err_min == char_out.err));
+  char_out.fg = subgroupBroadcast(char_out.fg, smallest);
+  char_out.bg = subgroupBroadcast(char_out.bg, smallest);
+  char_out.c = subgroupBroadcast(char_out.c, smallest);
+  char_out.err = err_min;
+
+#endif
+  return char_out;
+}
+
+// Returns true when the lowest simd_width bits of the first word of subgroupBallot(true)
+// are all set. Always returns false when SUPPORTS_SUBGROUP_REDUCTION is not defined.
+bool supportsSIMD(uint simd_width)
+{
+#ifdef SUPPORTS_SUBGROUP_REDUCTION
+  // A 32-bit value cannot be shifted by 32, so the full mask is special-cased.
+  const uint mask = simd_width == 32u ? 0xFFFFFFFFu : (1u << simd_width) - 1;
+  return (subgroupBallot(true)[0] & mask) == mask;
+#else
+  return false;
+#endif
+}
+
+// "Error: The AsciiArt shader requires the missing GPU extension KHR_shader_subgroup."
+// Stored as character indices into rasters (printable ASCII code minus 32). Spaces use
+// index 95, the empty character, because index 0 is the all-one character.
+const uint missing_subgroup_warning_len = 82;
+const uint missing_subgroup_warning[missing_subgroup_warning_len] = {
+    37, 82, 82, 79, 82, 26, 95, 52, 72, 69, 95, 33, 83, 67, 73, 73, 33, 82, 84, 95, 83,
+    72, 65, 68, 69, 82, 95, 82, 69, 81, 85, 73, 82, 69, 83, 95, 84, 72, 69, 95, 77, 73,
+    83, 83, 73, 78, 71, 95, 39, 48, 53, 95, 69, 88, 84, 69, 78, 84, 73, 79, 78, 95, 43,
+    40, 50, 63, 83, 72, 65, 68, 69, 82, 63, 83, 85, 66, 71, 82, 79, 85, 80, 14};
+
+// Returns this pixel's color for the warning banner: white text on black in the first
+// character row, plain black background for every other cell.
+float3 ShowWarning(uint2 char_pos)
+{
+  CharResults cell;
+  cell.fg = float3(1.0, 1.0, 1.0);
+  cell.bg = float3(0.0, 0.0, 0.0);
+
+  // Only row 0 carries text; cells past the end of the message stay empty (index 95).
+  bool in_message = char_pos.y == 0u && char_pos.x < missing_subgroup_warning_len;
+  cell.c = in_message ? missing_subgroup_warning[char_pos.x] : 95u;
+
+  return GetFinalPixel(cell);
+}
+
+// Entry point. With SUPPORTS_SUBGROUP_REDUCTION and at least 8 usable lanes, the subgroup
+// search picks the character for every cell covered by the subgroup. Without the define,
+// the first two character rows show the warning banner. In all remaining cases the
+// fallback search runs if HAVE_FULL_FEATURE_FALLBACK is set; otherwise the input is copied.
+void main()
+{
+  // Calculate the character position of this pixel
+  float2 resolution = OptionEnabled(USE_WINDOW_RES) ? GetWindowResolution() : GetResolution();
+  uint2 char_pos_self = uint2(floor(GetCoordinates() * resolution / char_dim));
+
+  float3 color_out;
+
+#ifdef SUPPORTS_SUBGROUP_REDUCTION
+  if (supportsSIMD(8))
+  {
+    // Loop over all character positions covered by this wave
+    bool pixel_active = !gl_HelperInvocation;
+    CharResults char_out;
+    while (true)
+    {
+      // Fetch the next active character position
+      uint4 active_lanes = subgroupBallot(pixel_active);
+      if (active_lanes == uint4(0, 0, 0, 0))
+      {
+        break;
+      }
+      uint2 char_pos = subgroupBroadcast(char_pos_self, subgroupBallotFindLSB(active_lanes));
+
+      // And calculate everything for this character position
+      // The widest supported width is tried first.
+      if (supportsSIMD(32))
+      {
+        char_out = CalcCharSIMD(char_pos, 32);
+      }
+      else if (supportsSIMD(16))
+      {
+        char_out = CalcCharSIMD(char_pos, 16);
+      }
+      else if (supportsSIMD(8))
+      {
+        char_out = CalcCharSIMD(char_pos, 8);
+      }
+
+      // Draw the character on screen
+      // Lanes whose own cell was just solved take the result and drop out of the ballot.
+      if (char_pos == char_pos_self)
+      {
+        color_out = GetFinalPixel(char_out);
+        pixel_active = false;
+      }
+      // Debug option: stop after the first character position.
+      if (OptionEnabled(DEBUG_ONLY_ONE_CHAR))
+      {
+        break;
+      }
+    }
+  }
+  else
+#else
+  // No subgroup support compiled in: the first two character rows are used for the warning.
+  if (char_pos_self.y <= 1u)
+  {
+    color_out = ShowWarning(char_pos_self);
+  }
+  else
+#endif
+      // Both preprocessor branches end in a dangling `else` that attaches to this `if`.
+      if (HAVE_FULL_FEATURE_FALLBACK)
+  {
+    color_out = GetFinalPixel(CalcCharFallback(char_pos_self));
+  }
+  else
+  {
+    // Fallback disabled: pass the input through unchanged.
+    color_out = Sample().xyz;
+  }
+
+  SetOutput(float4(color_out, 1.0));
+}
@@ -0,0 +1,15 @@
+// Darkens the current sample wherever the gray level changes across either
+// diagonal: 12x the sum of the two squared diagonal differences is
+// subtracted from every channel, alpha included.
+void main()
+{
+	// Weights that reduce an RGBA sample to one gray value (alpha weight is zero).
+	float4 gray_weights = float4(0.3,0.59,0.11,0);
+
+	// Gray levels of the four diagonal neighbours, named by offset sign (x, y).
+	float gray_pp = dot(gray_weights, SampleOffset(int2( 1, 1)));
+	float gray_mm = dot(gray_weights, SampleOffset(int2(-1,-1)));
+	float gray_pm = dot(gray_weights, SampleOffset(int2( 1,-1)));
+	float gray_mp = dot(gray_weights, SampleOffset(int2(-1, 1)));
+
+	float diag_a = gray_pp - gray_mm;
+	float diag_b = gray_pm - gray_mp;
+	float edge = diag_a * diag_a + diag_b * diag_b;
+
+	SetOutput(Sample() - float4(edge, edge, edge, edge) * 12.0);
+}
@@ -0,0 +1,88 @@
+// Snaps one channel value to the upper bound of the first of `numColors`
+// equal-width buckets over [0, 1] that contains it.
+//   value     - channel value to quantize.
+//   numColors - number of buckets.
+// Returns 0.01 for the first bucket when its upper bound is >= 0.1 (otherwise
+// that bound), 0.95 for the last bucket, the bucket's upper bound for the
+// ones in between, and 0.0 when the value lies outside every bucket.
+float QuantizeChannel(float value, int numColors)
+{
+	float lower = 0.0;
+
+	for (int count = 1; count <= numColors; count++)
+	{
+		// Convert before dividing: `count / numColors` on two ints truncates
+		// to 0 for every bucket except the last one.
+		float upper = float(count) / float(numColors);
+
+		if (value <= upper && value >= lower)
+		{
+			if (count == 1)
+				return (upper >= 0.1) ? 0.01 : upper;
+			if (count == numColors)
+				return 0.95;
+			return upper;
+		}
+
+		lower = upper;
+	}
+
+	return 0.0;
+}
+
+// Edge-darkened posterization: subtracts 12x the squared diagonal gray-level
+// differences from the current sample, then quantizes red, green and blue
+// independently. Alpha is passed through from the edge-darkened colour.
+void main()
+{
+	// Change this to increase the number of colors.
+	int numColors = 8;
+
+	// Gray levels of the four diagonal neighbours.
+	float4 to_gray = float4(0.3,0.59,0.11,0);
+	float x1 = dot(to_gray, SampleOffset(int2( 1, 1)));
+	float x0 = dot(to_gray, SampleOffset(int2(-1,-1)));
+	float x3 = dot(to_gray, SampleOffset(int2( 1,-1)));
+	float x2 = dot(to_gray, SampleOffset(int2(-1, 1)));
+	float edge = (x1 - x0) * (x1 - x0) + (x3 - x2) * (x3 - x2);
+
+	float4 c0 = Sample() - float4(edge, edge, edge, edge) * 12.0;
+
+	float red   = QuantizeChannel(c0.r, numColors);
+	float green = QuantizeChannel(c0.g, numColors);
+	float blue  = QuantizeChannel(c0.b, numColors);
+
+	SetOutput(float4(red, green, blue, c0.a));
+}
@@ -0,0 +1,36 @@
+// Bloom with vignette: sums eight taps on each of two rings around a slightly
+// offset position, keeps only what exceeds a threshold, adds that to a dimmed
+// centre sample and darkens the result with squared distance from the
+// middle of the image.
+void main()
+{
+	float4 centre = Sample();
+
+	float2 texel = GetInvResolution();
+	float2 origin = GetCoordinates() + float2(0.3, 0.3) * texel;
+
+	// Ring 0 uses a 1.3 texel scale, ring 1 a 4.6 texel scale; both share the tap pattern.
+	float4 glow = float4(0.0, 0.0, 0.0, 0.0);
+	for (int ring = 0; ring < 2; ++ring)
+	{
+		float2 reach = ((ring == 0) ? 1.3 : 4.6) * texel;
+
+		glow += SampleLocation(origin + float2(-1.5, -1.5) * reach);
+		glow += SampleLocation(origin + float2(-2.5, 0.0) * reach);
+		glow += SampleLocation(origin + float2(-1.5, 1.5) * reach);
+		glow += SampleLocation(origin + float2(0.0, 2.5) * reach);
+		glow += SampleLocation(origin + float2(1.5, 1.5) * reach);
+		glow += SampleLocation(origin + float2(2.5, 0.0) * reach);
+		glow += SampleLocation(origin + float2(1.5, -1.5) * reach);
+		glow += SampleLocation(origin + float2(0.0, -2.5) * reach);
+	}
+
+	// Scale the sum, subtract the threshold and discard anything that went negative.
+	glow = max(glow * 0.07 - float4(0.3, 0.3, 0.3, 0.3), float4(0.0, 0.0, 0.0, 0.0));
+
+	// Attenuation: 1 at the middle of the image, falling with squared distance.
+	float2 from_centre = (GetCoordinates() - float2(0.5, 0.5)) * 2.0;
+	float falloff = 1.0 - 0.4*dot(from_centre, from_centre);
+
+	SetOutput((centre * 0.7 + glow) * falloff);
+}
@@ -0,0 +1,4 @@
+// Multiplies every channel of the current sample (alpha included) by three.
+void main()
+{
+	float4 source = Sample();
+	SetOutput(source * 3.0);
+}
@@ -0,0 +1,13 @@
+// Two-colour split: pixels with low red or noticeable blue become pure green
+// (green plus half of blue); all others become pure red (red plus 0.4).
+// Blue is always 0 and alpha always 1.
+void main()
+{
+	float4 source = Sample();
+	bool goes_green = source.r < 0.35 || source.b > 0.35;
+
+	if (goes_green)
+		SetOutput(float4(0.0, source.g + (source.b / 2.0), 0.0, 1.0));
+	else
+		SetOutput(float4(source.r + 0.4, 0.0, 0.0, 1.0));
+}
@@ -0,0 +1,20 @@
+// Channel shuffle: blue always receives the source red. When red is low or
+// blue is high, red receives the source green and green is cleared;
+// otherwise green receives the source red and red is cleared. Alpha is 1.
+void main()
+{
+	float4 source = Sample();
+	bool swap_into_red = source.r < 0.50 || source.b > 0.5;
+
+	float red	= swap_into_red ? source.g : 0.0;
+	float green	= swap_into_red ? 0.0 : source.r;
+
+	SetOutput(float4(red, green, source.r, 1.0));
+}
@@ -0,0 +1,33 @@
+// Per-channel contrast push: each of red, green and blue is raised by half
+// of its own value when the average of that channel over the four direct
+// neighbours exceeds 0.3, and lowered by half otherwise. Alpha is kept.
+void main()
+{
+	float4 centre = Sample();
+
+	// Component-wise mean of the left, upper, right and lower neighbours.
+	float4 around = (SampleOffset(int2(-1,  0)) + SampleOffset(int2( 0, -1)) +
+		SampleOffset(int2( 1,  0)) + SampleOffset(int2( 0,  1))) / 4.0;
+
+	float red   = (around.r > 0.3) ? centre.r + centre.r / 2.0 : centre.r - centre.r / 2.0;
+	float green = (around.g > 0.3) ? centre.g + centre.g / 2.0 : centre.g - centre.g / 2.0;
+	float blue  = (around.b > 0.3) ? centre.b + centre.b / 2.0 : centre.b - centre.b / 2.0;
+
+	SetOutput(float4(red, green, blue, centre.a));
+}
@@ -0,0 +1,476 @@
+/***** COLOR CORRECTION *****/
+
+// Color Space references:
+// https://www.unravel.com.au/understanding-color-spaces
+
+// SMPTE 170M - BT.601 (NTSC-M) -> BT.709
+// The three matrices below are applied elsewhere in this file as
+// `color * M` (see Luminance() and main()). GLSL's mat3 constructor consumes
+// its scalars column by column, so each literal is wrapped in transpose() to
+// make every line of numbers a matrix row.
+mat3 from_NTSCM = transpose(mat3(
+	0.939497225737661,		0.0502268452914346,		0.0102759289709032,
+	0.0177558637510127,		0.965824605885027,		0.0164195303639603,
+	-0.00162163209967010,	-0.00437400622653655,	1.00599563832621));
+
+// ARIB TR-B9 (9300K+27MPCD with chromatic adaptation) (NTSC-J) -> BT.709
+mat3 from_NTSCJ = transpose(mat3(
+	0.768497526,		-0.210804164,	  0.000297427177,
+	0.0397904068,		1.04825413,			0.00555809540,
+	0.00147510506,	0.0328789241,		1.36515128));
+
+// EBU - BT.470BG/BT.601 (PAL) -> BT.709
+mat3 from_PAL = transpose(mat3(
+	1.04408168421813,		-0.0440816842181253,	0.000000000000000,
+	0.000000000000000,	1.00000000000000,			0.000000000000000,
+	0.000000000000000,	0.0118044782106489,		0.988195521789351));
+
+// Encodes each component of `color` with a piecewise curve: a linear segment
+// (x * 12.92) up to 0.0031308 and a 1/2.4 power segment above it.
+// Returns the encoded triple.
+float3 LinearTosRGBGamma(float3 color)
+{
+	const float curve_offset = 0.055;
+
+	float3 encoded;
+	for (int channel = 0; channel < 3; ++channel)
+	{
+		float linear_value = color[channel];
+		encoded[channel] = (linear_value <= 0.0031308)
+			? linear_value * 12.92
+			: (1.0 + curve_offset) * pow(linear_value, 1.0 / 2.4) - curve_offset;
+	}
+
+	return encoded;
+}
+
+// Per-channel weights used by Luminance().
+float3 Rec709_Luminance = float3(0.2126, 0.7152, 0.0722);
+
+// Returns the weighted sum of `color` using Rec709_Luminance.
+//   color              - RGB triple to measure.
+//   native_color_space - when true and the correct_color_space option is on,
+//                        the colour is first multiplied by the matrix selected
+//                        by game_color_space (0, 1 or 2; other values leave it
+//                        untouched).
+float Luminance(float3 color, bool native_color_space)
+{
+	float3 rgb = color;
+
+	bool convert = native_color_space && OptionEnabled(correct_color_space);
+	if (convert)
+	{
+		if (game_color_space == 0)
+			rgb = rgb * from_NTSCM;
+		else if (game_color_space == 1)
+			rgb = rgb * from_NTSCJ;
+		else if (game_color_space == 2)
+			rgb = rgb * from_PAL;
+	}
+
+	return dot(rgb, Rec709_Luminance);
+}
+
+/***** COLOR SAMPLING *****/
+
+// Non filtered gamma corrected sample (nearest neighbor)
+// Single lookup of samp1 at `uvw`; the RGB part is raised to the power
+// `gamma`, alpha is returned unchanged.
+float4 QuickSample(float3 uvw, float gamma)
+{
+#if 0 // Test sampling range
+	const float threshold = 0.00000001;
+	float2 xy = uvw.xy * GetResolution();
+	// Sampling outside the valid range, draw in yellow
+	if (xy.x < (0.0 - threshold) || xy.x > (GetResolution().x + threshold) || xy.y < (0.0 - threshold) || xy.y > (GetResolution().y + threshold))
+		return float4(1.0, 1.0, 0.0, 1);
+	// Sampling at the edges, draw in purple
+	if (xy.x < 1.0 || xy.x > (GetResolution().x - 1.0) || xy.y < 1.0 || xy.y > (GetResolution().y - 1.0))
+		return float4(0.5, 0, 0.5, 1);
+#endif
+
+	float4 texel = texture(samp1, uvw);
+	return float4(pow(texel.rgb, float3(gamma)), texel.a);
+}
+// Convenience overload: packs `uv` and `w` into one coordinate and forwards
+// to the float3 version of QuickSample().
+float4 QuickSample(float2 uv, float w, float gamma)
+{
+	float3 packed_coords = float3(uv, w);
+	return QuickSample(packed_coords, gamma);
+}
+// Same as QuickSample(), but `xy` is given in pixels: it is scaled by
+// GetInvResolution() before the lookup. `w` is passed through unchanged.
+float4 QuickSampleByPixel(float2 xy, float w, float gamma)
+{
+	return QuickSample(float3(xy * GetInvResolution(), w), gamma);
+}
+
+/***** Bilinear Interpolation *****/
+
+// Software bilinear filter over four QuickSampleByPixel() taps.
+//   uvw   - lookup coordinate; xy is scaled by GetResolution(), z is forwarded
+//           to every tap.
+//   gamma - exponent the taps are decoded with.
+// Returns the weighted blend of the taps, rescaled so that its luminance
+// matches the blend of the taps' re-encoded (1/gamma) luminances.
+float4 BilinearSample(float3 uvw, float gamma)
+{
+	// Shift by half a pixel so that floor()/fract() split the position into
+	// the first tap's pixel and the blend weights towards the next one.
+	float2 pixel_pos = (uvw.xy * GetResolution()) - 0.5;
+	float2 base = floor(pixel_pos);
+	float2 weight = fract(pixel_pos);
+
+	// Taps are named s<x><y>, where 0/1 is the offset from `base` on that axis.
+	float4 s00 = QuickSampleByPixel(base + float2(0.5, 0.5), uvw.z, gamma);
+	float4 s10 = QuickSampleByPixel(base + float2(1.5, 0.5), uvw.z, gamma);
+	float4 s01 = QuickSampleByPixel(base + float2(0.5, 1.5), uvw.z, gamma);
+	float4 s11 = QuickSampleByPixel(base + float2(1.5, 1.5), uvw.z, gamma);
+
+	float4 blended = lerp(lerp(s00, s10, weight.x), lerp(s01, s11, weight.x), weight.y);
+
+	// Blend the luminances after re-encoding them with 1/gamma; the result is
+	// the brightness the output is steered towards.
+	float inv_gamma = 1.0 / gamma;
+	float lum00 = pow(Luminance(s00.rgb, true), inv_gamma);
+	float lum10 = pow(Luminance(s10.rgb, true), inv_gamma);
+	float lum01 = pow(Luminance(s01.rgb, true), inv_gamma);
+	float lum11 = pow(Luminance(s11.rgb, true), inv_gamma);
+	float wanted_lum = lerp(lerp(lum00, lum10, weight.x), lerp(lum01, lum11, weight.x), weight.y);
+
+	// Skip the rescale for black, where the ratio would divide by zero.
+	float blended_lum = Luminance(blended.rgb, true);
+	if (blended_lum != 0.0)
+	{
+		blended.rgb *= pow(wanted_lum, gamma) / blended_lum;
+	}
+
+	return blended;
+}
+
+/***** Bicubic Interpolation *****/
+
+// Formula derived from:
+// https://en.wikipedia.org/wiki/Mitchell%E2%80%93Netravali_filters#Definition
+// Values from:
+// https://guideencodemoe-mkdocs.readthedocs.io/encoding/resampling/#mitchell-netravali-bicubic
+// Other references:
+// https://www.codeproject.com/Articles/236394/Bi-Cubic-and-Bi-Linear-Interpolation-with-GLSL
+// https://github.com/ValveSoftware/gamescope/pull/740
+// https://stackoverflow.com/questions/13501081/efficient-bicubic-filtering-code-in-glsl
+// Expands to a mat4 built from the two cubic filter parameters B and C.
+// The sixteen constructor arguments come in four groups, labelled t^0 to t^3
+// in the macro body; CubicCoeffs() multiplies the matrix by (1, t, t^2, t^3)
+// to obtain the four tap weights for a fractional position t.
+// B and C are parenthesized at every use, so expressions such as `1.0 / 3.0`
+// may be passed directly.
+#define CUBIC_COEFF_GEN(B, C)																						\
+	(mat4(/* t^0 */ ((B) / 6.0), (-(B) / 3.0 + 1.0), ((B) / 6.0), (0.0),	\
+		/* t^1 */ (-(B) / 2.0 - (C)), (0.0), ((B) / 2.0 + (C)), (0.0),			\
+		/* t^2 */ ((B) / 2.0 + 2.0 * (C)), (2.0 * (B) + (C)-3.0),						\
+		(-5.0 * (B) / 2.0 - 2.0 * (C) + 3.0), (-(C)),												\
+		/* t^3 */ (-(B) / 6.0 - (C)), (-3.0 * (B) / 2.0 - (C) + 2.0),				\
+		(3.0 * (B) / 2.0 + (C)-2.0), ((B) / 6.0 + (C))))
+
+// Returns `coeffs` multiplied by the power vector (1, t, t^2, t^3).
+float4 CubicCoeffs(float t, mat4 coeffs)
+{
+	float t_squared = t * t;
+	float t_cubed = t_squared * t;
+	float4 powers = float4(1.0, t, t_squared, t_cubed);
+	return coeffs * powers;
+}
+
+// Weighted sum of four colours; the weights are the four components of `coeffs`.
+float4 CubicMix(float4 c0, float4 c1, float4 c2, float4 c3, float4 coeffs)
+{
+	float4 total = c0 * coeffs.x;
+	total = total + c1 * coeffs.y;
+	total = total + c2 * coeffs.z;
+	total = total + c3 * coeffs.w;
+	return total;
+}
+
+// By Sam Belliveau. Public Domain license.
+// Simple 16 tap, gamma correct, implementation of bicubic filtering.
+// 16-tap bicubic filter.
+//   uvw    - lookup coordinate; xy is scaled by GetResolution(), z is forwarded
+//            to every tap.
+//   gamma  - exponent the taps are decoded with.
+//   coeffs - kernel matrix, e.g. from CUBIC_COEFF_GEN.
+// Each of the four rows is mixed horizontally with the x weights, then the
+// four row results are mixed with the y weights.
+float4 BicubicSample(float3 uvw, float gamma, mat4 coeffs)
+{
+	float2 pixel_pos = (uvw.xy * GetResolution()) - 0.5;
+	float2 base = floor(pixel_pos);
+	float2 t = fract(pixel_pos);
+	float layer = uvw.z;
+
+	float4 weights_x = CubicCoeffs(t.x, coeffs);
+	float4 weights_y = CubicCoeffs(t.y, coeffs);
+
+	// Row at y offset -0.5
+	float4 row0 = CubicMix(
+		QuickSampleByPixel(base + float2(-0.5, -0.5), layer, gamma),
+		QuickSampleByPixel(base + float2(+0.5, -0.5), layer, gamma),
+		QuickSampleByPixel(base + float2(+1.5, -0.5), layer, gamma),
+		QuickSampleByPixel(base + float2(+2.5, -0.5), layer, gamma),
+		weights_x);
+
+	// Row at y offset +0.5
+	float4 row1 = CubicMix(
+		QuickSampleByPixel(base + float2(-0.5, +0.5), layer, gamma),
+		QuickSampleByPixel(base + float2(+0.5, +0.5), layer, gamma),
+		QuickSampleByPixel(base + float2(+1.5, +0.5), layer, gamma),
+		QuickSampleByPixel(base + float2(+2.5, +0.5), layer, gamma),
+		weights_x);
+
+	// Row at y offset +1.5
+	float4 row2 = CubicMix(
+		QuickSampleByPixel(base + float2(-0.5, +1.5), layer, gamma),
+		QuickSampleByPixel(base + float2(+0.5, +1.5), layer, gamma),
+		QuickSampleByPixel(base + float2(+1.5, +1.5), layer, gamma),
+		QuickSampleByPixel(base + float2(+2.5, +1.5), layer, gamma),
+		weights_x);
+
+	// Row at y offset +2.5
+	float4 row3 = CubicMix(
+		QuickSampleByPixel(base + float2(-0.5, +2.5), layer, gamma),
+		QuickSampleByPixel(base + float2(+0.5, +2.5), layer, gamma),
+		QuickSampleByPixel(base + float2(+1.5, +2.5), layer, gamma),
+		QuickSampleByPixel(base + float2(+2.5, +2.5), layer, gamma),
+		weights_x);
+
+	return CubicMix(row0, row1, row2, row3, weights_y);
+}
+
+/***** Sharp Bilinear Filtering *****/
+
+// Based on https://github.com/libretro/slang-shaders/blob/master/interpolation/shaders/sharp-bilinear.slang
+// by Themaister, Public Domain license
+// Does a bilinear stretch, with a preapplied Nx nearest-neighbor scale,
+// giving a sharper image than plain bilinear.
+// Sharp bilinear: remaps the position inside each source pixel so that only
+// a narrow band around the pixel border is interpolated, then defers to
+// BilinearSample(). The band width follows the integer part of the larger
+// window-to-source scale factor (at least 1).
+float4 SharpBilinearSample(float3 uvw, float gamma)
+{
+	float2 src_res = GetResolution();
+	float2 inv_src_res = GetInvResolution();
+	float2 dst_res = GetWindowResolution();
+
+	float2 pixel_pos = uvw.xy * src_res;
+
+	// Whole-number prescale factor, never below 1.
+	float prescale = max(floor(max(dst_res.x * inv_src_res.x, dst_res.y * inv_src_res.y)), 1.0);
+
+	// Half-width of the flat zone around the pixel centre.
+	float flat_zone = 0.5 - (0.5 / prescale);
+
+	// Only the part of the centre distance that falls outside the flat zone
+	// survives; it is stretched by the prescale factor and recentred.
+	float2 from_centre = fract(pixel_pos) - 0.5;
+	float2 remapped = ((from_centre - clamp(from_centre, -flat_zone, flat_zone)) * prescale) + 0.5;
+
+	uvw.xy = (floor(pixel_pos) + remapped) * inv_src_res;
+	return BilinearSample(uvw, gamma);
+}
+
+/***** Area Sampling *****/
+
+// By Sam Belliveau and Filippo Tarpini. Public Domain license.
+// Effectively a more accurate sharp bilinear filter when upscaling,
+// that also works as a mathematically perfect downscale filter.
+// https://entropymine.com/imageworsener/pixelmixing/
+// https://github.com/obsproject/obs-studio/pull/1715
+// https://legacy.imagemagick.org/Usage/filter/
+// Averages every source pixel covered by the window pixel that contains
+// `uvw.xy`, weighting partially covered border pixels by the covered fraction.
+//   uvw   - lookup coordinate; xy selects the window pixel, z is forwarded to
+//           every QuickSampleByPixel() tap.
+//   gamma - exponent the taps are decoded with; also used for the luminance
+//           correction at the end.
+// Returns the area-weighted average colour, rescaled so that its luminance
+// matches the area-weighted average of the taps' re-encoded luminances.
+// At most max_iterations (16) interior columns and rows are visited; larger
+// boxes are truncated to that size.
+float4 AreaSampling(float3 uvw, float gamma)
+{
+	// Determine the sizes of the source and target images.
+	float2 source_size = GetResolution();
+	float2 target_size = GetWindowResolution();
+	float2 inverted_target_size = GetInvWindowResolution();
+
+	// Compute the top-left and bottom-right corners of the target pixel box.
+	float2 t_beg = floor(uvw.xy * target_size);
+	float2 t_end = t_beg + float2(1.0, 1.0);
+
+	// Convert the target pixel box to source pixel box.
+	float2 beg = t_beg * inverted_target_size * source_size;
+	float2 end = t_end * inverted_target_size * source_size;
+
+	// Compute the top-left and bottom-right corners of the pixel box.
+	float2 f_beg = floor(beg);
+	float2 f_end = floor(end);
+
+	// Compute how much of the start and end pixels are covered horizontally & vertically.
+	float area_w = 1.0 - fract(beg.x);
+	float area_n = 1.0 - fract(beg.y);
+	float area_e = fract(end.x);
+	float area_s = fract(end.y);
+
+	// Compute the areas of the corner pixels in the pixel box.
+	float area_nw = area_n * area_w;
+	float area_ne = area_n * area_e;
+	float area_sw = area_s * area_w;
+	float area_se = area_s * area_e;
+
+	// Initialize the color accumulator.
+	float4 avg_color = float4(0.0, 0.0, 0.0, 0.0);
+	float avg_luminance = 0.0;
+	float4 temp_color;
+
+	float luminance_gamma = gamma; // For now, default to the user selected gamma (alternatively we could fix it to anything between 2.2 and 3.0)
+	float luminance_inv_gamma = 1.0 / luminance_gamma;
+
+	// Prevents rounding errors due to the coordinates flooring above
+	const float2 offset = float2(0.5, 0.5);
+
+	// Accumulate corner pixels.
+	temp_color = QuickSampleByPixel(float2(f_beg.x, f_beg.y) + offset, uvw.z, gamma);
+	avg_color += area_nw * temp_color;
+	avg_luminance += area_nw * pow(Luminance(temp_color.rgb, true), luminance_inv_gamma);
+	temp_color = QuickSampleByPixel(float2(f_end.x, f_beg.y) + offset, uvw.z, gamma);
+	avg_color += area_ne * temp_color;
+	avg_luminance += area_ne * pow(Luminance(temp_color.rgb, true), luminance_inv_gamma);
+	temp_color = QuickSampleByPixel(float2(f_beg.x, f_end.y) + offset, uvw.z, gamma);
+	avg_color += area_sw * temp_color;
+	avg_luminance += area_sw * pow(Luminance(temp_color.rgb, true), luminance_inv_gamma);
+	temp_color = QuickSampleByPixel(float2(f_end.x, f_end.y) + offset, uvw.z, gamma);
+	avg_color += area_se * temp_color;
+	avg_luminance += area_se * pow(Luminance(temp_color.rgb, true), luminance_inv_gamma);
+
+	// Determine the size of the pixel box.
+	// f_end - f_beg is a whole number; subtracting 0.5 before the int() conversion
+	// yields the count of pixels strictly between the first and the last one.
+	int x_range = int(f_end.x - f_beg.x - 0.5);
+	int y_range = int(f_end.y - f_beg.y - 0.5);
+
+	// Workaround to compile the shader with DX11/12.
+	// If this isn't done, it will complain that the loop could have too many iterations.
+	// This number should be enough to guarantee downscaling from very high to very small resolutions.
+	// Note that this number might be referenced in the UI.
+	const int max_iterations = 16;
+
+	// Fix up the average calculations in case we reached the upper limit
+	x_range = min(x_range, max_iterations);
+	y_range = min(y_range, max_iterations);
+
+	// Accumulate top and bottom edge pixels.
+	// The loop bound is the constant; the real range is enforced by the inner test.
+	for (int ix = 0; ix < max_iterations; ++ix)
+	{
+		if (ix < x_range)
+		{
+			float x = f_beg.x + 1.0 + float(ix);
+			temp_color = QuickSampleByPixel(float2(x, f_beg.y) + offset, uvw.z, gamma);
+			avg_color += area_n * temp_color;
+			avg_luminance += area_n * pow(Luminance(temp_color.rgb, true), luminance_inv_gamma);
+			temp_color = QuickSampleByPixel(float2(x, f_end.y) + offset, uvw.z, gamma);
+			avg_color += area_s * temp_color;
+			avg_luminance += area_s * pow(Luminance(temp_color.rgb, true), luminance_inv_gamma);
+		}
+	}
+
+	// Accumulate left and right edge pixels and all the pixels in between.
+	for (int iy = 0; iy < max_iterations; ++iy)
+	{
+		if (iy < y_range)
+		{
+			float y = f_beg.y + 1.0 + float(iy);
+			
+			temp_color = QuickSampleByPixel(float2(f_beg.x, y) + offset, uvw.z, gamma);
+			avg_color += area_w * temp_color;
+			avg_luminance += area_w * pow(Luminance(temp_color.rgb, true), luminance_inv_gamma);
+			temp_color = QuickSampleByPixel(float2(f_end.x, y) + offset, uvw.z, gamma);
+			avg_color += area_e * temp_color;
+			avg_luminance += area_e * pow(Luminance(temp_color.rgb, true), luminance_inv_gamma);
+
+			// Interior pixels are fully covered, so they are added with weight 1.
+			for (int ix = 0; ix < max_iterations; ++ix)
+			{
+				if (ix < x_range)
+				{
+					float x = f_beg.x + 1.0 + float(ix);
+					temp_color = QuickSampleByPixel(float2(x, y) + offset, uvw.z, gamma);
+					avg_color += temp_color;
+					avg_luminance += pow(Luminance(temp_color.rgb, true), luminance_inv_gamma);
+				}
+			}
+		}
+	}
+
+	// Compute the area of the pixel box that was sampled.
+	float area_corners = area_nw + area_ne + area_sw + area_se;
+	float area_edges = float(x_range) * (area_n + area_s) + float(y_range) * (area_w + area_e);
+	float area_center = float(x_range) * float(y_range);
+	
+	float4 nrm_color = avg_color / (area_corners + area_edges + area_center);
+	float target_nrm_color_luminance = avg_luminance / (area_corners + area_edges + area_center);
+
+	// Restore the averaged "gamma" space luminance, for better gamma correction.
+	// This retains the best feature of gamma correct sampling (no hue shifts),
+	// while also maintaining the perceptual "brightness" level of blending two colors with an alpha
+	// (in linear space a 0.5 alpha won't produce a color that has a perceptual brightness in the middle point of the two source colors).
+	float nrm_color_luminance = Luminance(nrm_color.rgb, true);
+	// Black is left alone: the ratio below would divide by zero.
+	if (nrm_color_luminance != 0.0)
+	{
+		nrm_color.rgb *= pow(target_nrm_color_luminance, luminance_gamma) / nrm_color_luminance;
+	}
+
+	// Return the normalized average color.
+	return nrm_color;
+}
+
+/***** Main Functions *****/
+
+// Returns an accurate (gamma corrected) sample of a gamma space texture.
+// Outputs in linear space for simplicity.
+// Dispatches to the filter selected by `resampling_method`, sampling at
+// v_tex0 with the given decode `gamma`. Values of 1 and below select
+// bilinear; a value above 8 matches no filter and yields opaque black.
+float4 LinearGammaCorrectedSample(float gamma)
+{
+	float3 uvw = v_tex0;
+
+	// Bilinear
+	if (resampling_method <= 1)
+		return BilinearSample(uvw, gamma);
+
+	// Bicubic: B-Spline
+	if (resampling_method == 2)
+		return BicubicSample(uvw, gamma, CUBIC_COEFF_GEN(1.0, 0.0));
+
+	// Bicubic: Mitchell-Netravali
+	if (resampling_method == 3)
+		return BicubicSample(uvw, gamma, CUBIC_COEFF_GEN(1.0 / 3.0, 1.0 / 3.0));
+
+	// Bicubic: Catmull-Rom
+	if (resampling_method == 4)
+		return BicubicSample(uvw, gamma, CUBIC_COEFF_GEN(0.0, 0.5));
+
+	// Sharp Bilinear
+	if (resampling_method == 5)
+		return SharpBilinearSample(uvw, gamma);
+
+	// Area Sampling
+	if (resampling_method == 6)
+		return AreaSampling(uvw, gamma);
+
+	// Nearest Neighbor
+	if (resampling_method == 7)
+		return QuickSample(uvw, gamma);
+
+	// Bicubic: Hermite
+	if (resampling_method == 8)
+		return BicubicSample(uvw, gamma, CUBIC_COEFF_GEN(0.0, 0.0));
+
+	return float4(0, 0, 0, 1);
+}
+
+// Entry point. Produces a linear-light colour for this pixel (either through
+// the software filters or a plain texture lookup), optionally converts the
+// colour space and applies the HDR paper-white scale, then encodes the result
+// for output according to the linear_space_output / correct_gamma options.
+void main()
+{
+	// A resampling method of 0 or below asks for the plain texture lookup
+	// (which is not gamma corrected).
+	bool plain_lookup_requested = resampling_method <= 0;
+	bool size_differs = GetResolution() != GetWindowResolution();
+
+	// Software filtering only matters when the image is actually rescaled, and
+	// is then used whenever HDR output, gamma correction or a non-raw method is on.
+	bool use_software_filter = size_differs &&
+		(OptionEnabled(hdr_output) || OptionEnabled(correct_gamma) || !plain_lookup_requested);
+
+	float4 color;
+
+	if (use_software_filter)
+	{
+		// Blending gamma-encoded colours directly gives wrong results when
+		// neighbouring pixels differ a lot, so the filters decode each tap with
+		// the game gamma before averaging.
+		color = LinearGammaCorrectedSample(game_gamma);
+	}
+	else
+	{
+		// Plain lookup: samp0 when the sizes differ, samp1 when they match.
+		color = size_differs ? texture(samp0, v_tex0) : texture(samp1, v_tex0);
+
+		// Convert to linear before any of the follow-up operations.
+		color.rgb = pow(color.rgb, float3(game_gamma));
+	}
+
+	if (OptionEnabled(correct_color_space))
+	{
+		if (game_color_space == 0)
+			color.rgb *= from_NTSCM;
+		else if (game_color_space == 1)
+			color.rgb *= from_NTSCJ;
+		else if (game_color_space == 2)
+			color.rgb *= from_PAL;
+	}
+
+	if (OptionEnabled(hdr_output))
+	{
+		color.rgb *= hdr_paper_white_nits / hdr_sdr_white_nits;
+	}
+
+	// With linear-space output the colour is written as is; otherwise it is
+	// re-encoded.
+	if (!OptionEnabled(linear_space_output))
+	{
+		if (OptionEnabled(correct_gamma))
+		{
+			// Correct the SDR gamma for sRGB (PC/Monitor) or a custom display gamma
+			if (OptionEnabled(sdr_display_gamma_sRGB))
+				color.rgb = LinearTosRGBGamma(color.rgb);
+			else
+				color.rgb = pow(color.rgb, float3(1.0 / sdr_display_custom_gamma));
+		}
+		else
+		{
+			// Restore the original gamma without changes
+			color.rgb = pow(color.rgb, float3(1.0 / game_gamma));
+		}
+	}
+
+	SetOutput(color);
+}
@@ -0,0 +1,17 @@
+// Gray output built from two luma values: one third of the current sample's
+// luma plus the inverted luma of the sample at offset (5, 5). Alpha is
+// taken from the current sample.
+void main()
+{
+	float4 here  = Sample();
+	float4 there = SampleOffset(int2(5, 5));
+
+	float luma_there      = (0.222 * there.r) + (0.707 * there.g) + (0.071 * there.b);
+	float luma_here_third = ((0.222 * here.r) + (0.707 * here.g) + (0.071 * here.b)) / 3.0;
+
+	float level = luma_here_third + (1.0 - luma_there);
+
+	SetOutput(float4(level, level, level, here.a));
+}
@@ -0,0 +1,29 @@
+// Red/orange remap. Red starts as the source red and green as zero; four
+// rules are then applied in order, each overriding the previous ones. Blue
+// is always 0 and alpha always 1.
+void main()
+{
+	float4 source = Sample();
+	bool weak_red = source.r < 0.25;
+
+	float red	= source.r;
+	float green	= 0.0;
+
+	// Green stronger than a non-zero red.
+	if (source.r > 0.0 && source.g > source.r)
+		green = (source.g - (source.g - source.r)) / 3.0;
+
+	// Weak red with some blue: drive the output from blue.
+	if (weak_red && source.b > 0.0)
+	{
+		red = source.b;
+		green = source.b / 3.0;
+	}
+
+	// Weak red with some green: drive the output from green (wins over blue).
+	if (weak_red && source.g > 0.0)
+	{
+		red = source.g;
+		green = source.g / 3.0;
+	}
+
+	// Very bright pixels.
+	if (((source.r + source.g + source.b) / 3.0) > 0.9)
+		green = source.r / 3.0;
+
+	SetOutput(float4(red, green, 0.0, 1.0));
+}
@@ -0,0 +1,13 @@
+// Red/orange tint: red is the source red plus half of green plus a third of
+// blue; green is a third of the source red. Blue is 0 and alpha is 1.
+// (The unused channel-average local of the earlier version was removed; the
+// output is unchanged.)
+void main()
+{
+	float4 c0 = Sample();
+
+	float red	= c0.r + (c0.g / 2.0) + (c0.b / 3.0);
+	float green	= c0.r / 3.0;
+
+	SetOutput(float4(red, green, 0.0, 1.0));
+}
@@ -0,0 +1,15 @@
+// Red/blue output from three samples: red is the current red minus the blue
+// of the (1, 1) neighbour; blue is the current blue minus the red of the
+// (-1, -1) neighbour plus (green - red) of the current sample. Green is 0
+// and alpha is kept.
+// (The green local of the earlier version was never read and was removed;
+// the output is unchanged.)
+void main()
+{
+	float4 c0 = Sample();
+	float4 c1 = SampleOffset(int2( 1,  1));
+	float4 c2 = SampleOffset(int2(-1, -1));
+
+	float red	= c0.r - c1.b;
+	float blue	= c0.b - c2.r + (c0.g - c0.r);
+
+	SetOutput(float4(red, 0.0, blue, c0.a));
+}
@@ -0,0 +1,6 @@
+// Grayscale by plain average: every colour channel becomes the mean of the
+// source red, green and blue. Alpha is kept.
+void main()
+{
+	float4 source = Sample();
+	float channel_sum = source.r + source.g + source.b;
+	float level = channel_sum / 3.0;
+	SetOutput(float4(level, level, level, source.a));
+}
@@ -0,0 +1,7 @@
+// Grayscale by weighted sum (0.222 red, 0.707 green, 0.071 blue). Alpha is kept.
+void main()
+{
+	float4 source = Sample();
+	// Info: https://web.archive.org/web/20040101053504/http://www.oreillynet.com:80/cs/user/view/cs_msg/8691
+	float level = (0.222 * source.r) + (0.707 * source.g) + (0.071 * source.b);
+	SetOutput(float4(level, level, level, source.a));
+}
@@ -0,0 +1,182 @@
+// Set aspect ratio to 'stretch'
+
+// Integer Scaling shader by One_More_Try / TryTwo
+// Uses Sharp Bilinear from
+// https://github.com/libretro/slang-shaders/blob/master/interpolation/shaders/sharp-bilinear.slang
+// by Themaister, Public Domain license
+
+/*
+[configuration]
+[OptionBool]
+GUIName = Please set aspect ratio to stretch
+OptionName = ASPECT_MSG
+DefaultValue = false
+
+[OptionBool]
+GUIName = Use non-integer width
+OptionName = WIDTH_UNLOCK
+DefaultValue = false
+
+[OptionBool]
+GUIName = Stretch width to window
+OptionName = WIDTH_SKIP
+DependentOption = WIDTH_UNLOCK
+DefaultValue = false
+
+[OptionBool]
+GUIName = Scale width to fit 4:3
+OptionName = WIDTH_43
+DependentOption = WIDTH_UNLOCK
+DefaultValue = false
+
+[OptionBool]
+GUIName = Scale width to fit 16:9
+OptionName = WIDTH_169
+DependentOption = WIDTH_UNLOCK
+DefaultValue = false
+
+[OptionBool]
+GUIName = Apply sharp bilinear for custom widths
+OptionName = SHARP_BILINEAR
+DefaultValue = false
+
+[OptionRangeFloat]
+GUIName = Sharp bilinear factor (0 = auto)
+OptionName = SHARP_PRESCALE
+MinValue = 0.0
+MaxValue = 16.0
+StepAmount = 1.0
+DefaultValue = 0.0
+
+[OptionBool]
+GUIName = Manual scale - Set IR first
+OptionName = MANUALSCALE
+DefaultValue = false
+
+[OptionRangeFloat]
+GUIName = Integer scale - No higher than IR
+OptionName = INTEGER_SCALE
+DependentOption = MANUALSCALE
+MaxValue = 5.0
+MinValue = 1.0
+DefaultValue = 1.0
+StepAmount = 1.0
+
+[OptionRangeFloat]
+GUIName = Scale width
+OptionName = WIDTH_SCALE
+DependentOption = MANUALSCALE
+MaxValue = 5.0
+MinValue = -2.0
+DefaultValue = 0.0
+StepAmount = 1.0
+
+[OptionBool]
+GUIName = Auto downscaling
+OptionName = DOWNSCALE
+DefaultValue = false
+
+[/configuration]
+*/
+
+// Entry point. Draws the source image at an integer multiple of its size,
+// centred in the window and surrounded by transparent black, with optional
+// non-integer width handling and an optional horizontal sharp-bilinear remap.
+// All behaviour is driven by the options declared in the configuration block
+// of this shader.
+void main()
+{
+	float4 c0 = float4(0.0, 0.0, 0.0, 0.0);
+	float2 scale = float2(1.0, 1.0);
+	float2 xfb_res = GetResolution();
+	float2 win_res = GetWindowResolution();
+	float2 coords = GetCoordinates();
+
+	// ratio is used to rescale the coords to the xfb size, which allows for integer scaling.
+	// ratio can then be multiplied by an integer to upscale/downscale, but upscale isn't supported by
+	// point-sampling.
+	float2 ratio = win_res / xfb_res;
+
+	if (OptionEnabled(WIDTH_UNLOCK))
+	{
+		if (OptionEnabled(WIDTH_SKIP))
+			ratio.x = 1.0;
+		else if (OptionEnabled(WIDTH_43))
+			ratio.x = win_res.x / (xfb_res.y * 4.0 / 3.0);
+		else if (OptionEnabled(WIDTH_169))
+			ratio.x = win_res.x / (xfb_res.y * 16.0 / 9.0);
+	}
+
+	if (OptionEnabled(MANUALSCALE))
+	{
+		// There's no IR variable, so this guesses the IR, but may be off for some games.
+		float calc_ir = ceil(xfb_res.y / 500.0);
+		scale.y = calc_ir / GetOption(INTEGER_SCALE);
+		float manual_width = GetOption(WIDTH_SCALE);
+
+		// Negative values multiply the horizontal scale, non-negative ones divide it.
+		if (manual_width < 0.0)
+			scale.x = scale.y * (abs(manual_width) + 1.0);
+		else
+			scale.x = scale.y / (manual_width + 1.0);
+
+		ratio = ratio * scale;
+	}
+	else if (OptionEnabled(DOWNSCALE) && (ratio.x < 1.0 || ratio.y < 1.0))
+	{
+		// The window is smaller than the image on at least one axis: pick the
+		// smallest whole divisor that makes it fit on both.
+		scale.x = ceil(max(1.0 / ratio.y, 1.0 / ratio.x));
+		scale.y = scale.x;
+		ratio = ratio * scale;
+	}
+
+	// y and x are used to determine black bars vs drawable space.
+	float y = win_res.y - xfb_res.y / scale.y;
+	float y_top = (y / 2.0) * GetInvWindowResolution().y;
+	float y_bottom = (win_res.y - y / 2.0) * GetInvWindowResolution().y;
+	float yloc = (coords.y - y_top) * ratio.y;
+
+	float x = win_res.x - xfb_res.x / scale.x;
+
+	if (OptionEnabled(WIDTH_UNLOCK))
+	{
+		if (OptionEnabled(WIDTH_SKIP))
+			x = 0.0;
+		else if (OptionEnabled(WIDTH_43))
+			x = win_res.x - xfb_res.y / scale.y * 4.0 / 3.0;
+		else if (OptionEnabled(WIDTH_169))
+			x = win_res.x - xfb_res.y / scale.y * 16.0 / 9.0;
+	}
+
+	float x_left = (x / 2.0) * GetInvWindowResolution().x;
+	float x_right = (win_res.x - x / 2.0) * GetInvWindowResolution().x;
+	float xloc = (coords.x - x_left) * ratio.x;
+
+	// Sharp bilinear is only applied when the width is not an integer multiple.
+	if (OptionEnabled(SHARP_BILINEAR) &&
+		(OptionEnabled(WIDTH_SKIP) || OptionEnabled(WIDTH_43) || OptionEnabled(WIDTH_169) ||
+			(OptionEnabled(MANUALSCALE) && GetOption(WIDTH_SCALE) != 0.0)))
+	{
+		float texel = xloc * xfb_res.x;
+		float texel_floored = floor(texel);
+		float s = frac(texel);
+		float scale_sharp = GetOption(SHARP_PRESCALE);
+
+		// 0 means "auto": derive the factor from the selected width mode.
+		if (scale_sharp == 0.0)
+		{
+			// Float literals are required here: with integer literals `4 / 3`
+			// and `16 / 9` both truncate to 1, which drops the aspect factor
+			// and can push the result below 1, giving clamp() below an
+			// inverted range.
+			if (OptionEnabled(WIDTH_43))
+				scale_sharp = (4.0 / 3.0 * xfb_res.y / xfb_res.x);
+			else if (OptionEnabled(WIDTH_169))
+				scale_sharp = (16.0 / 9.0 * xfb_res.y / xfb_res.x);
+			else
+				scale_sharp = ceil(win_res.x / xfb_res.x);
+		}
+
+		float region_range = 0.5 - 0.5 / scale_sharp;
+		float center_dist = s - 0.5;
+		float f = (center_dist - clamp(center_dist, -region_range, region_range)) * scale_sharp + 0.5;
+
+		float mod_texel = texel_floored + f;
+		xloc = mod_texel / xfb_res.x;
+	}
+
+	// Pixels outside the drawable rectangle keep the transparent-black default.
+	if (coords.x >= x_left && x_right >= coords.x && coords.y >= y_top && y_bottom >= coords.y)
+	{
+		float2 sample_loc = float2(xloc, yloc);
+		c0 = SampleLocation(sample_loc);
+	}
+
+	SetOutput(c0);
+}
@@ -0,0 +1,4 @@
+// Inverts the current sample: each channel (alpha included) becomes 1 minus itself.
+void main()
+{
+	float4 white = float4(1.0, 1.0, 1.0, 1.0);
+	float4 source = Sample();
+	SetOutput(white - source);
+}
@@ -0,0 +1,4 @@
+// Subtracts the current sample from the constant colour (0, 0, 0.7, 1).
+void main()
+{
+	float4 base_colour = float4(0.0, 0.0, 0.7, 1.0);
+	float4 source = Sample();
+	SetOutput(base_colour - source);
+}
@@ -0,0 +1,7 @@
+// Outputs the difference between the current sample and the sample at
+// offset (5, 5), channel by channel.
+void main()
+{
+	float4 shifted = SampleOffset(int2(5, 5));
+	SetOutput(Sample() - shifted);
+}
@@ -0,0 +1,93 @@
+/*
+[configuration]
+
+[OptionRangeFloat]
+GUIName = Distortion amount
+OptionName = DISTORTION_FACTOR
+MinValue = 1.0
+MaxValue = 10.0
+StepAmount = 0.5
+DefaultValue = 4.0
+
+[OptionRangeFloat]
+GUIName = Eye Distance Offset
+OptionName = EYE_OFFSET
+MinValue = 0.0
+MaxValue = 10.0
+StepAmount = 0.25
+DefaultValue = 5.0
+
+[OptionRangeFloat]
+GUIName = Zoom adjustment
+OptionName = SIZE_ADJUST
+MinValue = 0.0
+MaxValue = 1.0
+StepAmount = 0.025
+DefaultValue = 0.5
+
+[OptionRangeFloat]
+GUIName = Aspect Ratio adjustment
+OptionName = ASPECT_ADJUST
+MinValue = 0.0
+MaxValue = 1.0
+StepAmount = 0.025
+DefaultValue = 0.5
+
+[/configuration]
+*/
+
+
+// Radial lens distortion for a two-layer stereo image. The source position is
+// found by pushing the pixel outwards along its radius with a polynomial in
+// the radius; pixels whose source position falls outside [0, 1] are written
+// as transparent black.
+void main()
+{
+  // Distortion strength and the two polynomial coefficients derived from it
+  float strength = GetOption(DISTORTION_FACTOR) * 0.01;
+  float coeff_r2 = strength * 3.0;
+  float coeff_r4 = strength * 5.0;
+
+  // Zoom and horizontal stretch
+  float zoom = 1.0 - GetOption(SIZE_ADJUST) + 0.5;
+  float stretch_x = 1.25 - GetOption(ASPECT_ADJUST);
+
+  // Horizontal centre shift: positive for layer 1, negative for every other layer
+  float shift_amount = GetOption(EYE_OFFSET) * 0.01;
+  float shift_x = (src_layer == 1) ? shift_amount : 0.0 - shift_amount;
+  float2 centre_shift = float2(shift_x, 0.0);
+
+  // Position relative to the shifted centre, scaled to [-1, 1]
+  float2 ndc = (GetCoordinates() - 0.5 - centre_shift) * 2.0;
+
+  // Radius of this pixel and the radius it is fetched from
+  float dest_radius = length(ndc);
+  float src_radius = dest_radius * zoom + ( coeff_r2 * pow(dest_radius, 2.0) + coeff_r4 * pow(dest_radius, 4.0));
+
+  // Same direction, new radius, then the horizontal stretch.
+  // NOTE(review): normalize() of a zero-length vector is undefined, which
+  // affects a pixel lying exactly on the shifted centre - confirm this is acceptable.
+  float2 src_ndc = normalize(ndc) * src_radius * float2(stretch_x, 1.0);
+
+  // Back from [-1, 1] to [0, 1], undoing the centre shift
+  float2 uv = (src_ndc/2.0) + float2(0.5, 0.5) + centre_shift;
+
+  if (clamp(uv, 0.0, 1.0) == uv)
+  {
+    // Inside the image: fetch the source colour
+    SetOutput(SampleLocation(uv));
+  }
+  else
+  {
+    // Beyond the bounds
+    SetOutput(float4(0.0, 0.0, 0.0, 0.0));
+  }
+}
@@ -0,0 +1,19 @@
+void main()
+{
+	// Three-tone look: strongly red pixels are output as pure red; every
+	// other pixel is embossed and then thresholded to white or black.
+	float2 here = GetCoordinates();
+	float2 texel = GetInvResolution();
+
+	// Emboss term: doubled difference along one diagonal minus the
+	// difference along the other diagonal.
+	float4 emboss = (SampleLocation(here + texel) - SampleLocation(here - texel)) * 2.0;
+	emboss -= (SampleLocation(here + float2(1, -1) * texel).rgba - SampleLocation(here + float2(-1, 1) * texel).rgba);
+	float4 color = Sample();
+
+	// A pixel counts as red only when green and blue are both low. Summing
+	// blue twice would ignore green and let yellow/orange pixels through.
+	if (color.r > 0.8 && color.g + color.b < 0.2)
+	{
+		SetOutput(float4(1, 0, 0, 0));
+	}
+	else
+	{
+		color += emboss;
+
+		// Weighted brightness above 0.5 becomes white, the rest black.
+		if (dot(color.rgb, float3(0.3, 0.5, 0.2)) > 0.5)
+			SetOutput(float4(1, 1, 1, 1));
+		else
+			SetOutput(float4(0, 0, 0, 0));
+	}
+}
@@ -0,0 +1,10 @@
+void main()
+{
+	// Green-only output. The source green is kept unless it is below 0.5,
+	// in which case the sum of red and blue is used instead.
+	float4 src = Sample();
+	float g = (src.g < 0.50) ? (src.r + src.b) : src.g;
+
+	SetOutput(float4(0.0, g, 0.0, 1.0));
+}
@@ -0,0 +1,62 @@
+// Edge-darkening amount along one axis for a position given in pixels.
+// The position is folded around the middle of a fixed 1278-pixel extent,
+// capped at 95% of the half extent, and turned into a distance-like value d.
+// The loop then walks upwards from d / 10 in steps of 0.1 until the square
+// reaches d. For d above 100 the start value already squares above d, so the
+// loop does not run and the result is simply d / 10.
+float edgeShade(float pixel)
+{
+	float internalresolution = 1278.0;
+	float halfRes = internalresolution / 2.0;
+	float d = pixel;
+
+	if (d > halfRes)
+		d = internalresolution - d;
+	if (d > halfRes * 0.95)
+		d = halfRes * 0.95;
+	d = -d + 641.0;
+
+	// Hand-rolled root approximation; makes the darkened area rounded
+	// instead of rhombus-shaped.
+	float root = d / 10.0;
+	while ((root * root) < d)
+		root += 0.1;
+
+	return root;
+}
+
+void main()
+{
+	// Green monochrome: blur, greyscale, brighten, then darken the edges.
+	float2 here = GetCoordinates();
+	float2 texel = GetInvResolution();
+
+	// blur: average of the eight neighbours at a distance of blursize texels
+	float blursize = 1.5;
+	float4 blurtotal = float4(0.0, 0.0, 0.0, 0.0);
+	blurtotal += SampleLocation(here + float2(-blursize, -blursize) * texel);
+	blurtotal += SampleLocation(here + float2(-blursize,  blursize) * texel);
+	blurtotal += SampleLocation(here + float2( blursize, -blursize) * texel);
+	blurtotal += SampleLocation(here + float2( blursize,  blursize) * texel);
+	blurtotal += SampleLocation(here + float2(-blursize,  0.0) * texel);
+	blurtotal += SampleLocation(here + float2( blursize,  0.0) * texel);
+	blurtotal += SampleLocation(here + float2( 0.0, -blursize) * texel);
+	blurtotal += SampleLocation(here + float2( 0.0,  blursize) * texel);
+	blurtotal *= 0.125;
+
+	// greyscale of the blurred colour
+	float grey = ((0.3 * blurtotal.r) + (0.4 * blurtotal.g) + (0.3 * blurtotal.b));
+
+	// brighten
+	grey = grey * 0.5 + 0.7;
+
+	// darken edges, one contribution per axis
+	float x = edgeShade(here.x * GetResolution().x) * 2.0;
+	float y = edgeShade(here.y * GetResolution().y) * 2.0;
+	grey -= x / 200.0;
+	grey -= y / 200.0;
+
+	// output
+	SetOutput(float4(0.0, grey, 0.0, 1.0));
+}
@@ -0,0 +1,71 @@
+// Edge-darkening amount along one axis for a position given in pixels.
+// The position is folded around the middle of a fixed 1278-pixel extent,
+// capped at 95% of the half extent, and turned into a distance-like value d.
+// The loop then walks upwards from d / 10 in steps of 0.1 until the square
+// reaches d. For d above 100 the start value already squares above d, so the
+// loop does not run and the result is simply d / 10.
+float edgeShade(float pixel)
+{
+	float internalresolution = 1278.0;
+	float halfRes = internalresolution / 2.0;
+	float d = pixel;
+
+	if (d > halfRes)
+		d = internalresolution - d;
+
+	if (d > halfRes * 0.95)
+		d = halfRes * 0.95;
+
+	d = -d + 641.0;
+
+	// Hand-rolled root approximation; makes the darkened area rounded
+	// instead of rhombus-shaped.
+	float root = d / 10.0;
+	while ((root * root) < d)
+		root += 0.1;
+
+	return root;
+}
+
+void main()
+{
+	// Green monochrome with scanlines: blur, greyscale, brighten with a
+	// repeating line pattern, then darken the edges.
+	float2 here = GetCoordinates();
+	float2 texel = GetInvResolution();
+
+	// blur: average of the eight neighbours at a distance of blursize texels
+	float blursize = 1.5;
+	float4 blurtotal = float4(0.0, 0.0, 0.0, 0.0);
+	blurtotal += SampleLocation(here + float2(-blursize, -blursize) * texel);
+	blurtotal += SampleLocation(here + float2(-blursize,  blursize) * texel);
+	blurtotal += SampleLocation(here + float2( blursize, -blursize) * texel);
+	blurtotal += SampleLocation(here + float2( blursize,  blursize) * texel);
+	blurtotal += SampleLocation(here + float2(-blursize,  0.0) * texel);
+	blurtotal += SampleLocation(here + float2( blursize,  0.0) * texel);
+	blurtotal += SampleLocation(here + float2( 0.0, -blursize) * texel);
+	blurtotal += SampleLocation(here + float2( 0.0,  blursize) * texel);
+	blurtotal *= 0.125;
+
+	// greyscale of the blurred colour
+	float grey = ((0.3 * blurtotal.r) + (0.4 * blurtotal.g) + (0.3 * blurtotal.b));
+
+	// brighten and apply horizontal scanlines.
+	// The pattern repeats every 9 pixel rows: the fractional part of
+	// (row / 9), scaled back by 9, is the row position within the period,
+	// i.e. ((row mod 9) - 4) / 40 written with floor().
+	float vPos = here.y * GetResolution().y / 9.0;
+	float lineIntensity = (((vPos - floor(vPos)) * 9.0) - 4.0) / 40.0;
+	grey = grey * 0.5 + 0.7 + lineIntensity;
+
+	// darken edges, one contribution per axis
+	float x = edgeShade(here.x * GetResolution().x) * 2.0;
+	float y = edgeShade(here.y * GetResolution().y) * 2.0;
+	grey -= x / 200.0;
+	grey -= y / 200.0;
+
+	// output
+	SetOutput(float4(0.0, grey, 0.0, 1.0));
+}
@@ -0,0 +1,18 @@
+void main()
+{
+	// Per-channel cut-off: a channel passes through unchanged only when it
+	// is above 0.25, otherwise it is set to zero. Alpha is forced to 1.
+	float4 src = Sample();
+
+	float r = (src.r > 0.25) ? src.r : 0.0;
+	float g = (src.g > 0.25) ? src.g : 0.0;
+	float b = (src.b > 0.25) ? src.b : 0.0;
+
+	SetOutput(float4(r, g, b, 1.0));
+}
@@ -0,0 +1,18 @@
+// Maps one channel value onto three bands:
+//   below 0.25          -> returned unchanged
+//   0.25 up to 0.35     -> 0.5
+//   everything else     -> 1.0
+float bound(float color)
+{
+	return (color < 0.25) ? color : ((color < 0.35) ? 0.5 : 1.0);
+}
+
+void main()
+{
+	// Apply the banding to red, green and blue; alpha is passed through.
+	float4 src = Sample();
+	float r = bound(src.r);
+	float g = bound(src.g);
+	float b = bound(src.b);
+
+	SetOutput(float4(r, g, b, src.a));
+}
@@ -0,0 +1,77 @@
+void main()
+{
+	// Snaps each pixel to one of six fixed colours depending on which
+	// channel is strictly the smallest and how the other two compare.
+	// When no channel is strictly the smallest, or the two larger channels
+	// differ by exactly 0.05, the source colour is passed through.
+	float4 c0 = Sample();
+	float3 rgb = c0.rgb;
+
+	// Levels used for the "on" and "off" channels of the output colours.
+	float hi = 0.7;
+	float lo = 0.05;
+
+	// The three outer conditions are mutually exclusive (only one channel
+	// can be strictly below both others), so they form a single chain.
+	if (c0.r > c0.g && c0.b > c0.g)
+	{
+		// green is the smallest channel
+		if (c0.r < c0.b + 0.05 && c0.b < c0.r + 0.05)
+			rgb = float3(hi, lo, hi);   // red ~ blue
+		else if (c0.r > c0.b + 0.05)
+			rgb = float3(hi, lo, lo);   // red dominates
+		else if (c0.b > c0.r + 0.05)
+			rgb = float3(lo, lo, hi);   // blue dominates
+	}
+	else if (c0.r > c0.b && c0.g > c0.b)
+	{
+		// blue is the smallest channel
+		if (c0.r < c0.g + 0.05 && c0.g < c0.r + 0.05)
+			rgb = float3(hi, hi, lo);   // red ~ green
+		else if (c0.r > c0.g + 0.05)
+			rgb = float3(hi, lo, lo);   // red dominates
+		else if (c0.g > c0.r + 0.05)
+			rgb = float3(lo, hi, lo);   // green dominates
+	}
+	else if (c0.g > c0.r && c0.b > c0.r)
+	{
+		// red is the smallest channel
+		if (c0.g < c0.b + 0.05 && c0.b < c0.g + 0.05)
+			rgb = float3(lo, hi, hi);   // green ~ blue
+		else if (c0.g > c0.b + 0.05)
+			rgb = float3(lo, hi, lo);   // green dominates
+		else if (c0.b > c0.g + 0.05)
+			rgb = float3(lo, lo, hi);   // blue dominates
+	}
+
+	SetOutput(float4(rgb.r, rgb.g, rgb.b, c0.a));
+}
@@ -0,0 +1,13 @@
+void main()
+{
+	// Tinted monochrome: a weighted grey value drives all three channels,
+	// with green and blue scaled down. Alpha is passed through.
+	float4 src = Sample();
+	float luma = (0.222 * src.r) + (0.707 * src.g) + (0.071 * src.b);
+
+	// The 0.75 / 0.5 scale factors are hand-picked to give the yellowish tint.
+	float g = luma * .75;
+	float b = luma * .5;
+
+	SetOutput(float4(luma, g, b, src.a));
+}
@@ -0,0 +1,17 @@
+void main()
+{
+	// Sketch look: brightens the image and subtracts a scaled edge term
+	// built from the four diagonal neighbours at offsets of (+-2, +-2).
+	float4 c0 = Sample();
+
+	// Sum of the differences between this pixel and each diagonal neighbour.
+	float4 diff = float4(0.0, 0.0, 0.0, 0.0);
+	diff += c0 - SampleOffset(int2( 2,  2));
+	diff += c0 - SampleOffset(int2(-2, -2));
+	diff += c0 - SampleOffset(int2( 2, -2));
+	diff += c0 - SampleOffset(int2(-2,  2));
+
+	// Weighted grey value of the difference sum.
+	float grey = ((0.222 * diff.r) + (0.707 * diff.g) + (0.071 * diff.b));
+
+	// NOTE: a "bottom line" workaround used to overwrite the difference sum
+	// here for rows below 163 pixels. It ran after grey had been computed and
+	// the sum was not read again, so it never affected the output.
+
+	c0 = c0 + 1.0 - grey * 7.0;
+	SetOutput(float4(c0.r, c0.g, c0.b, 1.0));
+}
@@ -0,0 +1,18 @@
+void main()
+{
+	// Red/blue remap with green always zero and alpha forced to 1.
+	// Red is always green + blue of the source. Blue is red + blue of the
+	// source, but only when neither "red below 0.35" nor "blue above 0.5"
+	// holds; otherwise blue stays zero.
+	float4 src = Sample();
+	float r = src.g + src.b;
+	float b = 0.0;
+
+	bool suppressBlue = (src.r < 0.35 || src.b > 0.5);
+	if (!suppressBlue)
+		b = src.r + src.b;
+
+	SetOutput(float4(r, 0.0, b, 1.0));
+}
@@ -0,0 +1,20 @@
+void main()
+{
+	// Channel remap with alpha forced to 1.
+	// Red is always green + blue of the source. Green and blue are both
+	// red + blue of the source, but only when neither "red below 0.35" nor
+	// "blue above 0.5" holds; otherwise they stay zero.
+	float4 src = Sample();
+	float r = src.g + src.b;
+	float gb = 0.0;
+
+	bool suppress = (src.r < 0.35 || src.b > 0.5);
+	if (!suppress)
+		gb = src.r + src.b;
+
+	SetOutput(float4(r, gb, gb, 1.0));
+}
@@ -0,0 +1,5 @@
+void main()
+{
+	// Per-channel gain: red x1.5, blue x0.5, green and alpha unchanged.
+	float4 gain = float4(1.5, 1.0, 0.5, 1.0);
+	SetOutput(Sample() * gain);
+}
@@ -0,0 +1,4 @@
+void main()
+{
+	// Channel swap: output (r, g, b) = source (b, g, r); alpha unchanged.
+	float4 src = Sample();
+	SetOutput(float4(src.b, src.g, src.r, src.a));
+}
@@ -0,0 +1,4 @@
+void main()
+{
+	// Channel swap: output (r, g, b) = source (b, r, g); alpha unchanged.
+	float4 src = Sample();
+	SetOutput(float4(src.b, src.r, src.g, src.a));
+}
@@ -0,0 +1,4 @@
+void main()
+{
+	// Channel swap: output (r, g, b) = source (g, b, r); alpha unchanged.
+	float4 src = Sample();
+	SetOutput(float4(src.g, src.b, src.r, src.a));
+}
@@ -0,0 +1,4 @@
+void main()
+{
+	// Channel swap: output (r, g, b) = source (g, r, b); alpha unchanged.
+	float4 src = Sample();
+	SetOutput(float4(src.g, src.r, src.b, src.a));
+}
@@ -0,0 +1,4 @@
+void main()
+{
+	// Channel swap: output (r, g, b) = source (r, b, g); alpha unchanged.
+	float4 src = Sample();
+	SetOutput(float4(src.r, src.b, src.g, src.a));
+}
@@ -0,0 +1,20 @@
+void main()
+{
+	// Two-way channel remap with alpha forced to 1.
+	//  - source red below 0.3 or source blue above 0.5:
+	//      red = g + b/2, green = 0, blue = r + b
+	//  - otherwise:
+	//      red = g + b,   green = r + b, blue = 0
+	float4 src = Sample();
+	float redPlusBlue = src.r + src.b;
+	float3 rgb;
+
+	if (src.r < 0.3 || src.b > 0.5)
+		rgb = float3(src.g + src.b / 2.0, 0.0, redPlusBlue);
+	else
+		rgb = float3(src.g + src.b, redPlusBlue, 0.0);
+
+	SetOutput(float4(rgb.r, rgb.g, rgb.b, 1.0));
+}