From 4740c9a85c6f4b6e4b84587e948990821491e521 Mon Sep 17 00:00:00 2001
From: benkyd <benjaminkyd@gmail.com>
Date: Wed, 11 May 2022 13:44:34 +0000
Subject: [PATCH] Working version

---
 LICENSE                                       |   21 +
 legacy/resources/shaders/simple.frag          |   27 +
 legacy/resources/shaders/simple.vert          |   26 +
 legacy/resources/textures/bedrock.png         |  Bin 0 -> 225 bytes
 legacy/resources/textures/cobblestone.png     |  Bin 0 -> 568 bytes
 legacy/resources/textures/dirt.png            |  Bin 0 -> 266 bytes
 legacy/resources/textures/grass_side.png      |  Bin 0 -> 660 bytes
 legacy/resources/textures/grass_top.png       |  Bin 0 -> 766 bytes
 legacy/resources/textures/stone.png           |  Bin 0 -> 223 bytes
 legacy/src/config.hpp                         |   16 +
 legacy/src/game.cpp                           |  184 +
 legacy/src/game.hpp                           |   55 +
 legacy/src/main.cpp                           |   13 +
 legacy/src/physics/collider.cpp               |   35 +
 legacy/src/physics/collider.hpp               |   39 +
 legacy/src/renderer/renderer.cpp              |   16 +
 legacy/src/renderer/renderer.hpp              |   18 +
 legacy/src/util/filereader.cpp                |   15 +
 legacy/src/util/filereader.hpp                |   14 +
 legacy/src/world/block.cpp                    |   52 +
 legacy/src/world/block.hpp                    |   86 +
 legacy/src/world/chunk/chunk.cpp              |  295 +
 legacy/src/world/chunk/chunk.hpp              |   70 +
 legacy/src/world/chunk/face.hpp               |  127 +
 legacy/src/world/chunk/voxel.cpp              |  132 +
 legacy/src/world/chunk/voxel.hpp              |   33 +
 legacy/src/world/entity.cpp                   |   59 +
 legacy/src/world/entity.hpp                   |   51 +
 legacy/src/world/generator/chunkgenerator.cpp |   14 +
 legacy/src/world/generator/chunkgenerator.hpp |    6 +
 legacy/src/world/generator/chunkmanager.hpp   |   28 +
 legacy/src/world/world.cpp                    |  191 +
 legacy/src/world/world.hpp                    |   80 +
 src/Rendering/camera.cpp                      |  209 +
 src/Rendering/camera.hpp                      |   55 +
 src/Rendering/face.hpp                        |  128 +
 src/Rendering/frustrum.cpp                    |    3 +
 src/Rendering/frustrum.hpp                    |   29 +
 src/Rendering/mesh.cpp                        |    6 +
 src/Rendering/mesh.hpp                        |   27 +
 src/Rendering/renderable.cpp                  |   66 +
 src/Rendering/renderable.hpp                  |   45 +
 src/Rendering/rendermaster.cpp                |   11 +
 src/Rendering/rendermaster.hpp                |   41 +
 src/Rendering/shader.cpp                      |  121 +
 src/Rendering/shader.hpp                      |   30 +
 src/Rendering/texture.cpp                     |   60 +
 src/Rendering/texture.hpp                     |   14 +
 src/Rendering/voxelmesh.hpp                   |   23 +
 src/ThirdParty/fastnoise.cpp                  | 2250 +++++
 src/ThirdParty/fastnoise.hpp                  |  311 +
 src/ThirdParty/glad.c                         | 1843 ++++
 src/ThirdParty/stb_image.hpp                  | 7568 +++++++++++++++++
 src/ThirdParty/stb_image_write.hpp            | 7568 +++++++++++++++++
 src/display.cpp                               |  120 +
 src/display.hpp                               |   42 +
 src/settings.hpp                              |   14 +
 src/threadpool.hpp                            |    3 +
 src/utilities.hpp                             |   10 +
 59 files changed, 22300 insertions(+)
 create mode 100644 LICENSE
 create mode 100644 legacy/resources/shaders/simple.frag
 create mode 100644 legacy/resources/shaders/simple.vert
 create mode 100644 legacy/resources/textures/bedrock.png
 create mode 100644 legacy/resources/textures/cobblestone.png
 create mode 100644 legacy/resources/textures/dirt.png
 create mode 100644 legacy/resources/textures/grass_side.png
 create mode 100644 legacy/resources/textures/grass_top.png
 create mode 100644 legacy/resources/textures/stone.png
 create mode 100644 legacy/src/config.hpp
 create mode 100644 legacy/src/game.cpp
 create mode 100644 legacy/src/game.hpp
 create mode 100644 legacy/src/main.cpp
 create mode 100644 legacy/src/physics/collider.cpp
 create mode 100644 legacy/src/physics/collider.hpp
 create mode 100644 legacy/src/renderer/renderer.cpp
 create mode 100644 legacy/src/renderer/renderer.hpp
 create mode 100644 legacy/src/util/filereader.cpp
 create mode 100644 legacy/src/util/filereader.hpp
 create mode 100644 legacy/src/world/block.cpp
 create mode 100644 legacy/src/world/block.hpp
 create mode 100644 legacy/src/world/chunk/chunk.cpp
 create mode 100644 legacy/src/world/chunk/chunk.hpp
 create mode 100644 legacy/src/world/chunk/face.hpp
 create mode 100644 legacy/src/world/chunk/voxel.cpp
 create mode 100644 legacy/src/world/chunk/voxel.hpp
 create mode 100644 legacy/src/world/entity.cpp
 create mode 100644 legacy/src/world/entity.hpp
 create mode 100644 legacy/src/world/generator/chunkgenerator.cpp
 create mode 100644 legacy/src/world/generator/chunkgenerator.hpp
 create mode 100644 legacy/src/world/generator/chunkmanager.hpp
 create mode 100644 legacy/src/world/world.cpp
 create mode 100644 legacy/src/world/world.hpp
 create mode 100644 src/Rendering/camera.cpp
 create mode 100644 src/Rendering/camera.hpp
 create mode 100644 src/Rendering/face.hpp
 create mode 100644 src/Rendering/frustrum.cpp
 create mode 100644 src/Rendering/frustrum.hpp
 create mode 100644 src/Rendering/mesh.cpp
 create mode 100644 src/Rendering/mesh.hpp
 create mode 100644 src/Rendering/renderable.cpp
 create mode 100644 src/Rendering/renderable.hpp
 create mode 100644 src/Rendering/rendermaster.cpp
 create mode 100644 src/Rendering/rendermaster.hpp
 create mode 100644 src/Rendering/shader.cpp
 create mode 100644 src/Rendering/shader.hpp
 create mode 100644 src/Rendering/texture.cpp
 create mode 100644 src/Rendering/texture.hpp
 create mode 100644 src/Rendering/voxelmesh.hpp
 create mode 100644 src/ThirdParty/fastnoise.cpp
 create mode 100644 src/ThirdParty/fastnoise.hpp
 create mode 100644 src/ThirdParty/glad.c
 create mode 100644 src/ThirdParty/stb_image.hpp
 create mode 100644 src/ThirdParty/stb_image_write.hpp
 create mode 100644 src/display.cpp
 create mode 100644 src/display.hpp
 create mode 100644 src/settings.hpp
 create mode 100644 src/threadpool.hpp
 create mode 100644 src/utilities.hpp

diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..0ec9b90
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2021 Benjamin Kyd
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/legacy/resources/shaders/simple.frag b/legacy/resources/shaders/simple.frag
new file mode 100644
index 0000000..949c2ec
--- /dev/null
+++ b/legacy/resources/shaders/simple.frag
@@ -0,0 +1,27 @@
+#version 450
+
+vec3 SkyColour = vec3(186.0f / 255.0f, 214.0f / 255.0f, 254.0f / 255.0f);
+
+in vec3 TexCoord;
+in float Distance;
+
+out vec4 outColour;
+
+uniform sampler2DArray tex;
+
+void main() {
+
+	outColour = texture(tex, TexCoord);
+	//outColour = vec4(.9, .9, .9, 1);
+
+	if (outColour.w == .0)
+		discard;
+	
+	float fogMax = 60000;
+	
+	vec3 colour = mix(outColour.xyz, SkyColour, min(1.0f, Distance / fogMax));
+
+	// Retain fragment transparency
+	outColour = vec4(colour, outColour.w);
+
+}
diff --git a/legacy/resources/shaders/simple.vert b/legacy/resources/shaders/simple.vert
new file mode 100644
index 0000000..bad13e0
--- /dev/null
+++ b/legacy/resources/shaders/simple.vert
@@ -0,0 +1,26 @@
+#version 450 
+
+layout (location = 0) in vec3 position;
+layout (location = 1) in vec3 texcoord;
+
+out vec3 TexCoord;
+out float Distance;
+
+uniform mat4 model;
+uniform mat4 view;
+uniform mat4 proj;
+
+void main() {
+	
+	TexCoord = texcoord;
+	
+	gl_Position = proj * view * model * vec4(position, 1.0);
+
+	// Not a true distance: this is the squared length of the clip-space position
+	Distance = (
+		gl_Position.x * gl_Position.x + 
+		gl_Position.y * gl_Position.y + 
+		gl_Position.z * gl_Position.z
+	);
+
+}
diff --git a/legacy/resources/textures/bedrock.png b/legacy/resources/textures/bedrock.png
new file mode 100644
index 0000000000000000000000000000000000000000..1643c9958fa499aa6a57a36efbde0c479e00fe6a
GIT binary patch
literal 225
zcmV<703QE|P)<h;3K|Lk000e1NJLTq000mG000mO1^@s6AM^iV0001}Nkl<Zc-oCp
z+Y!JZ2;<Lz9L510%5mGnyYvDLwI8&Km<MQMjH~w^!gFh_9j&$fuj<S{d?AMy^4<Y)
z$Imu^CxOaqTm~DHVQo19z?{$Wg9Vmbg|g>F#%PL-74aEz1~lKyIiDiVXa7M##mtc#
z%Zg3STy?z)`(m#A!{X~9<&Lv_W@UB83}8|6RqkoxpZOmj5{fI&u##0A$Ed6X1#0Hi
bBT~ROrseuXuHE=G00000NkvXXu0mjfLQ-Os

literal 0
HcmV?d00001

diff --git a/legacy/resources/textures/cobblestone.png b/legacy/resources/textures/cobblestone.png
new file mode 100644
index 0000000000000000000000000000000000000000..da3498c5c8d0f279a5e8f62e254241debe421344
GIT binary patch
literal 568
zcmV-80>}M{P)<h;3K|Lk000e1NJLTq000mG000mO1^@s6AM^iV00061Nkl<Zc-l=<
z*UG9v4BQ9w7dv*bD=1bF#e!YIexX<pQB=hG20mI2lS39S!nd1m(q^)%R;y7Y5}{Bi
zM4!)xZnqmX8V$;3vwS`tk5sSM8Dp_n$nADhI-RD=<-&M191b}c;K$=Z{eGX~@i^am
zy&i+c<1tMp6UK$ZVLF{oTm+`zJRA;GsZ_KRm5^YuSZH6j+a-s?p}SqJR@82{|0jTL
zw_CzExm=Ec2x>N)G@VZQ44>2Kr1SYq5@WSmNv+pwt_HxbB@zh^G?`3fx7!)B*=%%2
zNRY{7NWI^0s#dF%&*$m&dNFCw=aT`F===TBe!u5H=?GlmpDL9~eDVAJwA<~d)oL*x
zU|w2<AqZp+1_PSU=ZuFUs$4D;k`#+YuK4%&M+Sp|&rs-ky)qX*pN|zlr<fggsHH?g
zF#$u;<#MV0v)PPNsT7kv8jUC#jdEv7+QS&qCWwp0Vq8rUAQ=CRMkD<eRnXky@o>As
zy3CzSCV5U_9{+;DpbiFK3bVqAFTe<d5>NzyLZLurvsovFYFH`|sW%Z4VIQzvE|-fJ
z3hN5V?)N)alLR<-IvqAv0oCv@u<N2NIuahB4!}r?1dtd~%62M%z$y~Ki2!`D2T&sr
z2$0w7)!PR$Wm}<y!i%%nY$OXMnW6FTt3yX16+RgWlK%t2yKoAhtoe`t0000<MNUMn
GLSTX&oBR#{

literal 0
HcmV?d00001

diff --git a/legacy/resources/textures/dirt.png b/legacy/resources/textures/dirt.png
new file mode 100644
index 0000000000000000000000000000000000000000..617d353e0d67a14a2952b2a98375eb8a50b97d79
GIT binary patch
literal 266
zcmV+l0rmcgP)<h;3K|Lk000e1NJLTq000mG000mO1^@s6AM^iV0002dNkl<Zc-nQ6
zu?<2o3`D&SD<IKP(IbJx2q|e80R0Wvh3P;S>7w`V=NDEI+d2EruJ8Nt`?O29*Y((K
z=DVK`tGLh7MTtz%ds&v@Ga&DZ$;xQ1!Z)iihj}Hm*4l8eo1~Im3;+ie&}VWdpbDk0
z+*Vd{tJH?~A!2frcub(;r0KIVImhR2X8sP4PM&T#KQlJngXEsPNu_c;<ZNL3NyS>#
zj2A0tbfmyB_f1S^g6x^QT2YWaZ|9A0rZL;l$|NdG{#j@PkUL<J_U7*bfA)nat9qBH
QSpWb407*qoM6N<$f~2*1VgLXD

literal 0
HcmV?d00001

diff --git a/legacy/resources/textures/grass_side.png b/legacy/resources/textures/grass_side.png
new file mode 100644
index 0000000000000000000000000000000000000000..9ceef3b7537d70e63beb4aad1d74600e4466ff7f
GIT binary patch
literal 660
zcmV;F0&D$=P)<h;3K|Lk000e1NJLTq000mG000mO1^@s6AM^iV00078Nkl<ZcwS>H
z+3fY7k(rU<-=BXB-#+|e00DMx7KYDne=u+furu(9aWee+_J`s7$6pM8|NdiOVqygI
zKfU?EAg;{EzyyR0Kfe41E9MpDVBnMBWOzB@5d%9nD_H!+tq%<R;+za(N_-4ld~9IN
z96(w~hMVEvzyAzO+=A>3Zzewm+w}3}cZM%OlR>Uv<6>d>GXDjGpd>fgAdo9X6nGiF
zEPToE;n`P)-{1c-yq)rdficon^S`jZGsD}9d%@0^D3)h<(fg1A2DoCy7``oj#UPp|
z%kX;AV+I8YL52q}-h-t<Y{qFhUjHAz{J<b9F2JBHDa5c~$5{qjeMJV47)TXNF^CPa
zi1+0=u-d~H?}H6v0VxD20vT}k`8%*6x<-&qU@@R8d4Vnf=>ds37$`9??re?uf8fGB
zxRd#ed>KBS+YIJ|6o42Yd1N-oGMKBGWrcYfK#D*LK&F0sc9VghhlAnM*B@ZN{{8=-
z;nLmb47}VN4DY{u2aAK`K!$+yF~JxhTR>ib`5opB5FZpKAU?7jC=5V+7MN2&0x*BT
z*f75%Yee@76HF5<P+(q3@Gu64HYk8WVjv7+gIo#1ps)fN%pB#U+W@lx<X@0KjZ~!=
zKwbic#a9Vq29__+!2V}qWCVwYJTN9+-?;=1BgV>5tN$RAVQ~)&6HuB!jvWRx0P_mi
z3&;uQ-kV?GL<Djs%rKZYV2KK(8RiuxSe(KXvHtxAb{fcApuhwv289R6HL%z~b{PxI
uWgtb!wt@ml0hpjbVlXwZ7=uMA$Wj2wqC!&rQlG{E0000<MNUMnLSTX|nja<r

literal 0
HcmV?d00001

diff --git a/legacy/resources/textures/grass_top.png b/legacy/resources/textures/grass_top.png
new file mode 100644
index 0000000000000000000000000000000000000000..9c4366c3097989a7096a2bd9ad8260a016a3af6c
GIT binary patch
literal 766
zcmV<a0s;MrP)<h;3K|Lk000e1NJLTq000mG000mO1^@s6AM^iV0008TNkl<ZI8QxO
zTS${(7=FHam`>;BhEp3evn`jpnYGF^DuTM`Ky(w`6a;l0ff3PN_uY0G2zD(|B2pJ2
zUX+E$tenTqX>(3@a?Hc@i|38};N$22zxR9I=kWfj>%%o|Fc`46w}(t71Fq82QY4c}
zn9XL`Y&L9dZDDhBQ)*bt_e3HAm&*n9wC}pk<#MPq*MLtJ%xx^8q@)BpJ3DeIaMUl)
zrOj1rY;2&ctPH!myC^IygnISqo_07K*xug8m+27{6%|2iEdX3B;sv|GEN5e(pTdqT
zFST|+-Fe-jEf$L$k}|vUawHPL{{FrIaGOjf*`%M(=TT5lATrd;!Mi#X3JJ~ca}zk#
z;1e0aU=VJ%8x<85csJNDEZXXu1?+2ZRKVC&Js-HGNqKQ`5$%m9F&_Sfs;VlHVbvyt
zGf@Gd!PK42W?{8jQCwUsTit&X(BA3>(G*Jmk?yHf3Pz(*irIy92~Lw|;vv*q4q+u1
zhar`Rdfa<OQx`u*vN5?3kH_KldWDtUZkIEfM}Nr1X{y51%8bZX54z84x4jNdr&F>f
zgKx9rIO91XCRJBgOXrb=A3~i{YK%5v%cY+6-_&bsYq;n?Cni$6qsS@)#CJ-eVM-|h
zTV7s9Yu#~7Ed540orZex@wPVSjYuFX$di$2T=2C^N6UUGd_Es~Klj1!_hWW;7UkvT
zGBb>rzNv>j9h%XNE>5A*aRjm5B-Yp0MJuWEN%yIrk{BZw0XR3RXR=GPYb+KML=M&e
z*=Gtgw7I!ix>A3O^O(dpJToS2RQK2u&De&fh7U%Vu~GAl&a0T8pNGfeK{y=7>P|vZ
zibkVos&)UH1O5x7GoOfvDVN*M3xkE^vH;Rxn#aX+GGJPa{BT7r)T6hZy0WrT4v8EH
w1f+oT?ss`5iO#0ect137FjS<@Cj1Hi0=0vO8h?Sj)c^nh07*qoM6N<$f;^FEhyVZp

literal 0
HcmV?d00001

diff --git a/legacy/resources/textures/stone.png b/legacy/resources/textures/stone.png
new file mode 100644
index 0000000000000000000000000000000000000000..87e19ff46b57f2bb86553459b2ebb2805ebbc55a
GIT binary patch
literal 223
zcmV<503iQ~P)<h;3K|Lk000e1NJLTq000mG000mO1^@s6AM^iV0001{Nkl<Zc-oCp
z!41S949mC-z#leX<eq}$%DB$8kF;PyY$wDR<LbR1k=R;mM{8~WtGvh$UqEmo<5>`Q
z@@yAK5Xrp8Ww0?B)}a6jDz5Sm3r@C%T7;e8YL1N+@fol{wcN}(?<B_6|Dm8_=FrBn
z;&3x}U9TekF<1U!@$rzf<1C+9SzRy#Se$%^-A(*5|4$D|Wx7eIiq|nZD~AF%^Xd_~
Z!apudcQEXJ=B5Au002ovPDHLkV1liwV4MH|

literal 0
HcmV?d00001

diff --git a/legacy/src/config.hpp b/legacy/src/config.hpp
new file mode 100644
index 0000000..ca4aea7
--- /dev/null
+++ b/legacy/src/config.hpp
@@ -0,0 +1,16 @@
+#ifndef MINECRAFT_CONFIG_H_
+#define MINECRAFT_CONFIG_H_
+
+#include "common.hpp"
+
+class Config {
+public:
+
+	std::string ResourceBase = MC_RESOURCES;
+    // std::string ResourceBase = "E:/Games/minecraft/resources/";
+
+};
+
+static Config GameConfig;
+
+#endif
diff --git a/legacy/src/game.cpp b/legacy/src/game.cpp
new file mode 100644
index 0000000..03feba9
--- /dev/null
+++ b/legacy/src/game.cpp
@@ -0,0 +1,184 @@
+#include "game.hpp"
+
+// For glm::vec2 as the key of a hashmap
+#define GLM_ENABLE_EXPERIMENTAL
+
+#define LOGGER_DEFINITION
+#include <logger.h>
+
+#include "renderer/renderer.hpp"
+#include "renderer/texture.hpp"
+#include "renderer/shader.hpp"
+#include "renderer/camera.hpp"
+
+#include "world/chunk/chunk.hpp"
+#include "world/entity.hpp"
+#include "world/world.hpp"
+#include "world/block.hpp"
+
+#include "common.hpp"
+#include "config.hpp"
+
+
+Game::Game() {
+
+}
+
+void Game::Setup(int w, int h) {
+
+	m_logger = std::make_shared<Logger>();
+
+	*m_logger << "----------------" << LOGGER_ENDL;
+	*m_logger << "Minecraft 1.14.2" << LOGGER_ENDL;
+	*m_logger << "----------------" << LOGGER_ENDL;
+	*m_logger << LOGGER_ENDL;
+
+#ifdef __DEBUG
+	*m_logger << LOGGER_DEBUG << "Debug mode enabled" << LOGGER_ENDL;
+#endif
+
+	*m_logger << LOGGER_INFO << "Initializing display" << LOGGER_ENDL;
+	SDL_Init(SDL_INIT_VIDEO | SDL_INIT_AUDIO);
+
+	SDL_GL_SetAttribute(SDL_GL_RED_SIZE, 8);
+	SDL_GL_SetAttribute(SDL_GL_GREEN_SIZE, 8);
+	SDL_GL_SetAttribute(SDL_GL_BLUE_SIZE, 8);
+	SDL_GL_SetAttribute(SDL_GL_ALPHA_SIZE, 8);
+	SDL_GL_SetAttribute(SDL_GL_BUFFER_SIZE, 32);
+	SDL_GL_SetAttribute(SDL_GL_DOUBLEBUFFER, 1);
+
+	SDL_GL_SetAttribute(SDL_GL_MULTISAMPLEBUFFERS, 1);
+	SDL_GL_SetAttribute(SDL_GL_MULTISAMPLESAMPLES, 4);
+
+	SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 4);
+	SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 5);
+
+	// Create GL window
+	*m_logger << LOGGER_INFO << "Creating window" << LOGGER_ENDL;
+	m_window = SDL_CreateWindow("Minecraft 1.14.2",
+		SDL_WINDOWPOS_CENTERED,
+		SDL_WINDOWPOS_CENTERED, w, h,
+		SDL_WINDOW_OPENGL | SDL_WINDOW_RESIZABLE);
+
+	// Create GL context
+	*m_logger << LOGGER_INFO << "Creating OpenGL context" << LOGGER_ENDL;
+	m_glContext = SDL_GL_CreateContext(m_window);
+
+	SDL_SetRelativeMouseMode(SDL_TRUE);
+
+	// Set VSYNC swap interval
+	SDL_GL_SetSwapInterval(1);
+
+	*m_logger << LOGGER_INFO << "Display set up" << LOGGER_ENDL;
+
+	// Load OpenGL
+	gladLoadGLLoader(SDL_GL_GetProcAddress);
+	glEnable(GL_MULTISAMPLE);
+	// glEnable(GL_CULL_FACE);
+	glCullFace(GL_BACK);
+	glEnable(GL_DEPTH_TEST);
+	// glPolygonMode(GL_FRONT_AND_BACK, GL_LINE);
+	
+	*m_logger << LOGGER_INFO << "Loaded OpenGL" << LOGGER_ENDL;
+	*m_logger << LOGGER_ENDL;
+	IsDisplayOpen = true;
+
+	std::shared_ptr<Camera> playercamera = std::make_shared<Camera>(w, h);
+	m_player = std::make_shared<Player>(glm::vec3(0), glm::vec3(0), playercamera);
+
+	std::shared_ptr<CBlockDictionary> BlockDictionary = CBlockDictionary::GetInstance();
+	
+	BlockDictionary->Build();
+
+	m_world = std::make_shared<World>();
+	
+	Texture texture;
+	m_world->SetTextureMap(texture.LoadTextures(BlockDictionary->Textures));
+
+	m_world->LoadWorld();
+	
+}
+
+void Game::Input(SDL_Event* e) {
+
+	Uint8* state = (Uint8*)SDL_GetKeyboardState(NULL);
+
+	while (SDL_PollEvent(e)) {
+
+
+		switch (e->type) {
+
+			case SDL_KEYDOWN:
+			{
+
+				if (e->key.keysym.sym == SDLK_ESCAPE) {
+					
+					IsMouseActive = !IsMouseActive;
+
+					if (IsMouseActive)
+						SDL_SetRelativeMouseMode(SDL_TRUE);
+					else
+						SDL_SetRelativeMouseMode(SDL_FALSE);
+
+				}
+
+				break;
+
+			}
+
+			case SDL_WINDOWEVENT: 
+			{
+
+				if (e->window.event == SDL_WINDOWEVENT_RESIZED) {
+
+					m_player->CameraUpdateProjection(e->window.data1, e->window.data2);
+					glViewport(0, 0, e->window.data1, e->window.data2);
+
+				}
+
+				break;
+
+			}
+
+			case SDL_QUIT: 
+			{
+
+				IsDisplayOpen = false;
+
+				break;
+
+			}
+
+		}
+
+		if (IsMouseActive) m_player->HandleMouseSDL(*e);
+
+	}
+
+	m_player->MoveSDL(state);
+
+}
+
+void Game::Run() {
+	// Main loop: handle input, clear, update the world, render, then swap buffers.
+	SDL_Event e;
+	// glClearBufferfv(GL_COLOR, ...) reads four floats (RGBA), so alpha must be supplied
+	const float clear[] = { 186.0f / 255.0f, 214.0f / 255.0f, 254.0f / 255.0f, 1.0f };
+
+	m_renderer = std::make_unique<Renderer>();
+
+	while (IsDisplayOpen) {
+
+		Input(&e);
+
+		glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
+		glClearBufferfv(GL_COLOR, 0, clear);
+
+		m_world->Update(m_player);
+		m_renderer->Render(m_world, m_player);
+
+		SDL_GL_SwapWindow(m_window);
+
+	}
+
+}
diff --git a/legacy/src/game.hpp b/legacy/src/game.hpp
new file mode 100644
index 0000000..daee357
--- /dev/null
+++ b/legacy/src/game.hpp
@@ -0,0 +1,55 @@
+#ifndef MINECRAFT_GAME_H_
+#define MINECRAFT_GAME_H_
+
+#ifndef NDEBUG
+#define __DEBUG
+#endif
+// __DEBUG is currently forced on for every build configuration
+#define __DEBUG
+// #define __IMGUI
+
+#include <memory>
+#include <string>
+#include <map>
+
+#if _WIN32
+#include <SDL.h>
+#else
+#include <SDL2/SDL.h>
+#endif
+
+class Logger;
+
+class Renderer;
+class Camera;
+
+class Player;
+class World;
+	
+class Game {
+public:
+	Game();
+
+	void Setup(int w, int h);
+
+	void Input(SDL_Event* e);
+
+	void Run();
+
+	bool IsDisplayOpen = false;
+	bool IsMouseActive = true;
+
+private:
+	SDL_Window* m_window = nullptr;
+	SDL_GLContext m_glContext = nullptr;
+
+	std::shared_ptr<Logger> m_logger;
+
+	std::shared_ptr<Renderer> m_renderer;
+	std::shared_ptr<World> m_world;
+
+	std::shared_ptr<Player> m_player;
+	
+};
+
+#endif
diff --git a/legacy/src/main.cpp b/legacy/src/main.cpp
new file mode 100644
index 0000000..6fc4fe4
--- /dev/null
+++ b/legacy/src/main.cpp
@@ -0,0 +1,13 @@
+#include <iostream>
+
+#include "game.hpp"
+
+int main(int argc, char** argv) {
+
+	Game game;
+	game.Setup(1080, 720);
+	game.Run();
+
+	return 0;
+
+}
diff --git a/legacy/src/physics/collider.cpp b/legacy/src/physics/collider.cpp
new file mode 100644
index 0000000..07fda40
--- /dev/null
+++ b/legacy/src/physics/collider.cpp
@@ -0,0 +1,35 @@
+#include "collider.hpp"
+
+EntityCollider::EntityCollider() {
+
+}
+
+glm::vec3 EntityCollider::TerrainCollide(std::vector<uint8_t> terrain) {
+
+    // TODO: not implemented; return the zero vector so the result is defined
+    return glm::vec3(0.0f);
+}
+
+bool EntityCollider::m_aabb(ColliderBox a, ColliderBox b) {
+
+    return {
+        (a.Min.x <= b.Min.x + b.Max.x && a.Min.x + a.Max.x >= b.Min.x) && 
+        (a.Min.y <= b.Min.y + b.Max.y && a.Min.y + a.Max.y >= b.Min.y) && 
+        (a.Min.z <= b.Min.z + b.Max.z && a.Min.z + a.Max.z >= b.Min.z) 
+    };
+
+}
+
+float EntityCollider::m_xDepth(ColliderBox a, ColliderBox b) {
+    return 0.0f; // TODO: not implemented; placeholder so the return value is defined
+}
+
+
+float EntityCollider::m_yDepth(ColliderBox a, ColliderBox b) {
+    return 0.0f; // TODO: not implemented; placeholder so the return value is defined
+}
+
+
+float EntityCollider::m_zDepth(ColliderBox a, ColliderBox b) {
+    return 0.0f; // TODO: not implemented; placeholder so the return value is defined
+}
diff --git a/legacy/src/physics/collider.hpp b/legacy/src/physics/collider.hpp
new file mode 100644
index 0000000..49e24f6
--- /dev/null
+++ b/legacy/src/physics/collider.hpp
@@ -0,0 +1,39 @@
+#ifndef MINECRAFT_PHYSICS_COLLIDER_H_
+#define MINECRAFT_PHYSICS_COLLIDER_H_
+
+#include "../common.hpp"
+
+class ColliderBox {
+public:
+    glm::vec3 Min;
+    glm::vec3 Max;
+};
+
+class Collider : public ColliderBox {
+public:
+
+};
+
+// TODO: Trees
+class EntityCollider {
+public:
+
+    EntityCollider();
+
+    // Surrounding blocks indexed XYZ
+    // Returns point of collision 
+    glm::vec3 TerrainCollide(std::vector<uint8_t> surroundingBlocks);     
+
+    ColliderBox Bounds;
+
+private:
+
+    bool m_aabb(ColliderBox a, ColliderBox b);
+    
+    float m_xDepth(ColliderBox a, ColliderBox b);
+    float m_yDepth(ColliderBox a, ColliderBox b);
+    float m_zDepth(ColliderBox a, ColliderBox b);
+
+};
+
+#endif
diff --git a/legacy/src/renderer/renderer.cpp b/legacy/src/renderer/renderer.cpp
new file mode 100644
index 0000000..f927606
--- /dev/null
+++ b/legacy/src/renderer/renderer.cpp
@@ -0,0 +1,16 @@
+#include "renderer.hpp"
+
+#include "../world/chunk/chunk.hpp"
+#include "../world/world.hpp"
+#include "shader.hpp"
+
+Renderer::Renderer() {
+
+}
+
+// Perform the render passes
+void Renderer::Render(std::shared_ptr<World> world, std::shared_ptr<Entity> entity) {
+
+	world->Render(entity);
+
+}
diff --git a/legacy/src/renderer/renderer.hpp b/legacy/src/renderer/renderer.hpp
new file mode 100644
index 0000000..3edbec3
--- /dev/null
+++ b/legacy/src/renderer/renderer.hpp
@@ -0,0 +1,18 @@
+#ifndef MINECRAFT_RENDERER_RENDERER_H_
+#define MINECRAFT_RENDERER_RENDERER_H_
+
+#include "../common.hpp"
+
+class Entity;
+class World;
+
+// Does GL render passes then returns to the game loop
+class Renderer {
+public:
+	Renderer();
+
+	void Render(std::shared_ptr<World> world, std::shared_ptr<Entity> entity);
+
+};
+
+#endif
diff --git a/legacy/src/util/filereader.cpp b/legacy/src/util/filereader.cpp
new file mode 100644
index 0000000..8228074
--- /dev/null
+++ b/legacy/src/util/filereader.cpp
@@ -0,0 +1,15 @@
+#include "filereader.hpp"
+
+#include <fstream>
+
+FileReader::FileReader() {
+
+}
+
+std::string FileReader::LoadTextFromFile(std::string path) {
+	std::ifstream t(path);
+	std::string text((std::istreambuf_iterator<char>(t)),
+		std::istreambuf_iterator<char>());
+	return text;
+}
+
diff --git a/legacy/src/util/filereader.hpp b/legacy/src/util/filereader.hpp
new file mode 100644
index 0000000..dc1cf24
--- /dev/null
+++ b/legacy/src/util/filereader.hpp
@@ -0,0 +1,14 @@
+#ifndef MINECRAFT_UTIL_FILEREADER_H_
+#define MINECRAFT_UTIL_FILEREADER_H_
+
+#include <string>
+
+class FileReader {
+public:
+	FileReader();
+
+	std::string LoadTextFromFile(std::string path);
+
+};
+
+#endif
diff --git a/legacy/src/world/block.cpp b/legacy/src/world/block.cpp
new file mode 100644
index 0000000..39b400d
--- /dev/null
+++ b/legacy/src/world/block.cpp
@@ -0,0 +1,52 @@
+#include "block.hpp"
+
+#include "../config.hpp"
+
+#include <iostream>
+
+std::shared_ptr<CBlockDictionary> CBlockDictionary::Instance;
+
+std::shared_ptr<CBlockDictionary> CBlockDictionary::GetInstance() {
+
+	if (!CBlockDictionary::Instance) {
+
+		CBlockDictionary::Instance = std::make_shared<CBlockDictionary>();
+
+	}
+
+	return CBlockDictionary::Instance;
+
+}
+
+
+void CBlockDictionary::Build() {
+
+	// Order matters !
+	RegisterTexture("stone.png");
+	RegisterTexture("dirt.png");
+	RegisterTexture("grass_side.png");
+	RegisterTexture("grass_top.png");
+	RegisterTexture("cobblestone.png");
+	RegisterTexture("bedrock.png");
+	
+	// Texture winding order - top, bottom, left, right, front, back
+	RegisterBlock(EBlockType::Air, 			{ });
+	RegisterBlock(EBlockType::Stone,		{ EFaceTexture::Stone,			EFaceTexture::Stone,		EFaceTexture::Stone,		EFaceTexture::Stone,		EFaceTexture::Stone,		EFaceTexture::Stone });
+	RegisterBlock(EBlockType::Dirt,			{ EFaceTexture::Dirt,			EFaceTexture::Dirt,			EFaceTexture::Dirt,			EFaceTexture::Dirt,			EFaceTexture::Dirt,			EFaceTexture::Dirt });
+	RegisterBlock(EBlockType::Grass,		{ EFaceTexture::Grass,			EFaceTexture::Dirt,			EFaceTexture::GrassSide,	EFaceTexture::GrassSide,	EFaceTexture::GrassSide,	EFaceTexture::GrassSide });
+	RegisterBlock(EBlockType::Cobblestone,	{ EFaceTexture::Cobblestone,	EFaceTexture::Cobblestone,	EFaceTexture::Cobblestone,	EFaceTexture::Cobblestone,	EFaceTexture::Cobblestone,	EFaceTexture::Cobblestone });
+	RegisterBlock(EBlockType::Bedrock,		{ EFaceTexture::Bedrock,		EFaceTexture::Bedrock,		EFaceTexture::Bedrock,		EFaceTexture::Bedrock,		EFaceTexture::Bedrock,		EFaceTexture::Bedrock });
+
+}
+
+void CBlockDictionary::RegisterTexture(std::string texture) {
+
+	Textures.push_back(texture);
+
+}
+
+void CBlockDictionary::RegisterBlock(EBlockType::Block block, std::vector<uint16_t> faceTextures) {
+
+	BlockEntries[block] = std::make_shared<CBlockEntry>((uint8_t)block, faceTextures);
+
+}
diff --git a/legacy/src/world/block.hpp b/legacy/src/world/block.hpp
new file mode 100644
index 0000000..ea7686e
--- /dev/null
+++ b/legacy/src/world/block.hpp
@@ -0,0 +1,86 @@
+#ifndef MINECRAFT_WORLD_BLOCK_H_
+#define MINECRAFT_WORLD_BLOCK_H_
+
+#include "../common.hpp"
+
+
+namespace EBlockType {
+
+	enum Block : uint8_t {
+
+		Air = 0,
+		Stone,
+		Grass,
+		Dirt,
+		Cobblestone,
+		Bedrock
+
+	};
+
+}
+
+namespace EFaceTexture {
+
+	enum Texture : uint16_t {
+
+		Stone,
+		Dirt,
+		GrassSide,
+		Grass,
+		Cobblestone,
+		Bedrock
+
+	};
+
+}
+
+// Texture winding order - top, bottom, left, right, front, back
+class CBlockEntry {
+public:
+
+	CBlockEntry(uint8_t id, std::vector<uint16_t> faceTextures)
+		: ID(id), FaceTextures(faceTextures) { }
+
+	uint8_t ID;
+	std::vector<uint16_t> FaceTextures;
+	
+};
+
+// TODO: Make design of the class data oriented
+// ie, import all the data used in the build from
+// files and that
+class CBlockDictionary {
+public:
+
+	static std::shared_ptr<CBlockDictionary> GetInstance();
+
+	static std::shared_ptr<CBlockDictionary> Instance;
+
+public:
+
+	void Build();
+
+	// The index of a texture's path in this array is equal to
+	// that texture's ID, to be referenced in the block entry
+	std::vector<std::string> Textures;
+
+	// Only supports up to 255 blocks, 0 being air.
+	// The world stores chunks, which are 16x128x16 (see chunk.hpp)
+	// vectors of uint8_t which reference the block dictionary
+	std::map<uint8_t, std::shared_ptr<CBlockEntry>> BlockEntries;
+
+	// Expects textures to be inserted in order, 0-...
+	void RegisterTexture(std::string texture);
+	
+	void RegisterBlock(EBlockType::Block block, std::vector<uint16_t> faceTextures);
+
+};
+
+
+// static std::vector<std::pair<int, std::string>> TextureIdsAndPaths {
+// 	{0, "dirt.png"},
+// 	{1, "grass_side.png"},
+// 	{2, "grass_top.png"}
+// };
+
+#endif
diff --git a/legacy/src/world/chunk/chunk.cpp b/legacy/src/world/chunk/chunk.cpp
new file mode 100644
index 0000000..d46c902
--- /dev/null
+++ b/legacy/src/world/chunk/chunk.cpp
@@ -0,0 +1,295 @@
+#include "chunk.hpp"
+#include "voxel.hpp"
+
+#include "../../renderer/shader.hpp"
+#include "../../renderer/camera.hpp"
+
+#include "../block.hpp"
+
+#include "../../util/fastnoise.hpp"
+
+#include <random>
+
+static std::default_random_engine generator;
+
+Chunk::Chunk() {
+
+}
+
+Chunk::Chunk(int x, int z) {
+
+	X = x, Z = z;
+
+	Load();
+
+}
+
+Chunk::Chunk(int x, int z, std::vector<uint8_t> voxels) {
+
+	X = x, Z = z;
+	
+	Voxels = voxels;
+	
+	Load();
+
+}
+
+Chunk::Chunk(int x, int z, std::shared_ptr<FastNoise> terrainGenerator) {
+
+	X = x, Z = z;
+	int y;
+
+	for (x = 0; x < CHUNK_WIDTH; x++)
+	for (y = 0; y < CHUNK_HEIGHT; y++)
+	for (z = 0; z < CHUNK_DEPTH; z++) {
+
+		if (y == 0) {
+			Voxels.push_back((uint8_t)EBlockType::Bedrock);
+			continue;
+		}
+
+		if (y == 1 && (float)rand() / (float)RAND_MAX > 0.5f) {
+			Voxels.push_back((uint8_t)EBlockType::Bedrock);
+			continue;
+		}
+
+		if (pow((y / (float)CHUNK_HEIGHT), 1.1024f) + terrainGenerator->GetNoise(x + (Z * CHUNK_WIDTH), y, z + (X * CHUNK_DEPTH))  * 0.40f < 0.5f) {
+	
+			Voxels.push_back((uint8_t)EBlockType::Grass);
+			continue;
+				
+		}
+		
+		Voxels.push_back((uint8_t)EBlockType::Air);
+
+	}
+
+	for (x = 0; x < CHUNK_WIDTH; x++)
+	for (y = 0; y < CHUNK_HEIGHT; y++)
+	for (z = 0; z < CHUNK_DEPTH; z++) {
+
+		if (BlockAt(x, y, z) == EBlockType::Bedrock)
+			continue;
+
+		// No need for bounds checking as a closed loop
+		if (BlockAt(x, y + 1, z) == EBlockType::Grass)
+			Voxels[x + CHUNK_WIDTH * (y + CHUNK_HEIGHT * z)] = EBlockType::Dirt;
+
+	}
+
+	// Add stone 3 layers below dirt
+	for (x = 0; x < CHUNK_WIDTH; x++)
+	for (y = 0; y < CHUNK_HEIGHT; y++)
+	for (z = 0; z < CHUNK_DEPTH; z++) {
+
+		if (BlockAt(x, y, z) == EBlockType::Bedrock)
+			continue;
+
+		if (BlockAt(x, y + 1, z) == EBlockType::Dirt)
+		if (BlockAt(x, y + 2, z) == EBlockType::Dirt) 
+		// if (BlockAt(x, y + 3, z) == EBlockType::Dirt) 
+			Voxels[x + CHUNK_WIDTH * (y + CHUNK_HEIGHT * z)] = EBlockType::Stone;
+	
+	}
+	
+	// Add the rest of the stone
+	for (x = 0; x < CHUNK_WIDTH; x++)
+	for (y = 0; y < CHUNK_HEIGHT; y++)
+	for (z = 0; z < CHUNK_DEPTH; z++) {
+
+		if (BlockAt(x, y, z) == EBlockType::Bedrock)
+			continue;
+
+		if (BlockAt(x, y + 1, z) == EBlockType::Stone)
+			Voxels[x + CHUNK_WIDTH * (y + CHUNK_HEIGHT * z)] = EBlockType::Stone;
+
+	}
+
+	Load();
+
+}
+
+void Chunk::Load() {
+
+	if (Loaded)
+		return;
+
+	m_model = glm::translate(glm::mat4(1.0f), { X * CHUNK_WIDTH, 0, Z * CHUNK_DEPTH });
+
+	if (!Voxels.empty()) {
+		m_mesh();
+		Loaded = true;
+		return;
+	}
+
+	// Generate a superflat chunk if nothing is there
+	// [x + WIDTH * (y + HEIGHT * z)]
+	for (int x = 0; x < CHUNK_WIDTH; x++)
+	for (int y = 0; y < CHUNK_HEIGHT; y++)
+	for (int z = 0; z < CHUNK_DEPTH; z++) {
+
+		if (y > 32) {
+			Voxels.push_back((uint8_t)EBlockType::Air);
+			continue;
+		}
+
+		if (y == 0)
+			Voxels.push_back((uint8_t)EBlockType::Bedrock);
+		else if (y < 28)
+			Voxels.push_back((uint8_t)EBlockType::Stone);
+		else if (y < 32)
+			Voxels.push_back((uint8_t)EBlockType::Dirt);
+		else
+			Voxels.push_back((uint8_t)EBlockType::Grass);
+
+	}
+
+	m_mesh();
+	Loaded = true;
+
+}
+
+void Chunk::Unload() {
+	// Releases the CPU-side mesh data and the GL objects; safe to call repeatedly.
+	m_vertices.clear();
+	m_uvs.clear();
+
+	glBindVertexArray(m_vao);
+	glDeleteBuffers(1, &m_vbo);
+	glDeleteVertexArrays(1, &m_vao);
+	// Zero the names so a second call (e.g. from ~Chunk) cannot delete
+	// objects whose names GL may have reused since the first call.
+	m_vbo = m_vao = 0;
+	Loaded = false;
+}
+
+void Chunk::UploadMesh() {
+	// Uploads the CPU-side mesh (positions followed by UVs) into a new VAO/VBO.
+	if (!MeshReady || !Loaded)
+		return;
+
+	glGenVertexArrays(1, &m_vao);
+	glBindVertexArray(m_vao);
+
+	glGenBuffers(1, &m_vbo);
+	glBindBuffer(GL_ARRAY_BUFFER, m_vbo);
+
+	// Non-interleaved layout: every position first, then every UV
+	std::vector<glm::vec3> data;
+	data.reserve(m_vertices.size() + m_uvs.size());
+	data.insert(data.end(), m_vertices.begin(), m_vertices.end());
+	data.insert(data.end(), m_uvs.begin(), m_uvs.end());
+
+	m_numVerts = m_vertices.size();
+
+	// data.data() is valid for an empty mesh, whereas &data[0] would be undefined
+	glBufferData(GL_ARRAY_BUFFER, data.size() * sizeof(glm::vec3), data.data(), GL_STATIC_DRAW);
+
+	glEnableVertexAttribArray(0);
+	glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 0, (const void*)0);
+
+	// UVs start immediately after the block of positions
+	glEnableVertexAttribArray(1);
+	glVertexAttribPointer(1, 3, GL_FLOAT, GL_FALSE, 0, (const void*)(m_vertices.size() * sizeof(glm::vec3)));
+
+	m_vertices.clear();
+	m_uvs.clear();
+	glBindVertexArray(0);
+	// NOTE(review): Render() returns early while MeshReady is false - confirm this is intended
+	MeshReady = !MeshReady;
+}
+
+void Chunk::Render(std::shared_ptr<Camera> camera, std::shared_ptr<Shader> shader) {
+
+	if (!MeshReady || !Loaded)
+		return;
+
+	shader->Use();
+	glBindVertexArray(m_vao);
+
+	GLint uniTrans = glGetUniformLocation(shader->Program, "model");
+	glUniformMatrix4fv(uniTrans, 1, GL_FALSE, glm::value_ptr(m_model));
+
+	GLint uniView = glGetUniformLocation(shader->Program, "view");
+	glUniformMatrix4fv(uniView, 1, GL_FALSE, glm::value_ptr(camera->GetViewMatrix()));
+
+	GLint uniProj = glGetUniformLocation(shader->Program, "proj");
+	glUniformMatrix4fv(uniProj, 1, GL_FALSE, glm::value_ptr(camera->GetProjectionMatrix()));
+
+	glDrawArrays(GL_TRIANGLES, 0, m_numVerts);
+
+}
+
+void Chunk::Update(std::vector<uint8_t> voxels) {
+
+	Voxels = voxels;
+	m_mesh();
+
+}
+
+uint8_t Chunk::BlockAt(int x, int y, int z) {
+
+	if (x > CHUNK_WIDTH  - 1) return 0;
+	if (y > CHUNK_HEIGHT - 1) return 0;
+	if (z > CHUNK_DEPTH  - 1) return 0;
+
+	if (x < 0) return 0;
+	if (y < 0) return 0;
+	if (z < 0) return 0;
+
+	return Voxels[x + CHUNK_WIDTH * (y + CHUNK_HEIGHT * z)];
+
+}
+
+void Chunk::m_mesh() {
+
+	// TODO: Use greedy meshing for MAXIMUM performance
+	for (int x = 0; x < CHUNK_WIDTH; x++)
+	for (int y = 0; y < CHUNK_HEIGHT; y++)
+	for (int z = 0; z < CHUNK_DEPTH; z++) {
+
+		std::vector<glm::vec3> tempVerts;
+		std::vector<glm::vec3> tempUVs;
+
+		uint8_t block = BlockAt(x, y, z);
+
+		if (block == EBlockType::Air) continue;
+
+		Voxel tmp({x, y, z}, block);
+
+		if (BlockAt(x + 1, y, z) == EBlockType::Air)
+			tmp.AddFace(EFaceType::Right);
+
+		if (BlockAt(x - 1, y, z) == EBlockType::Air)
+			tmp.AddFace(EFaceType::Left);
+
+		if (BlockAt(x, y + 1, z) == EBlockType::Air)
+			tmp.AddFace(EFaceType::Top);
+
+		if (BlockAt(x, y - 1, z) == EBlockType::Air)
+			tmp.AddFace(EFaceType::Bottom);
+
+		if (BlockAt(x, y, z + 1) == EBlockType::Air)
+			tmp.AddFace(EFaceType::Front);
+
+		if (BlockAt(x, y, z - 1) == EBlockType::Air)
+			tmp.AddFace(EFaceType::Back);
+		
+		tmp.GetMesh(tempVerts, tempUVs);
+
+		m_vertices.insert(m_vertices.end(), tempVerts.begin(), tempVerts.end());
+		m_uvs.insert(m_uvs.end(), tempUVs.begin(), tempUVs.end());
+
+		tmp.Clear();
+
+	}
+
+	MeshReady = true;
+
+}
+
+Chunk::~Chunk() {
+
+	Unload();
+
+}
diff --git a/legacy/src/world/chunk/chunk.hpp b/legacy/src/world/chunk/chunk.hpp
new file mode 100644
index 0000000..0422df7
--- /dev/null
+++ b/legacy/src/world/chunk/chunk.hpp
@@ -0,0 +1,70 @@
+#ifndef MINECRAFT_RENDERER_CHUNK_H_
+#define MINECRAFT_RENDERER_CHUNK_H_
+
+#include "../../common.hpp"
+
+#define CHUNK_HEIGHT 128
+#define CHUNK_WIDTH  16
+#define CHUNK_DEPTH  16
+
+class FastNoise;
+
+class Camera;
+class Shader;
+
+class Voxel;
+
+class Chunk {
+public:
+
+	Chunk();
+	Chunk(int x, int z);
+	Chunk(int x, int z, std::vector<uint8_t> voxels);
+	Chunk(int x, int z, std::shared_ptr<FastNoise> terrainGenerator);
+
+	void Load();
+	void Unload();
+
+	void UploadMesh();
+	bool MeshReady = false;
+
+	void Render(std::shared_ptr<Camera> camera, std::shared_ptr<Shader> shader);
+
+	void Update(std::vector<uint8_t> voxels);
+
+	uint8_t BlockAt(int x, int y, int z);
+
+	// Indexed sequentially [x + WIDTH * (y + HEIGHT * z)] = voxelID
+	// the voxel id is used to index the block dictionary to get properties
+	// to generate a mesh and send it to the GPU
+	std::vector<uint8_t> Voxels;
+
+	// To only be changed by the class its self 
+	bool Loaded = false;
+	// To only be changed by render components
+	bool ShouldRender = false;
+
+	// Chunk World pos
+	int X,Z;
+
+	~Chunk();
+
+private:
+
+	void m_mesh();
+
+	GLuint m_vao = 0;
+	GLuint m_vbo = 0;
+
+	// Must be translated by a multiple of 16 in the x or z, nothing in y
+	glm::mat4 m_model;
+
+	std::vector<glm::vec3> m_vertices;
+	int m_numVerts = 0;
+
+	std::vector<glm::vec3> m_uvs;
+
+
+};
+
+#endif
diff --git a/legacy/src/world/chunk/face.hpp b/legacy/src/world/chunk/face.hpp
new file mode 100644
index 0000000..20dc525
--- /dev/null
+++ b/legacy/src/world/chunk/face.hpp
@@ -0,0 +1,127 @@
+#ifndef MINECRAFT_RENDERER_FACE_H_
+#define MINECRAFT_RENDERER_FACE_H_
+
+#include "../../common.hpp"
+
+namespace EFaceType {
+
+	enum Face : uint8_t {
+		Top,
+		Bottom,
+		Left,
+		Right,
+		Front,
+		Back,
+	};
+
+}
+
+static std::vector<glm::vec3> CubeTopFace = {
+	{ -0.5f,  0.5f, -0.5f },
+	{  0.5f,  0.5f, -0.5f },
+	{  0.5f,  0.5f,  0.5f },
+	{  0.5f,  0.5f,  0.5f },
+	{ -0.5f,  0.5f,  0.5f },
+	{ -0.5f,  0.5f, -0.5f }
+};
+
+static std::vector<glm::vec2> CubeTopFaceUVs = {
+	{ 0.0f, 0.0f },
+	{ 1.0f, 0.0f },
+	{ 1.0f, 1.0f },
+	{ 1.0f, 1.0f },
+	{ 0.0f, 1.0f },
+	{ 0.0f, 0.0f }
+};
+
+static std::vector<glm::vec3> CubeBottomFace = {
+	{ -0.5f, -0.5f, -0.5f },
+	{  0.5f, -0.5f, -0.5f },
+	{  0.5f, -0.5f,  0.5f },
+	{  0.5f, -0.5f,  0.5f },
+	{ -0.5f, -0.5f,  0.5f },
+	{ -0.5f, -0.5f, -0.5f }
+};
+
+static std::vector<glm::vec2> CubeBottomFaceUVs = {
+	{ 0.0f, 0.0f },
+	{ 1.0f, 0.0f },
+	{ 1.0f, 1.0f },
+	{ 1.0f, 1.0f },
+	{ 0.0f, 1.0f },
+	{ 0.0f, 0.0f }
+};
+
+static std::vector<glm::vec3> CubeLeftFace = {
+	{ -0.5f,  0.5f,  0.5f },
+	{ -0.5f,  0.5f, -0.5f },
+	{ -0.5f, -0.5f, -0.5f },
+	{ -0.5f, -0.5f, -0.5f },
+	{ -0.5f, -0.5f,  0.5f },
+	{ -0.5f,  0.5f,  0.5f }
+};
+
+static std::vector<glm::vec2> CubeLeftFaceUVs = {
+	{ 0.0f, 0.0f },
+	{ 1.0f, 0.0f },
+	{ 1.0f, 1.0f },
+	{ 1.0f, 1.0f },
+	{ 0.0f, 1.0f },
+	{ 0.0f, 0.0f }
+};
+
+static std::vector<glm::vec3> CubeRightFace = {
+	{  0.5f,  0.5f,  0.5f },
+	{  0.5f,  0.5f, -0.5f },
+	{  0.5f, -0.5f, -0.5f },
+	{  0.5f, -0.5f, -0.5f },
+	{  0.5f, -0.5f,  0.5f },
+	{  0.5f,  0.5f,  0.5f },
+};
+
+static std::vector<glm::vec2> CubeRightFaceUVs = {
+	{ 0.0f, 0.0f },
+	{ 1.0f, 0.0f },
+	{ 1.0f, 1.0f },
+	{ 1.0f, 1.0f },
+	{ 0.0f, 1.0f },
+	{ 0.0f, 0.0f }
+};
+
+static std::vector<glm::vec3> CubeFrontFace = {
+	{ -0.5f, -0.5f,  0.5f },
+	{  0.5f, -0.5f,  0.5f },
+	{  0.5f,  0.5f,  0.5f },
+	{  0.5f,  0.5f,  0.5f },
+	{ -0.5f,  0.5f,  0.5f },
+	{ -0.5f, -0.5f,  0.5f }
+};
+
+static std::vector<glm::vec2> CubeFrontFaceUVs = {
+	{ 1.0f, 1.0f },
+	{ 0.0f, 1.0f },
+	{ 0.0f, 0.0f },
+	{ 0.0f, 0.0f },
+	{ 1.0f, 0.0f },
+	{ 1.0f, 1.0f }
+};
+
+static std::vector<glm::vec3> CubeBackFace = {
+	{ -0.5f, -0.5f, -0.5f },
+	{  0.5f, -0.5f, -0.5f },
+	{  0.5f,  0.5f, -0.5f },
+	{  0.5f,  0.5f, -0.5f },
+	{ -0.5f,  0.5f, -0.5f },
+	{ -0.5f, -0.5f, -0.5f }
+};
+
+static std::vector<glm::vec2> CubeBackFaceUVs = {
+	{ 1.0f, 1.0f },
+	{ 0.0f, 1.0f },
+	{ 0.0f, 0.0f },
+	{ 0.0f, 0.0f },
+	{ 1.0f, 0.0f },
+	{ 1.0f, 1.0f }
+};
+
+#endif
diff --git a/legacy/src/world/chunk/voxel.cpp b/legacy/src/world/chunk/voxel.cpp
new file mode 100644
index 0000000..b0ff73f
--- /dev/null
+++ b/legacy/src/world/chunk/voxel.cpp
@@ -0,0 +1,132 @@
+#include "voxel.hpp"
+
+#include <iostream>
+#include <memory>
+
+#include "../../renderer/shader.hpp"
+#include "../../renderer/camera.hpp"
+
+#include "face.hpp"
+
+#include "../block.hpp"
+
+Voxel::Voxel(glm::vec3 coordsInChunk, uint8_t block)
+	: Block(block)
+	, m_coordsInChunk(coordsInChunk) {
+
+	// Per-face textures are looked up in AddFace via FaceTextures[face], i.e.
+	// in EFaceType order: top, bottom, left, right, front, back
+
+}
+
+void Voxel::AddFace(EFaceType::Face face) {
+
+	// Appends one face of this voxel to its pending mesh:
+	//  - six vertices (two triangles) copied from the unit-cube template for
+	//    that face and shifted to the voxel's cell in the chunk;
+	//  - six texture coordinates, each the template's (u, v) with the texture
+	//    index of this block's face as the third component.
+	// The result is collected later through GetMesh().
+
+	std::vector<glm::vec3> faceVerts;
+	std::vector<glm::vec2> faceUVs;
+
+	// Copy the matching template pair from face.hpp
+	switch (face) {
+
+		// +Y side
+		case EFaceType::Top:
+		{
+			faceVerts = CubeTopFace;
+			faceUVs = CubeTopFaceUVs;
+			break;
+		}
+
+		// -Y side
+		case EFaceType::Bottom:
+		{
+			faceVerts = CubeBottomFace;
+			faceUVs = CubeBottomFaceUVs;
+			break;
+		}
+
+		// -X side
+		case EFaceType::Left:
+		{
+			faceVerts = CubeLeftFace;
+			faceUVs = CubeLeftFaceUVs;
+			break;
+		}
+
+		// +X side
+		case EFaceType::Right:
+		{
+			faceVerts = CubeRightFace;
+			faceUVs = CubeRightFaceUVs;
+			break;
+		}
+
+		// +Z side
+		case EFaceType::Front:
+		{
+			faceVerts = CubeFrontFace;
+			faceUVs = CubeFrontFaceUVs;
+			break;
+		}
+
+		// -Z side
+		case EFaceType::Back:
+		{
+			faceVerts = CubeBackFace;
+			faceUVs = CubeBackFaceUVs;
+			break;
+		}
+
+	}
+
+	// Shift the template from the origin to this voxel's cell and store it
+	faceVerts = m_translateIntoChunk(faceVerts, m_coordsInChunk);
+	m_vertices.insert(m_vertices.end(), faceVerts.begin(), faceVerts.end());
+
+	// Dictionary entry for this voxel's block ID
+	std::shared_ptr<CBlockEntry> entry = CBlockDictionary::GetInstance()->BlockEntries[Block];
+
+	const uint16_t layer = entry->FaceTextures[(uint16_t)face];
+
+	// Promote every (u, v) to (u, v, layer), keeping the template's order.
+	// Each template in face.hpp holds exactly six entries.
+	for (const glm::vec2& uv : faceUVs) {
+
+		m_uvs.push_back(glm::vec3(uv.x, uv.y, (float)layer));
+
+	}
+
+}
+
+void Voxel::GetMesh(std::vector<glm::vec3>& verts, std::vector<glm::vec3>& uvs) {
+
+	verts = m_vertices;
+	uvs = m_uvs;
+
+}
+
+void Voxel::Clear() {
+
+	m_vertices.clear();
+	m_uvs.clear();
+
+}
+
+std::vector<glm::vec3> Voxel::m_translateIntoChunk(std::vector<glm::vec3> verts, glm::vec3 trans) {
+
+	// verts arrives as a by-value copy, so it is offset in place and handed back
+	for (glm::vec3& vertex : verts) {
+
+		// Component-wise offset by trans
+		vertex += trans;
+
+	}
+
+	return verts;
+
+}
diff --git a/legacy/src/world/chunk/voxel.hpp b/legacy/src/world/chunk/voxel.hpp
new file mode 100644
index 0000000..1e1b7b9
--- /dev/null
+++ b/legacy/src/world/chunk/voxel.hpp
@@ -0,0 +1,33 @@
+#ifndef MINECRAFT_RENDERER_VOXEL_H_
+#define MINECRAFT_RENDERER_VOXEL_H_
+
+#include "../../common.hpp"
+
+#include "face.hpp"
+
+class Camera;
+class Shader;
+
+class Voxel {
+public:
+	Voxel(glm::vec3 coordsInChunk, uint8_t block);
+
+	void AddFace(EFaceType::Face face);
+	void GetMesh(std::vector<glm::vec3>& verts, std::vector<glm::vec3>& uvs);
+
+	void Clear();
+
+	uint8_t Block;
+
+private:
+
+	glm::vec3 m_coordsInChunk;
+
+	std::vector<glm::vec3> m_translateIntoChunk(std::vector<glm::vec3> verts, glm::vec3 trans);
+
+	std::vector<glm::vec3> m_vertices;
+	std::vector<glm::vec3> m_uvs;
+
+};
+
+#endif
diff --git a/legacy/src/world/entity.cpp b/legacy/src/world/entity.cpp
new file mode 100644
index 0000000..9e14d4f
--- /dev/null
+++ b/legacy/src/world/entity.cpp
@@ -0,0 +1,59 @@
+#include "entity.hpp"
+
+#include "../renderer/camera.hpp"
+
+Entity::Entity(glm::vec3 position, glm::vec3 direction, std::shared_ptr<Camera> camera)
+    : Position(position)   // was Position(Position): the member was initialised from itself, discarding the argument
+    , Direction(direction)
+    , Velocity(0.0f)       // start at rest rather than relying on glm's default construction
+    , EntityCamera(camera)
+    {   // The camera is optional: its view matrix is refreshed only when one was supplied
+    if (EntityCamera) {
+        EntityCamera->UpdateView();
+    }
+}
+
+Player::Player(glm::vec3 position, glm::vec3 direction, std::shared_ptr<Camera> camera)
+    : Entity(position, direction, camera) {
+    // NOTE(review): the position argument is overwritten with a fixed point here — confirm this is intended.
+    Position = { 0, 64, 0 };
+    EntityCamera->Position = { Position.x, Position.y + EyePosition, Position.z };
+    EntityCamera->UpdateView();
+    // NOTE(review): EntityCamera is dereferenced unchecked, although Entity documents it as "Can be null".
+}
+
+void Player::MoveSDL(Uint8* state) {
+    // The camera moves itself from the keyboard state; the player position is then the camera position lowered by EyePosition.
+    EntityCamera->MoveCamera(state);
+    Position = EntityCamera->Position;
+    Position.y -= EyePosition;
+
+}
+
+void Player::HandleMouseSDL(SDL_Event e) {
+
+    EntityCamera->HandleMouse(e);
+    Direction = EntityCamera->LookDirection;
+
+}
+
+void Player::UpdatePosition(glm::vec3 position) {
+    // Stores the new position and moves the camera to it, raised by EyePosition.
+    Position = position;
+    EntityCamera->UpdatePosition({ Position.x, Position.y + EyePosition, Position.z });
+
+}
+
+
+void Player::UpdateDirection(glm::vec3 direction) {
+
+    Direction = direction;
+    EntityCamera->UpdateLookDirection(direction);
+
+}
+
+void Player::CameraUpdateProjection(int xres, int yres) {
+
+    EntityCamera->UpdateProjection(xres, yres);
+
+}
diff --git a/legacy/src/world/entity.hpp b/legacy/src/world/entity.hpp
new file mode 100644
index 0000000..ade36b9
--- /dev/null
+++ b/legacy/src/world/entity.hpp
@@ -0,0 +1,51 @@
+#ifndef MINECRAFT_WORLD_ENTITY_H_
+#define MINECRAFT_WORLD_ENTITY_H_
+
+#include "../common.hpp"
+
+class Camera;
+
+class Collider;
+
+class Entity {
+public:
+
+    Entity(glm::vec3 position, glm::vec3 direction = { 0.0f, 0.0f, 0.0f }, std::shared_ptr<Camera> camera = std::make_shared<Camera>());
+
+    // World position, 1.7 units below the 
+    // camera position.
+    glm::vec3 Position;
+    // Look direction of the camera
+    glm::vec3 Direction;
+    // Velocity in direction
+    // of movement
+    glm::vec3 Velocity;
+
+    // Can be null
+    std::shared_ptr<Camera> EntityCamera;
+
+    // Collider
+    // std::unique_ptr<Collider> EntityCollider;
+
+    // Mesh (or reference to)
+
+};
+
+class Player : public Entity {
+public: 
+
+    Player(glm::vec3 position, glm::vec3 direction,  std::shared_ptr<Camera> camera);
+
+    float EyePosition = 1.7f;
+
+    void MoveSDL(Uint8* state);
+	void HandleMouseSDL(SDL_Event e);
+
+    void UpdatePosition(glm::vec3 position);
+    void UpdateDirection(glm::vec3 direction);
+
+    void CameraUpdateProjection(int xres, int yres);	
+
+};
+
+#endif
diff --git a/legacy/src/world/generator/chunkgenerator.cpp b/legacy/src/world/generator/chunkgenerator.cpp
new file mode 100644
index 0000000..435dc81
--- /dev/null
+++ b/legacy/src/world/generator/chunkgenerator.cpp
@@ -0,0 +1,14 @@
+#include "../../util/fastnoise.hpp"
+
+
+void dp() {
+    // NOTE(review): configures a function-local FastNoise that goes out of scope on return; nothing is returned or stored.
+    FastNoise noise;
+    noise.SetSeed(121212);
+
+    noise.SetNoiseType(FastNoise::SimplexFractal);
+
+    noise.SetFractalOctaves(3);
+
+}
+
diff --git a/legacy/src/world/generator/chunkgenerator.hpp b/legacy/src/world/generator/chunkgenerator.hpp
new file mode 100644
index 0000000..1425a26
--- /dev/null
+++ b/legacy/src/world/generator/chunkgenerator.hpp
@@ -0,0 +1,6 @@
+#ifndef MINECRAFT_WORLD_GENERATOR_CHUNKGENERATOR_H_
+#define MINECRAFT_WORLD_GENERATOR_CHUNKGENERATOR_H_
+
+
+
+#endif
diff --git a/legacy/src/world/generator/chunkmanager.hpp b/legacy/src/world/generator/chunkmanager.hpp
new file mode 100644
index 0000000..8420177
--- /dev/null
+++ b/legacy/src/world/generator/chunkmanager.hpp
@@ -0,0 +1,28 @@
+#ifndef MINECRAFT_WORLD_GENERATOR_CUNKMANAGER_H_
+#define MINECRAFT_WORLD_GENERATOR_CUNKMANAGER_H_
+
+#include "../../common.hpp"
+
+
+class Frustrum;
+
+class ChunkManager {
+public:
+    // NOTE(review): declarations only — the class declares no data members.
+    // Default constructor
+    ChunkManager();
+
+    void Update();
+
+    void Play();
+    void Pause();
+
+    void LoadChunksAroundWorldPoint(glm::vec3 worldPoint);
+
+
+
+    void CullFrustrumFromRenderQueue();
+
+};
+
+#endif
diff --git a/legacy/src/world/world.cpp b/legacy/src/world/world.cpp
new file mode 100644
index 0000000..f7e3fbb
--- /dev/null
+++ b/legacy/src/world/world.cpp
@@ -0,0 +1,191 @@
+#include "world.hpp"
+
+#include <algorithm>
+#include <iterator>
+
+#include "chunk/chunk.hpp"
+
+#include "../renderer/shader.hpp"
+
+#include "../util/fastnoise.hpp"
+
+#include "../config.hpp"
+#include "entity.hpp"
+
+World::World() {
+
+}
+
+void World::LoadWorld() {
+	// Loads the "Basic" shader, seeds the noise generator, queues the initial
+	// chunk coordinates and starts the generator threads.
+	m_shaders["Basic"] = std::make_shared<Shader>();
+	m_shaders["Basic"]->Load(GameConfig.ResourceBase + "shaders/simple");
+	m_shaders["Basic"]->Link();
+
+	srand(time(NULL));
+
+    m_noiseGenerator = std::make_shared<FastNoise>();
+    m_noiseGenerator->SetSeed(rand());
+    m_noiseGenerator->SetNoiseType(FastNoise::ValueFractal);
+    m_noiseGenerator->SetFractalOctaves(5);
+
+	// Generate a 54x54 chunk world
+	for (int x = -4; x < 50; x++)
+	for (int y = -50; y < 4; y++) {
+
+		m_chunkLoaderQueue.push({ x, y });
+
+	}
+
+	// The flag must be raised before any worker starts: m_loadChunks() loops on
+	// it and returns immediately if it still reads false.
+	m_generatorRunning = true;
+
+	// Spawn generator threads
+	for (int i = 0; i < 6; i++) {
+
+		m_generatorThreads.push_back(std::thread([this]() {
+
+			m_loadChunks();
+
+		}));
+
+	}
+}
+
+void World::SetTextureMap(GLuint map) {
+
+	m_textureMapID = map;
+
+}
+
+glm::vec3 World::GetChunkCoords(glm::vec3 worldCoords) {
+
+	return { worldCoords.x / static_cast<float>(CHUNK_WIDTH), 
+			 worldCoords.y / static_cast<float>(CHUNK_HEIGHT),
+			 worldCoords.z / static_cast<float>(CHUNK_DEPTH) };
+
+}
+
+glm::vec2 World::GetChunk(glm::vec3 worldCoords) {
+	// Returns the (x, z) chunk index containing worldCoords; y is ignored.
+	// Floors instead of truncating: a cast to int rounds toward zero, which would put e.g. x = -1 in chunk 0 rather than chunk -1.
+	return { glm::floor(worldCoords.x / CHUNK_WIDTH), glm::floor(worldCoords.z / CHUNK_DEPTH) };
+}
+
+std::vector<std::shared_ptr<Chunk>> World::GetRenderableChunks() {
+
+	std::vector<std::shared_ptr<Chunk>> chunks;
+
+	// Generator threads insert into m_chunks while holding m_chunkLoderMutex
+	// (see m_loadChunks), so the map is walked under the same mutex: an insert
+	// into an unordered_map may rehash and invalidate a concurrent iteration.
+	// The lock is released when this function returns.
+	std::lock_guard<std::mutex> guard(m_chunkLoderMutex);
+
+	for (auto& chunk : m_chunks) {
+
+		// Skip chunks that are not flagged for rendering
+		if (!chunk.second->ShouldRender) continue;
+
+		// Upload the CPU-side mesh when the mesher has flagged it ready
+		if (chunk.second->MeshReady)
+			chunk.second->UploadMesh();
+
+		chunks.push_back(chunk.second);
+
+	}
+
+	return chunks;
+
+}
+
+void World::Update(std::shared_ptr<Entity> player) {
+
+	// glm::vec2 inChunk = GetChunk(player->Position);
+
+	// if (m_chunks.find(inChunk) == m_chunks.end()) {
+
+	// 	m_chunkLoderMutex.lock();
+
+	// 	m_chunkLoaderQueue.push(inChunk);
+
+	// 	m_chunkLoderMutex.unlock();
+
+	// }
+
+	// std::cout << "Position: " << player->Position.x << ":" << player->Position.y << ":" << player->Position.z << std::endl;
+	// std::cout << "Chunk: " << inChunk.x << ":" << inChunk.y << std::endl << std::endl;
+
+}
+
+void World::Render(std::shared_ptr<Entity> player) {
+
+	// Bind the world's texture array
+	glBindTexture(GL_TEXTURE_2D_ARRAY, m_textureMapID);
+
+	// Draw every renderable chunk through the player's camera with the
+	// "Basic" shader
+	for (const std::shared_ptr<Chunk>& chunk : GetRenderableChunks()) {
+
+		chunk->Render(player->EntityCamera, m_shaders["Basic"]);
+
+	}
+
+}
+
+World::~World() {
+
+	// Ask the generator loops to stop, then wait for every worker to exit
+	m_generatorRunning = false;
+
+	for (std::thread& worker : m_generatorThreads) {
+		worker.join();
+	}
+
+	// Unload each chunk once the workers have been joined
+	for (auto& entry : m_chunks) {
+
+		entry.second->Unload();
+
+	}
+
+}
+
+void World::m_loadChunks() {
+	// Worker loop run by each generator thread: take one queued chunk
+	// coordinate, generate the chunk, publish it in m_chunks, and repeat until
+	// m_generatorRunning is cleared.
+	while (m_generatorRunning) {
+		glm::vec2 coords;
+		bool haveWork = false;
+		{
+			// Test-and-pop under one lock. Several workers share the queue, so an
+			// emptiness check made outside the lock can be stale by the time
+			// front()/pop() run, and both are undefined on an empty queue.
+			std::lock_guard<std::mutex> guard(m_chunkLoderMutex);
+			if (!m_chunkLoaderQueue.empty()) {
+				coords = m_chunkLoaderQueue.front();
+				m_chunkLoaderQueue.pop();
+				haveWork = true;
+			}
+		}
+		if (!haveWork) {
+			// Nothing queued: back off briefly, then re-check the run flag
+			static std::chrono::milliseconds dura(1);
+			std::this_thread::sleep_for(dura);
+			continue;
+		}
+
+		// Generation happens outside the lock
+		std::shared_ptr<Chunk> loadingChunk = std::make_shared<Chunk>(coords.x, coords.y, m_noiseGenerator);
+		loadingChunk->ShouldRender = true;
+		std::cout << "Loaded chunk " << coords.x << ":" << coords.y << std::endl;
+
+		std::lock_guard<std::mutex> guard(m_chunkLoderMutex);
+		m_chunks[coords] = loadingChunk;
+
+	}
+
+}
diff --git a/legacy/src/world/world.hpp b/legacy/src/world/world.hpp
new file mode 100644
index 0000000..771dedc
--- /dev/null
+++ b/legacy/src/world/world.hpp
@@ -0,0 +1,80 @@
+#ifndef MINECRAFT_WORLD_WORLD_H_
+#define MINECRAFT_WORLD_WORLD_H_
+
+#include "../common.hpp"
+
+#include "../renderer/camera.hpp"
+
+#include "generator/chunkmanager.hpp"
+#include "chunk/chunk.hpp"
+
+#include <unordered_map>
+#include <thread>
+#include <mutex>
+#include <queue>
+
+class FastNoise;
+
+class Shader;
+class Entity;
+
+class World {
+public:
+
+	// Default constructor
+	World();
+
+	// Loads the "Basic" shader, seeds the noise generator, queues the initial chunk coordinates and starts the generator threads
+	void LoadWorld();
+	// Stores the texture id that Render() binds as GL_TEXTURE_2D_ARRAY
+	void SetTextureMap(GLuint map);
+
+	// World coordinates divided component-wise by the chunk dimensions (not rounded)
+	glm::vec3 GetChunkCoords(glm::vec3 wordCoords);
+
+	// Takes world coordinates and gets the (x, z) index of the chunk those
+	// coordinates fall in
+	glm::vec2 GetChunk(glm::vec3 worldCoords);
+	// Chunks flagged ShouldRender; uploads any mesh flagged MeshReady on the way
+	std::vector<std::shared_ptr<Chunk>> GetRenderableChunks();
+
+	void Update(std::shared_ptr<Entity> player);
+	void Render(std::shared_ptr<Entity> player);
+
+	~World();
+
+private:
+
+	// GL stuff
+
+	// Main texture map id
+	GLuint m_textureMapID;
+	
+	// Shaders indexed by name
+	std::map<std::string, std::shared_ptr<Shader>> m_shaders;
+	
+	// Threads used for chunk generation
+	std::vector<std::thread> m_generatorThreads;
+	// NOTE(review): plain bool written by LoadWorld()/~World() and polled by the generator threads with no synchronisation
+	bool m_generatorRunning = false;
+	
+
+	// Chunks
+	// Indexed by chunk coordinates
+	std::unordered_map<glm::vec2, std::shared_ptr<Chunk>> m_chunks;
+	// NOTE(review): the updater mutex/queue pair is not referenced anywhere in world.cpp
+	std::mutex m_chunkUpdaterMutex;
+	std::queue<glm::vec2> m_chunkUpdatesQueue;
+	// Pending chunk coordinates, and the mutex the generator threads take around the queue and m_chunks
+	std::mutex m_chunkLoderMutex;
+	std::queue<glm::vec2> m_chunkLoaderQueue;
+
+	// Generator
+	std::shared_ptr<FastNoise> m_noiseGenerator;
+
+	// Body of each generator thread
+	void m_loadChunks();
+
+};
+
+#endif
diff --git a/src/Rendering/camera.cpp b/src/Rendering/camera.cpp
new file mode 100644
index 0000000..ed0f372
--- /dev/null
+++ b/src/Rendering/camera.cpp
@@ -0,0 +1,209 @@
+#include "camera.hpp"
+
+Camera::Camera()
+{
+
+	projMatrix = glm::perspective( glm::radians( 45.0f ), 1.0f, 0.1f, 1000.0f );
+
+	Roll = 0.0f;
+	Pitch = 0.0f;
+	Yaw = 0.0f;
+
+	Position = {};
+	LookDirection = {};
+
+	viewMatrix = {};
+
+	UpdateView();
+
+}
+
+Camera::Camera( int w, int h )
+{
+
+	projMatrix = glm::perspective( glm::radians( 45.0f ), (float) w / float( h ), 0.1f, 1000.0f );
+
+	Roll = 0.0f;
+	Pitch = 0.0f;
+	Yaw = 0.0f;
+
+	Position = {};
+	LookDirection = {};
+
+	viewMatrix = {};
+
+	UpdateView();
+
+}
+
+void Camera::UpdateView()
+{
+	// Rebuilds viewMatrix from Position and Roll/Pitch/Yaw, then re-derives LookDirection from it.
+	// roll can be removed
+	glm::mat4 matRoll = glm::mat4( 1.0f ); //identity matrix; 
+	glm::mat4 matPitch = glm::mat4( 1.0f );//identity matrix
+	glm::mat4 matYaw = glm::mat4( 1.0f );  //identity matrix
+
+	// roll, pitch and yaw
+	matRoll = glm::rotate( matRoll, Roll, glm::vec3( 0.0f, 0.0f, 1.0f ) );
+	matPitch = glm::rotate( matPitch, Pitch, glm::vec3( 1.0f, 0.0f, 0.0f ) );
+	matYaw = glm::rotate( matYaw, Yaw, glm::vec3( 0.0f, 1.0f, 0.0f ) );
+	// Roll about Z, pitch about X, yaw about Y, multiplied in that order
+	glm::mat4 rotate = matRoll * matPitch * matYaw;
+
+	glm::mat4 translate = glm::mat4( 1.0f );
+	translate = glm::translate( translate, -Position );
+	// The translation by -Position sits on the right, the rotation on the left
+	viewMatrix = rotate * translate;
+
+	// Work out Look Vector
+	glm::mat4 inverseView = glm::inverse( viewMatrix );
+	// NOTE(review): inverseView[2] is the third column, presumably the camera's local Z axis in world space — confirm the intended sign
+	LookDirection.x = inverseView[2][0];
+	LookDirection.y = inverseView[2][1];
+	LookDirection.z = inverseView[2][2];
+
+}
+
+glm::mat4 Camera::GetViewMatrix()
+{
+
+	return viewMatrix;
+
+}
+
+glm::mat4 Camera::GetProjectionMatrix()
+{
+
+	return projMatrix;
+
+}
+
+void Camera::UpdateProjection( int width, int height )
+{
+
+	projMatrix = glm::perspective( glm::radians( 45.0f ), (float) width / (float) height, 0.1f, 1000.0f );
+
+}
+
+void Camera::HandleMouse( SDL_Event e )
+{
+
+	// Only SDL_MOUSEMOTION events steer the camera; every other event is ignored
+	if ( e.type == SDL_MOUSEMOTION )
+	{
+
+		// xrel / yrel are read from the event's motion member
+		const float deltaX = e.motion.xrel;
+		const float deltaY = e.motion.yrel;
+
+		MouseMoved( glm::vec2 { deltaX, deltaY } );
+
+	}
+}
+
+void Camera::MoveCamera( Uint8* state )
+{
+	// state is indexed by SDL scancode; a non-zero entry is treated as "key pressed"
+	float dx = 0;
+	float dz = 0;
+	float dy = 0;
+
+	// Rotate by camera direction
+	glm::mat2 rotate {
+		cos( Yaw ), -sin( Yaw ),
+		sin( Yaw ), cos( Yaw )
+	};
+
+	glm::vec2 f( 0.0, 1.0 );
+	f = f * rotate;
+
+	if ( state[SDL_SCANCODE_W] )
+	{
+		dz -= f.y;
+		dx -= f.x;
+	}
+	if ( state[SDL_SCANCODE_S] )
+	{
+		dz += f.y;
+		dx += f.x;
+	}
+	if ( state[SDL_SCANCODE_A] )
+	{
+		dz += f.x;
+		dx += -f.y;
+	}
+	if ( state[SDL_SCANCODE_D] )
+	{
+		dz -= f.x;
+		dx -= -f.y;
+	}
+	if ( state[SDL_SCANCODE_SPACE] )
+	{
+		dy += 1;
+	}
+	if ( state[SDL_SCANCODE_LSHIFT] )
+	{
+		dy -= 1;
+	}
+
+	// Movement is applied in world space: W/S and A/D step along the
+	// yaw-rotated horizontal axis f and its perpendicular, SPACE / LSHIFT step
+	// along Y. Every pressed key adds one CameraSpeed step per call.
+	//
+	// The view-matrix "forward"/"strafe" vectors that used to be extracted here
+	// were never read, so they are no longer computed.
+
+	Position.x += dx * CameraSpeed;
+	Position.z += dz * CameraSpeed;
+	Position.y += dy * CameraSpeed;
+
+	// update the view matrix
+	UpdateView();
+
+}
+
+void Camera::MouseMoved( glm::vec2 mouseDelta )
+{
+	// Turns a mouse delta into a yaw/pitch change scaled by MouseSensitivity / 100.
+	// Yaw and Pitch are kept in radians: Pitch is clamped to +/- pi/2 below, and
+	// UpdateView() hands both to glm::rotate without any conversion.
+	Yaw += MouseSensitivity * (mouseDelta.x / 100);
+	Pitch += MouseSensitivity * (mouseDelta.y / 100);
+	Pitch = glm::clamp<float>( Pitch, -M_PI / 2, M_PI / 2 );
+
+	UpdateView();
+
+}
+
+void Camera::UpdatePosition( glm::vec3 position )
+{
+
+	Position = position;
+
+	UpdateView();
+
+}
+
+void Camera::UpdateEulerLookDirection( float roll, float pitch, float yaw )
+{
+	// NOTE(review): the LookDirection computed below is overwritten by UpdateView(), which re-derives it from the view matrix.
+	Roll = roll; Pitch = pitch; Yaw = yaw;
+	LookDirection.x = cos( Yaw ) * cos( Pitch );
+	LookDirection.y = sin( Yaw ) * cos( Pitch );
+	LookDirection.z = sin( Pitch );
+
+	UpdateView();
+
+}
+
+void Camera::UpdateLookDirection( glm::vec3 lookDirection )
+{
+	// Derives Pitch and Yaw from the vector (Roll is left as is); presumably expects a normalised lookDirection for asin — TODO confirm callers.
+	LookDirection = lookDirection;
+	Pitch = asin( -lookDirection.y );
+	Yaw = atan2( lookDirection.x, lookDirection.z );
+	// NOTE(review): UpdateView() reassigns LookDirection from the inverse view matrix.
+	UpdateView();
+
+}
diff --git a/src/Rendering/camera.hpp b/src/Rendering/camera.hpp
new file mode 100644
index 0000000..f14d0eb
--- /dev/null
+++ b/src/Rendering/camera.hpp
@@ -0,0 +1,55 @@
+#ifndef MINECRAFT_RENDERING_CAMERA_H_
+#define MINECRAFT_RENDERING_CAMERA_H_
+
+#include <glm/gtc/matrix_transform.hpp>
+#include <glm/gtc/type_ptr.hpp>
+#define GLM_ENABLE_EXPERIMENTAL
+#include <glm/gtx/hash.hpp>
+#include <glm/glm.hpp>
+
+#if _WIN32
+#include <SDL.h>
+#else
+#include <SDL2/SDL.h>
+#endif
+
+class Camera {
+public:
+	Camera();
+	Camera(int w, int h);
+	// Recomputes viewMatrix and LookDirection from Position and Roll/Pitch/Yaw
+	void UpdateView();
+	// The matrix getters return by value
+	glm::mat4 GetViewMatrix();
+	glm::mat4 GetProjectionMatrix();
+	glm::mat4 GetFrustrumMatrix();
+	// NOTE(review): GetFrustrumMatrix() has no definition in camera.cpp
+	void UpdateProjection(int width, int height);
+
+	// Keyboard
+	void MoveCamera(Uint8* state);
+	// Mouse
+	void HandleMouse(SDL_Event e);
+	// Mouse Delta
+	void MouseMoved(glm::vec2 mouseDelta);
+
+	// Public tuning values: MouseSensitivity scales MouseMoved(), CameraSpeed scales MoveCamera()
+	float MouseSensitivity = 0.1f;
+	float CameraSpeed = 2.0f;
+
+	void UpdatePosition(glm::vec3 position);
+	void UpdateEulerLookDirection(float roll, float pitch, float yaw);
+	void UpdateLookDirection(glm::vec3 lookDirection);
+	// Angles are in radians (Pitch is clamped to +/- pi/2 in MouseMoved); both constructors zero them
+	glm::vec3 Position = {};
+	float Roll, Pitch, Yaw;
+	glm::vec3 LookDirection = {};
+
+private:
+
+	glm::mat4 viewMatrix = {};
+	glm::mat4 projMatrix = {};
+	
+};
+
+#endif
diff --git a/src/Rendering/face.hpp b/src/Rendering/face.hpp
new file mode 100644
index 0000000..885ca4e
--- /dev/null
+++ b/src/Rendering/face.hpp
@@ -0,0 +1,128 @@
+#ifndef MINECRAFT_RENDERING_FACE_H_
+#define MINECRAFT_RENDERING_FACE_H_
+
+#include <vector>
+#include <glm/glm.hpp>
+
+namespace EFaceType {
+
+	enum Face : uint8_t {
+		Top,
+		Bottom,
+		Left,
+		Right,
+		Front,
+		Back,
+	};
+
+}
+
+static std::vector<glm::vec3> CubeTopFace = {
+	{ -0.5f,  0.5f, -0.5f },
+	{  0.5f,  0.5f, -0.5f },
+	{  0.5f,  0.5f,  0.5f },
+	{  0.5f,  0.5f,  0.5f },
+	{ -0.5f,  0.5f,  0.5f },
+	{ -0.5f,  0.5f, -0.5f }
+};
+
+static std::vector<glm::vec2> CubeTopFaceUVs = {
+	{ 0.0f, 0.0f },
+	{ 1.0f, 0.0f },
+	{ 1.0f, 1.0f },
+	{ 1.0f, 1.0f },
+	{ 0.0f, 1.0f },
+	{ 0.0f, 0.0f }
+};
+
+static std::vector<glm::vec3> CubeBottomFace = {
+	{ -0.5f, -0.5f, -0.5f },
+	{  0.5f, -0.5f, -0.5f },
+	{  0.5f, -0.5f,  0.5f },
+	{  0.5f, -0.5f,  0.5f },
+	{ -0.5f, -0.5f,  0.5f },
+	{ -0.5f, -0.5f, -0.5f }
+};
+
+static std::vector<glm::vec2> CubeBottomFaceUVs = {
+	{ 0.0f, 0.0f },
+	{ 1.0f, 0.0f },
+	{ 1.0f, 1.0f },
+	{ 1.0f, 1.0f },
+	{ 0.0f, 1.0f },
+	{ 0.0f, 0.0f }
+};
+
+static std::vector<glm::vec3> CubeLeftFace = {
+	{ -0.5f,  0.5f,  0.5f },
+	{ -0.5f,  0.5f, -0.5f },
+	{ -0.5f, -0.5f, -0.5f },
+	{ -0.5f, -0.5f, -0.5f },
+	{ -0.5f, -0.5f,  0.5f },
+	{ -0.5f,  0.5f,  0.5f }
+};
+
+static std::vector<glm::vec2> CubeLeftFaceUVs = {
+	{ 0.0f, 0.0f },
+	{ 1.0f, 0.0f },
+	{ 1.0f, 1.0f },
+	{ 1.0f, 1.0f },
+	{ 0.0f, 1.0f },
+	{ 0.0f, 0.0f }
+};
+
+static std::vector<glm::vec3> CubeRightFace = {
+	{  0.5f,  0.5f,  0.5f },
+	{  0.5f,  0.5f, -0.5f },
+	{  0.5f, -0.5f, -0.5f },
+	{  0.5f, -0.5f, -0.5f },
+	{  0.5f, -0.5f,  0.5f },
+	{  0.5f,  0.5f,  0.5f },
+};
+
+static std::vector<glm::vec2> CubeRightFaceUVs = {
+	{ 0.0f, 0.0f },
+	{ 1.0f, 0.0f },
+	{ 1.0f, 1.0f },
+	{ 1.0f, 1.0f },
+	{ 0.0f, 1.0f },
+	{ 0.0f, 0.0f }
+};
+
+static std::vector<glm::vec3> CubeFrontFace = {
+	{ -0.5f, -0.5f,  0.5f },
+	{  0.5f, -0.5f,  0.5f },
+	{  0.5f,  0.5f,  0.5f },
+	{  0.5f,  0.5f,  0.5f },
+	{ -0.5f,  0.5f,  0.5f },
+	{ -0.5f, -0.5f,  0.5f }
+};
+
+static std::vector<glm::vec2> CubeFrontFaceUVs = {
+	{ 1.0f, 1.0f },
+	{ 0.0f, 1.0f },
+	{ 0.0f, 0.0f },
+	{ 0.0f, 0.0f },
+	{ 1.0f, 0.0f },
+	{ 1.0f, 1.0f }
+};
+
+static std::vector<glm::vec3> CubeBackFace = {
+	{ -0.5f, -0.5f, -0.5f },
+	{  0.5f, -0.5f, -0.5f },
+	{  0.5f,  0.5f, -0.5f },
+	{  0.5f,  0.5f, -0.5f },
+	{ -0.5f,  0.5f, -0.5f },
+	{ -0.5f, -0.5f, -0.5f }
+};
+
+static std::vector<glm::vec2> CubeBackFaceUVs = {
+	{ 1.0f, 1.0f },
+	{ 0.0f, 1.0f },
+	{ 0.0f, 0.0f },
+	{ 0.0f, 0.0f },
+	{ 1.0f, 0.0f },
+	{ 1.0f, 1.0f }
+};
+
+#endif
diff --git a/src/Rendering/frustrum.cpp b/src/Rendering/frustrum.cpp
new file mode 100644
index 0000000..43a2d1a
--- /dev/null
+++ b/src/Rendering/frustrum.cpp
@@ -0,0 +1,3 @@
+#include "frustrum.hpp"
+
+
diff --git a/src/Rendering/frustrum.hpp b/src/Rendering/frustrum.hpp
new file mode 100644
index 0000000..a29f37b
--- /dev/null
+++ b/src/Rendering/frustrum.hpp
@@ -0,0 +1,29 @@
+#ifndef MINECRAFT_RENDERING_FRUSTRUM_H_
+#define MINECRAFT_RENDERING_FRUSTRUM_H_
+
+namespace EFrustrumPlanes 
+{
+    enum Planes 
+    {
+        Right,
+        Left,
+        Top,
+        Bottom,
+        Far,
+        Near
+    };
+};
+
+class FrustrumPlane 
+{
+public:
+
+};
+
+class Frustrum 
+{
+public:
+
+};
+
+#endif
diff --git a/src/Rendering/mesh.cpp b/src/Rendering/mesh.cpp
new file mode 100644
index 0000000..6056583
--- /dev/null
+++ b/src/Rendering/mesh.cpp
@@ -0,0 +1,6 @@
+#include "mesh.hpp"
+
+Mesh::Mesh()
+{
+
+}
diff --git a/src/Rendering/mesh.hpp b/src/Rendering/mesh.hpp
new file mode 100644
index 0000000..433cdf2
--- /dev/null
+++ b/src/Rendering/mesh.hpp
@@ -0,0 +1,27 @@
+#ifndef MINECRAFT_RENDERING_MESH_H_
+#define MINECRAFT_RENDERING_MESH_H_
+
+#include "../common.hpp"
+
+class Vertex
+{
+public:
+	glm::vec3 Position;
+	glm::vec3 SurfaceNormal;
+
+};
+
+class Mesh
+{
+public:
+
+	Mesh();
+	
+private:
+	// GL object names. Zero-initialised because the constructor body is empty
+	// and would otherwise leave them holding indeterminate values.
+	GLuint mVAO = 0;
+	GLuint mVBO = 0;
+};
+
+#endif
diff --git a/src/Rendering/renderable.cpp b/src/Rendering/renderable.cpp
new file mode 100644
index 0000000..68a7d31
--- /dev/null
+++ b/src/Rendering/renderable.cpp
@@ -0,0 +1,66 @@
+#include "renderable.hpp"
+
+#include <algorithm>
+
+Renderable::Renderable()
+{
+
+}
+
+void Renderable::Init()
+{
+	
+}
+
+void Renderable::AddMesh( Mesh* mesh )
+{
+	if (mesh == nullptr) return;
+	mMeshs.push_back( mesh );
+}
+
+void Renderable::RemoveMesh( Mesh* mesh )
+{
+	// Drops every occurrence of mesh from the list; a no-op when it is not present.
+	//
+	// std::remove only shifts the kept elements forward and returns the new
+	// logical end, so the tail must be erased or the vector keeps its old size
+	// (with unspecified values left in the tail slots).
+	mMeshs.erase( std::remove( mMeshs.begin(), mMeshs.end(), mesh ), mMeshs.end() );
+}
+
+void Renderable::SetActiveMesh( Mesh* mesh )
+{
+	// Renderable does not include mesh
+	if (std::find( mMeshs.begin(), mMeshs.end(), mesh ) == mMeshs.end())
+		return;
+
+	mActiveMesh = mesh;
+}
+
+Mesh* Renderable::GetActiveMesh()
+{
+	return mActiveMesh;
+}
+
+void Renderable::UpdateBuffer()
+{
+
+}
+
+void Renderable::Load()
+{
+
+}
+
+void Renderable::Unload()
+{
+
+}
+
+Renderable::~Renderable()
+{
+	Unload();
+}
+
+
+
diff --git a/src/Rendering/renderable.hpp b/src/Rendering/renderable.hpp
new file mode 100644
index 0000000..7441d8c
--- /dev/null
+++ b/src/Rendering/renderable.hpp
@@ -0,0 +1,45 @@
+#ifndef MINECRAFT_RENDERING_RENDERABLE_H_
+#define MINECRAFT_RENDERING_RENDERABLE_H_
+
+#include "../common.hpp"
+
+class Mesh;
+
+// Basically a model but thats effort
+// perhaps sub-class?
+class Renderable
+{
+public:
+	Renderable();
+
+	void Init();
+
+	// DOES NOT OWN MESH
+	void AddMesh( Mesh* );
+	void RemoveMesh( Mesh* );
+	void SetActiveMesh( Mesh* );
+	Mesh* GetActiveMesh();
+
+	void UpdateBuffer();
+
+	// GPU Load methods
+	void Load();
+	void Unload();
+
+	~Renderable();
+
+private:
+	std::vector<glm::vec3> mBuff;
+
+	// Non-owning mesh pointers. The active mesh starts out null: the constructor
+	// assigns nothing, and GetActiveMesh() returns this pointer as-is.
+	std::vector<Mesh*> mMeshs;
+	Mesh* mActiveMesh = nullptr;
+
+	// Meshes have uniform uniforms
+	GLuint mUBO = 0;
+
+	glm::mat4 mModelMatrix;
+};
+
+#endif
diff --git a/src/Rendering/rendermaster.cpp b/src/Rendering/rendermaster.cpp
new file mode 100644
index 0000000..84719ea
--- /dev/null
+++ b/src/Rendering/rendermaster.cpp
@@ -0,0 +1,11 @@
+#include "rendermaster.hpp"
+
+RenderMaster::RenderMaster()
+	: mWorldRenderer(),
+	  mMeshRenderer()
+{
+
+
+
+}
+
diff --git a/src/Rendering/rendermaster.hpp b/src/Rendering/rendermaster.hpp
new file mode 100644
index 0000000..9965d23
--- /dev/null
+++ b/src/Rendering/rendermaster.hpp
@@ -0,0 +1,41 @@
+#ifndef MINECRAFT_RENDERING_RENDERMASTER_H_
+#define MINECRAFT_RENDERING_RENDERMASTER_H_
+
+/**
+ * Renderer Structure
+ * 
+ * Mesh -> Renderable
+ * Mesh -> VoxelMesh
+ * Renderable -> Model
+ * Renderable -> World (static(?))
+ * Renderable -> Entity (dynamic)
+ * Renderable -> Particle (dynamic)
+ * 
+ * Kinda just winging it ngl
+*/
+
+class WorldRenderer
+{
+
+};
+
+
+class MeshRenderer
+{
+
+};
+
+
+class RenderMaster
+{
+public:
+	RenderMaster();
+
+
+	WorldRenderer mWorldRenderer;
+	MeshRenderer mMeshRenderer;
+
+
+};
+
+#endif
diff --git a/src/Rendering/shader.cpp b/src/Rendering/shader.cpp
new file mode 100644
index 0000000..a851d0e
--- /dev/null
+++ b/src/Rendering/shader.cpp
@@ -0,0 +1,121 @@
+#include "shader.hpp"
+
+#include "../utilities.hpp"
+
+Shader::Shader()
+{
+
+	Program = 0;
+	mFrag = 0;
+	mVert = 0;
+
+	mLogger = std::make_shared<Logger>();
+
+}
+
+
+void Shader::Load( std::string path )
+{
+
+	std::string vertexLocation = path + ".vert";
+	Load( vertexLocation, GL_VERTEX_SHADER );
+	*mLogger << LOGGER_INFO << "Vertex shader at '" << vertexLocation << "' loaded..." << LOGGER_ENDL;
+
+
+	std::string fragmentLocation = path + ".frag";
+	Load( fragmentLocation, GL_FRAGMENT_SHADER );
+	*mLogger << LOGGER_INFO << "Fragment shader at '" << fragmentLocation << "' loaded..." << LOGGER_ENDL;
+
+}
+
+
+void Shader::Load( std::string path, GLenum type )
+{
+	// Creates the shader object for `type`, records it in mVert or mFrag, and sets the text of the file at `path` as its source.
+	GLuint activeShader = 0;
+	// NOTE(review): any other `type` leaves activeShader at 0, which is then passed to glShaderSource.
+	if ( type == GL_VERTEX_SHADER )
+		mVert = activeShader = glCreateShader( type );
+	// NOTE(review): loading the same stage twice overwrites the stored handle without deleting the earlier object.
+	if ( type == GL_FRAGMENT_SHADER )
+		mFrag = activeShader = glCreateShader( type );
+
+	std::string loadedShaderSource = LoadTextFromFile( path );
+	const char* shaderSource = loadedShaderSource.c_str();
+	int shaderSourceLength = loadedShaderSource.length();
+
+	glShaderSource( activeShader, 1, &shaderSource, &shaderSourceLength );
+
+}
+
+void Shader::Link()
+{
+	// Compiles both stages, links them into Program, then releases the stage objects.
+	if ( mVert == 0 || mFrag == 0 )
+	{
+		*mLogger << LOGGER_ERROR << "Failed to link programs: Both programs not present" << LOGGER_ENDL;
+		return;
+	}
+
+	glCompileShader( mVert );
+	if ( mCheckShader( mVert ) )
+	{
+		*mLogger << LOGGER_INFO << "Vertex shader '" << mVert << "' compiled..." << LOGGER_ENDL;
+	}
+
+	glCompileShader( mFrag );
+	if ( mCheckShader( mFrag ) )
+	{
+		*mLogger << LOGGER_INFO << "Fragment shader '" << mFrag << "' compiled..." << LOGGER_ENDL;
+	}
+
+	Program = glCreateProgram();
+	glAttachShader( Program, mVert );
+	glAttachShader( Program, mFrag );
+	glLinkProgram( Program );
+
+	// After deletion the names must not be reused. Zero them so ~Shader() does
+	// not hand stale names to glDeleteShader (a name of 0 is silently ignored).
+	glDeleteShader( mVert );
+	glDeleteShader( mFrag );
+	mVert = mFrag = 0;
+
+	*mLogger << LOGGER_INFO << "Program '" << Program << "' loaded..." << LOGGER_ENDL;
+}
+
+void Shader::Use()
+{
+
+	glUseProgram( Program );
+
+}
+
+
+bool Shader::mCheckShader( GLuint uid )
+{
+	// Returns true when the shader compiled; otherwise logs its info log and returns false.
+	GLint status = GL_TRUE;
+	glGetShaderiv( uid, GL_COMPILE_STATUS, &status );
+
+	if ( status == GL_FALSE )
+	{
+		// Automatic storage, released on scope exit. (The former `delete buf;`
+		// on this stack array was undefined behaviour.)
+		char buf[512] = {};
+		glGetShaderInfoLog( uid, sizeof( buf ), NULL, buf );
+		*mLogger << LOGGER_ERROR << buf << LOGGER_ENDL;
+		return false;
+	}
+
+	return true;
+}
+
+
+Shader::~Shader()
+{
+
+	glDeleteProgram( Program );
+	glDeleteShader( mVert );
+	glDeleteShader( mFrag );
+
+}
diff --git a/src/Rendering/shader.hpp b/src/Rendering/shader.hpp
new file mode 100644
index 0000000..7f7e709
--- /dev/null
+++ b/src/Rendering/shader.hpp
@@ -0,0 +1,30 @@
+#ifndef MINECRAFT_RENDERING_SHADER_H_
+#define MINECRAFT_RENDERING_SHADER_H_
+
+#include <logger.h>
+
+#include <glad/glad.h>
+
+// Wraps an OpenGL program built from one vertex and one fragment shader.
+//
+// Load() creates the stage shaders, Link() compiles and links them, and Use()
+// binds the resulting program.
+//
+// NOTE(review): the destructor deletes the GL objects, but copying is not
+// disabled here, so a copied Shader would delete the same GL names twice.
+class Shader {
+public:
+	Shader();
+
+	// NOTE(review): single-argument overload; presumably loads both stages
+	// from locations derived from `path` — confirm in shader.cpp.
+	void Load(std::string path);
+	// Creates a shader of `type` (GL_VERTEX_SHADER or GL_FRAGMENT_SHADER) and
+	// uploads the text of the file at `path` as its source. Does not compile.
+	void Load(std::string path, GLenum type);
+
+	// GL program handle: assigned by Link(), bound by Use(), deleted by ~Shader().
+	GLuint Program;
+	// Compiles both loaded shaders and links them into Program.
+	void Link();
+
+	// Calls glUseProgram(Program).
+	void Use();
+
+	// Deletes Program and both shader objects.
+	~Shader();
+private:
+	// Logger that receives the info and error messages of this class.
+	std::shared_ptr<Logger> mLogger;
+
+	// Returns false, after logging the shader info log, when `uid` failed to
+	// compile; returns true otherwise.
+	bool mCheckShader(GLuint uid);
+
+	// Shader object handles set by Load(); Link() requires both to be non-zero.
+	GLuint mVert;
+	GLuint mFrag;
+};
+
+#endif
diff --git a/src/Rendering/texture.cpp b/src/Rendering/texture.cpp
new file mode 100644
index 0000000..7b91f10
--- /dev/null
+++ b/src/Rendering/texture.cpp
@@ -0,0 +1,60 @@
+#include "texture.hpp"
+
+#include <logger.h>
+
+#include "../settings.hpp"
+
+#define STB_IMAGE_IMPLEMENTATION
+#include "../ThirdParty/stb_image.hpp"
+
+// Loads the named texture files into a single GL_TEXTURE_2D_ARRAY.
+//
+// textures: file names relative to ResourceBase + "textures/". Entry i becomes
+//           layer i of the array.
+//
+// Every layer is 16x16 RGBA8. A file that cannot be decoded, or whose size is
+// not 16x16, is reported through the logger and its layer is left as zeroed
+// texels, instead of copying from a null or wrongly sized image buffer.
+//
+// Returns the GL name of the new texture array. The array is unbound again
+// before returning.
+GLuint Texture::LoadTextures(std::vector<std::string> textures) {
+
+    Logger logger;
+
+    const std::string basePath = ResourceBase + "textures/";
+
+    const int x = 16;
+    const int y = 16;
+    const std::size_t layerBytes = static_cast<std::size_t>(x) * y * 4;
+
+    const GLsizei layers = static_cast<GLsizei>(textures.size());
+
+    // Owned by the vector (no manual free) and zero-filled, so layers that
+    // fail to load still have well-defined contents.
+    std::vector<GLubyte> texels(layerBytes * textures.size(), 0);
+
+    for (std::size_t i = 0; i < textures.size(); i++) {
+
+        const std::string path = basePath + textures[i];
+
+        int xR = 0;
+        int yR = 0;
+        int cR = 0;
+
+        // STBI_rgb_alpha forces 4 channels per pixel regardless of the file.
+        unsigned char* texture = stbi_load(path.c_str(), &xR, &yR, &cR, STBI_rgb_alpha);
+
+        if (texture == nullptr) {
+            logger << LOGGER_ERROR << "Failed to load texture at '" << path << "'" << LOGGER_ENDL;
+            continue;
+        }
+
+        if (xR != x || yR != y) {
+            logger << LOGGER_ERROR << "Texture at '" << path << "' is not 16x16, skipping" << LOGGER_ENDL;
+            stbi_image_free(texture);
+            continue;
+        }
+
+        memcpy(texels.data() + i * layerBytes, texture, layerBytes);
+
+        stbi_image_free(texture);
+        logger << LOGGER_INFO << "Texture at '" << path << "' Loaded..." << LOGGER_ENDL;
+
+    }
+
+    GLuint textureArray = 0;
+
+    glGenTextures(1, &textureArray);
+    glBindTexture(GL_TEXTURE_2D_ARRAY, textureArray);
+
+    // Allocate storage for all layers first, then upload the packed texels.
+    glTexImage3D(GL_TEXTURE_2D_ARRAY, 0, GL_RGBA8, x, y, layers, 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr);
+
+    glTexSubImage3D(GL_TEXTURE_2D_ARRAY, 0, 0, 0, 0, x, y, layers, GL_RGBA, GL_UNSIGNED_BYTE, texels.data());
+
+    glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+    glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+
+    glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+    glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+
+    glBindTexture(GL_TEXTURE_2D_ARRAY, 0);
+
+    return textureArray;
+
+}
diff --git a/src/Rendering/texture.hpp b/src/Rendering/texture.hpp
new file mode 100644
index 0000000..cffbcb1
--- /dev/null
+++ b/src/Rendering/texture.hpp
@@ -0,0 +1,14 @@
+#ifndef MINECRAFT_RENDERING_TEXTURE_H_
+#define MINECRAFT_RENDERING_TEXTURE_H_
+
+#include <vector>
+#include <string>
+
+#include <glad/glad.h>
+
+// Loader that turns a list of texture files into one GL texture array.
+class Texture {
+public:
+    // Loads each file in `textures` (names relative to the "textures/"
+    // resource directory) as one 16x16 RGBA layer of a GL_TEXTURE_2D_ARRAY
+    // and returns the GL name of that array.
+    GLuint LoadTextures(std::vector<std::string> textures);
+};
+
+#endif
diff --git a/src/Rendering/voxelmesh.hpp b/src/Rendering/voxelmesh.hpp
new file mode 100644
index 0000000..bb94ecf
--- /dev/null
+++ b/src/Rendering/voxelmesh.hpp
@@ -0,0 +1,23 @@
+#ifndef MINECRAFT_RENDERER_VOXELMESH_H_
+#define MINECRAFT_RENDERER_VOXELMESH_H_
+
+#include "mesh.hpp"
+
+// Mesh subclass that additionally carries a block of voxel data and its
+// three dimensions.
+class VoxelMesh : public Mesh
+{
+public:
+
+	VoxelMesh();
+
+
+
+	// Extents of the voxel block along each axis.
+	// NOTE(review): presumably measured in voxels — confirm against the code
+	// that fills Voxels.
+	int Width;
+	int Height;
+	int Depth;
+
+	// Size is w*h*d
+	// i.e. expected to hold Width * Height * Depth one-byte entries. The
+	// index order of the three axes is not established in this header.
+	std::vector<uint8_t> Voxels;
+
+};
+
+#endif
diff --git a/src/ThirdParty/fastnoise.cpp b/src/ThirdParty/fastnoise.cpp
new file mode 100644
index 0000000..4fdba6b
--- /dev/null
+++ b/src/ThirdParty/fastnoise.cpp
@@ -0,0 +1,2250 @@
+// FastNoise.cpp
+//
+// MIT License
+//
+// Copyright(c) 2017 Jordan Peck
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+// The developer's email is jorzixdan.me2@gzixmail.com (for great email, take
+// off every 'zix'.)
+//
+
+#include "fastnoise.hpp"
+
+#include <math.h>
+#include <assert.h>
+
+#include <algorithm>
+#include <random>
+
+const FN_DECIMAL GRAD_X[] = // x components of 12 vectors; (GRAD_X[i], GRAD_Y[i], GRAD_Z[i]) always has one 0 and two +/-1 components
+{
+	1, -1, 1, -1,
+	1, -1, 1, -1,
+	0, 0, 0, 0
+};
+const FN_DECIMAL GRAD_Y[] = // y components of the same 12 vectors; index-aligned with GRAD_X and GRAD_Z
+{
+	1, 1, -1, -1,
+	0, 0, 0, 0,
+	1, -1, 1, -1
+};
+const FN_DECIMAL GRAD_Z[] = // z components of the same 12 vectors; index-aligned with GRAD_X and GRAD_Y
+{
+	0, 0, 0, 0,
+	1, 1, -1, -1,
+	1, 1, -1, -1
+};
+
+const FN_DECIMAL GRAD_4D[] = // 128 values = 32 consecutive 4-component vectors (four per row), each with one 0 and three +/-1 components
+{
+	0,1,1,1,0,1,1,-1,0,1,-1,1,0,1,-1,-1,
+	0,-1,1,1,0,-1,1,-1,0,-1,-1,1,0,-1,-1,-1,
+	1,0,1,1,1,0,1,-1,1,0,-1,1,1,0,-1,-1,
+	-1,0,1,1,-1,0,1,-1,-1,0,-1,1,-1,0,-1,-1,
+	1,1,0,1,1,1,0,-1,1,-1,0,1,1,-1,0,-1,
+	-1,1,0,1,-1,1,0,-1,-1,-1,0,1,-1,-1,0,-1,
+	1,1,1,0,1,1,-1,0,1,-1,1,0,1,-1,-1,0,
+	-1,1,1,0,-1,1,-1,0,-1,-1,1,0,-1,-1,-1,0
+};
+
+const FN_DECIMAL VAL_LUT[] =
+{
+	FN_DECIMAL(0.3490196078), FN_DECIMAL(0.4352941176), FN_DECIMAL(-0.4509803922), FN_DECIMAL(0.6392156863), FN_DECIMAL(0.5843137255), FN_DECIMAL(-0.1215686275), FN_DECIMAL(0.7176470588), FN_DECIMAL(-0.1058823529), FN_DECIMAL(0.3960784314), FN_DECIMAL(0.0431372549), FN_DECIMAL(-0.03529411765), FN_DECIMAL(0.3176470588), FN_DECIMAL(0.7254901961), FN_DECIMAL(0.137254902), FN_DECIMAL(0.8588235294), FN_DECIMAL(-0.8196078431),
+	FN_DECIMAL(-0.7960784314), FN_DECIMAL(-0.3333333333), FN_DECIMAL(-0.6705882353), FN_DECIMAL(-0.3882352941), FN_DECIMAL(0.262745098), FN_DECIMAL(0.3254901961), FN_DECIMAL(-0.6470588235), FN_DECIMAL(-0.9215686275), FN_DECIMAL(-0.5294117647), FN_DECIMAL(0.5294117647), FN_DECIMAL(-0.4666666667), FN_DECIMAL(0.8117647059), FN_DECIMAL(0.3803921569), FN_DECIMAL(0.662745098), FN_DECIMAL(0.03529411765), FN_DECIMAL(-0.6156862745),
+	FN_DECIMAL(-0.01960784314), FN_DECIMAL(-0.3568627451), FN_DECIMAL(-0.09019607843), FN_DECIMAL(0.7490196078), FN_DECIMAL(0.8352941176), FN_DECIMAL(-0.4039215686), FN_DECIMAL(-0.7490196078), FN_DECIMAL(0.9529411765), FN_DECIMAL(-0.0431372549), FN_DECIMAL(-0.9294117647), FN_DECIMAL(-0.6549019608), FN_DECIMAL(0.9215686275), FN_DECIMAL(-0.06666666667), FN_DECIMAL(-0.4431372549), FN_DECIMAL(0.4117647059), FN_DECIMAL(-0.4196078431),
+	FN_DECIMAL(-0.7176470588), FN_DECIMAL(-0.8117647059), FN_DECIMAL(-0.2549019608), FN_DECIMAL(0.4901960784), FN_DECIMAL(0.9137254902), FN_DECIMAL(0.7882352941), FN_DECIMAL(-1.0), FN_DECIMAL(-0.4745098039), FN_DECIMAL(0.7960784314), FN_DECIMAL(0.8509803922), FN_DECIMAL(-0.6784313725), FN_DECIMAL(0.4588235294), FN_DECIMAL(1.0), FN_DECIMAL(-0.1843137255), FN_DECIMAL(0.4509803922), FN_DECIMAL(0.1450980392),
+	FN_DECIMAL(-0.231372549), FN_DECIMAL(-0.968627451), FN_DECIMAL(-0.8588235294), FN_DECIMAL(0.4274509804), FN_DECIMAL(0.003921568627), FN_DECIMAL(-0.003921568627), FN_DECIMAL(0.2156862745), FN_DECIMAL(0.5058823529), FN_DECIMAL(0.7647058824), FN_DECIMAL(0.2078431373), FN_DECIMAL(-0.5921568627), FN_DECIMAL(0.5764705882), FN_DECIMAL(-0.1921568627), FN_DECIMAL(-0.937254902), FN_DECIMAL(0.08235294118), FN_DECIMAL(-0.08235294118),
+	FN_DECIMAL(0.9058823529), FN_DECIMAL(0.8274509804), FN_DECIMAL(0.02745098039), FN_DECIMAL(-0.168627451), FN_DECIMAL(-0.7803921569), FN_DECIMAL(0.1137254902), FN_DECIMAL(-0.9450980392), FN_DECIMAL(0.2), FN_DECIMAL(0.01960784314), FN_DECIMAL(0.5607843137), FN_DECIMAL(0.2705882353), FN_DECIMAL(0.4431372549), FN_DECIMAL(-0.9607843137), FN_DECIMAL(0.6156862745), FN_DECIMAL(0.9294117647), FN_DECIMAL(-0.07450980392),
+	FN_DECIMAL(0.3098039216), FN_DECIMAL(0.9921568627), FN_DECIMAL(-0.9137254902), FN_DECIMAL(-0.2941176471), FN_DECIMAL(-0.3411764706), FN_DECIMAL(-0.6235294118), FN_DECIMAL(-0.7647058824), FN_DECIMAL(-0.8901960784), FN_DECIMAL(0.05882352941), FN_DECIMAL(0.2392156863), FN_DECIMAL(0.7333333333), FN_DECIMAL(0.6549019608), FN_DECIMAL(0.2470588235), FN_DECIMAL(0.231372549), FN_DECIMAL(-0.3960784314), FN_DECIMAL(-0.05098039216),
+	FN_DECIMAL(-0.2235294118), FN_DECIMAL(-0.3725490196), FN_DECIMAL(0.6235294118), FN_DECIMAL(0.7019607843), FN_DECIMAL(-0.8274509804), FN_DECIMAL(0.4196078431), FN_DECIMAL(0.07450980392), FN_DECIMAL(0.8666666667), FN_DECIMAL(-0.537254902), FN_DECIMAL(-0.5058823529), FN_DECIMAL(-0.8039215686), FN_DECIMAL(0.09019607843), FN_DECIMAL(-0.4823529412), FN_DECIMAL(0.6705882353), FN_DECIMAL(-0.7882352941), FN_DECIMAL(0.09803921569),
+	FN_DECIMAL(-0.6078431373), FN_DECIMAL(0.8039215686), FN_DECIMAL(-0.6), FN_DECIMAL(-0.3254901961), FN_DECIMAL(-0.4117647059), FN_DECIMAL(-0.01176470588), FN_DECIMAL(0.4823529412), FN_DECIMAL(0.168627451), FN_DECIMAL(0.8745098039), FN_DECIMAL(-0.3647058824), FN_DECIMAL(-0.1607843137), FN_DECIMAL(0.568627451), FN_DECIMAL(-0.9921568627), FN_DECIMAL(0.9450980392), FN_DECIMAL(0.5137254902), FN_DECIMAL(0.01176470588),
+	FN_DECIMAL(-0.1450980392), FN_DECIMAL(-0.5529411765), FN_DECIMAL(-0.5764705882), FN_DECIMAL(-0.1137254902), FN_DECIMAL(0.5215686275), FN_DECIMAL(0.1607843137), FN_DECIMAL(0.3725490196), FN_DECIMAL(-0.2), FN_DECIMAL(-0.7254901961), FN_DECIMAL(0.631372549), FN_DECIMAL(0.7098039216), FN_DECIMAL(-0.568627451), FN_DECIMAL(0.1294117647), FN_DECIMAL(-0.3098039216), FN_DECIMAL(0.7411764706), FN_DECIMAL(-0.8509803922),
+	FN_DECIMAL(0.2549019608), FN_DECIMAL(-0.6392156863), FN_DECIMAL(-0.5607843137), FN_DECIMAL(-0.3176470588), FN_DECIMAL(0.937254902), FN_DECIMAL(0.9843137255), FN_DECIMAL(0.5921568627), FN_DECIMAL(0.6941176471), FN_DECIMAL(0.2862745098), FN_DECIMAL(-0.5215686275), FN_DECIMAL(0.1764705882), FN_DECIMAL(0.537254902), FN_DECIMAL(-0.4901960784), FN_DECIMAL(-0.4588235294), FN_DECIMAL(-0.2078431373), FN_DECIMAL(-0.2156862745),
+	FN_DECIMAL(0.7725490196), FN_DECIMAL(0.3647058824), FN_DECIMAL(-0.2392156863), FN_DECIMAL(0.2784313725), FN_DECIMAL(-0.8823529412), FN_DECIMAL(0.8980392157), FN_DECIMAL(0.1215686275), FN_DECIMAL(0.1058823529), FN_DECIMAL(-0.8745098039), FN_DECIMAL(-0.9843137255), FN_DECIMAL(-0.7019607843), FN_DECIMAL(0.9607843137), FN_DECIMAL(0.2941176471), FN_DECIMAL(0.3411764706), FN_DECIMAL(0.1529411765), FN_DECIMAL(0.06666666667),
+	FN_DECIMAL(-0.9764705882), FN_DECIMAL(0.3019607843), FN_DECIMAL(0.6470588235), FN_DECIMAL(-0.5843137255), FN_DECIMAL(0.05098039216), FN_DECIMAL(-0.5137254902), FN_DECIMAL(-0.137254902), FN_DECIMAL(0.3882352941), FN_DECIMAL(-0.262745098), FN_DECIMAL(-0.3019607843), FN_DECIMAL(-0.1764705882), FN_DECIMAL(-0.7568627451), FN_DECIMAL(0.1843137255), FN_DECIMAL(-0.5450980392), FN_DECIMAL(-0.4980392157), FN_DECIMAL(-0.2784313725),
+	FN_DECIMAL(-0.9529411765), FN_DECIMAL(-0.09803921569), FN_DECIMAL(0.8901960784), FN_DECIMAL(-0.2862745098), FN_DECIMAL(-0.3803921569), FN_DECIMAL(0.5529411765), FN_DECIMAL(0.7803921569), FN_DECIMAL(-0.8352941176), FN_DECIMAL(0.6862745098), FN_DECIMAL(0.7568627451), FN_DECIMAL(0.4980392157), FN_DECIMAL(-0.6862745098), FN_DECIMAL(-0.8980392157), FN_DECIMAL(-0.7725490196), FN_DECIMAL(-0.7098039216), FN_DECIMAL(-0.2470588235),
+	FN_DECIMAL(-0.9058823529), FN_DECIMAL(0.9764705882), FN_DECIMAL(0.1921568627), FN_DECIMAL(0.8431372549), FN_DECIMAL(-0.05882352941), FN_DECIMAL(0.3568627451), FN_DECIMAL(0.6078431373), FN_DECIMAL(0.5450980392), FN_DECIMAL(0.4039215686), FN_DECIMAL(-0.7333333333), FN_DECIMAL(-0.4274509804), FN_DECIMAL(0.6), FN_DECIMAL(0.6784313725), FN_DECIMAL(-0.631372549), FN_DECIMAL(-0.02745098039), FN_DECIMAL(-0.1294117647),
+	FN_DECIMAL(0.3333333333), FN_DECIMAL(-0.8431372549), FN_DECIMAL(0.2235294118), FN_DECIMAL(-0.3490196078), FN_DECIMAL(-0.6941176471), FN_DECIMAL(0.8823529412), FN_DECIMAL(0.4745098039), FN_DECIMAL(0.4666666667), FN_DECIMAL(-0.7411764706), FN_DECIMAL(-0.2705882353), FN_DECIMAL(0.968627451), FN_DECIMAL(0.8196078431), FN_DECIMAL(-0.662745098), FN_DECIMAL(-0.4352941176), FN_DECIMAL(-0.8666666667), FN_DECIMAL(-0.1529411765),
+};
+
+const FN_DECIMAL CELL_2D_X[] =
+{
+	FN_DECIMAL(-0.6440658039), FN_DECIMAL(-0.08028078721), FN_DECIMAL(0.9983546168), FN_DECIMAL(0.9869492062), FN_DECIMAL(0.9284746418), FN_DECIMAL(0.6051097552), FN_DECIMAL(-0.794167404), FN_DECIMAL(-0.3488667991), FN_DECIMAL(-0.943136526), FN_DECIMAL(-0.9968171318), FN_DECIMAL(0.8740961579), FN_DECIMAL(0.1421139764), FN_DECIMAL(0.4282553608), FN_DECIMAL(-0.9986665833), FN_DECIMAL(0.9996760121), FN_DECIMAL(-0.06248383632),
+	FN_DECIMAL(0.7120139305), FN_DECIMAL(0.8917660409), FN_DECIMAL(0.1094842955), FN_DECIMAL(-0.8730880804), FN_DECIMAL(0.2594811489), FN_DECIMAL(-0.6690063346), FN_DECIMAL(-0.9996834972), FN_DECIMAL(-0.8803608671), FN_DECIMAL(-0.8166554937), FN_DECIMAL(0.8955599676), FN_DECIMAL(-0.9398321388), FN_DECIMAL(0.07615451399), FN_DECIMAL(-0.7147270565), FN_DECIMAL(0.8707354457), FN_DECIMAL(-0.9580008579), FN_DECIMAL(0.4905965632),
+	FN_DECIMAL(0.786775944), FN_DECIMAL(0.1079711577), FN_DECIMAL(0.2686638979), FN_DECIMAL(0.6113487322), FN_DECIMAL(-0.530770584), FN_DECIMAL(-0.7837268286), FN_DECIMAL(-0.8558691039), FN_DECIMAL(-0.5726093896), FN_DECIMAL(-0.9830740914), FN_DECIMAL(0.7087766359), FN_DECIMAL(0.6807027153), FN_DECIMAL(-0.08864708788), FN_DECIMAL(0.6704485923), FN_DECIMAL(-0.1350735482), FN_DECIMAL(-0.9381333003), FN_DECIMAL(0.9756655376),
+	FN_DECIMAL(0.4231433671), FN_DECIMAL(-0.4959787385), FN_DECIMAL(0.1005554325), FN_DECIMAL(-0.7645857281), FN_DECIMAL(-0.5859053796), FN_DECIMAL(-0.9751154306), FN_DECIMAL(-0.6972258572), FN_DECIMAL(0.7907012002), FN_DECIMAL(-0.9109899213), FN_DECIMAL(-0.9584307894), FN_DECIMAL(-0.8269529333), FN_DECIMAL(0.2608264719), FN_DECIMAL(-0.7773760119), FN_DECIMAL(0.7606456974), FN_DECIMAL(-0.8961083758), FN_DECIMAL(-0.9838134719),
+	FN_DECIMAL(0.7338893576), FN_DECIMAL(0.2161226729), FN_DECIMAL(0.673509891), FN_DECIMAL(-0.5512056873), FN_DECIMAL(0.6899744332), FN_DECIMAL(0.868004831), FN_DECIMAL(0.5897430311), FN_DECIMAL(-0.8950444221), FN_DECIMAL(-0.3595752773), FN_DECIMAL(0.8209486981), FN_DECIMAL(-0.2912360132), FN_DECIMAL(-0.9965011374), FN_DECIMAL(0.9766994634), FN_DECIMAL(0.738790822), FN_DECIMAL(-0.4730947722), FN_DECIMAL(0.8946479441),
+	FN_DECIMAL(-0.6943628971), FN_DECIMAL(-0.6620468182), FN_DECIMAL(-0.0887255502), FN_DECIMAL(-0.7512250855), FN_DECIMAL(-0.5322986898), FN_DECIMAL(0.5226295385), FN_DECIMAL(0.2296318375), FN_DECIMAL(0.7915307344), FN_DECIMAL(-0.2756485999), FN_DECIMAL(-0.6900234522), FN_DECIMAL(0.07090588086), FN_DECIMAL(0.5981278485), FN_DECIMAL(0.3033429312), FN_DECIMAL(-0.7253142797), FN_DECIMAL(-0.9855874307), FN_DECIMAL(-0.1761843396),
+	FN_DECIMAL(-0.6438468325), FN_DECIMAL(-0.9956136595), FN_DECIMAL(0.8541580762), FN_DECIMAL(-0.9999807666), FN_DECIMAL(-0.02152416253), FN_DECIMAL(-0.8705983095), FN_DECIMAL(-0.1197138014), FN_DECIMAL(-0.992107781), FN_DECIMAL(-0.9091181546), FN_DECIMAL(0.788610536), FN_DECIMAL(-0.994636402), FN_DECIMAL(0.4211256853), FN_DECIMAL(0.3110430857), FN_DECIMAL(-0.4031127839), FN_DECIMAL(0.7610684239), FN_DECIMAL(0.7685674467),
+	FN_DECIMAL(0.152271555), FN_DECIMAL(-0.9364648723), FN_DECIMAL(0.1681333739), FN_DECIMAL(-0.3567427907), FN_DECIMAL(-0.418445483), FN_DECIMAL(-0.98774778), FN_DECIMAL(0.8705250765), FN_DECIMAL(-0.8911701067), FN_DECIMAL(-0.7315350966), FN_DECIMAL(0.6030885658), FN_DECIMAL(-0.4149130821), FN_DECIMAL(0.7585339481), FN_DECIMAL(0.6963196535), FN_DECIMAL(0.8332685012), FN_DECIMAL(-0.8086815232), FN_DECIMAL(0.7518116724),
+	FN_DECIMAL(-0.3490535894), FN_DECIMAL(0.6972110903), FN_DECIMAL(-0.8795676928), FN_DECIMAL(-0.6442331882), FN_DECIMAL(0.6610236811), FN_DECIMAL(-0.9853565782), FN_DECIMAL(-0.590338458), FN_DECIMAL(0.09843602117), FN_DECIMAL(0.5646534882), FN_DECIMAL(-0.6023259233), FN_DECIMAL(-0.3539248861), FN_DECIMAL(0.5132728656), FN_DECIMAL(0.9380385118), FN_DECIMAL(-0.7599270056), FN_DECIMAL(-0.7425936564), FN_DECIMAL(-0.6679610562),
+	FN_DECIMAL(-0.3018497816), FN_DECIMAL(0.814478266), FN_DECIMAL(0.03777430269), FN_DECIMAL(-0.7514235086), FN_DECIMAL(0.9662556939), FN_DECIMAL(-0.4720194901), FN_DECIMAL(-0.435054126), FN_DECIMAL(0.7091901235), FN_DECIMAL(0.929379209), FN_DECIMAL(0.9997434357), FN_DECIMAL(0.8306320299), FN_DECIMAL(-0.9434019629), FN_DECIMAL(-0.133133759), FN_DECIMAL(0.5048413216), FN_DECIMAL(0.3711995273), FN_DECIMAL(0.98552091),
+	FN_DECIMAL(0.7401857005), FN_DECIMAL(-0.9999981398), FN_DECIMAL(-0.2144033253), FN_DECIMAL(0.4808624681), FN_DECIMAL(-0.413835885), FN_DECIMAL(0.644229305), FN_DECIMAL(0.9626648696), FN_DECIMAL(0.1833665934), FN_DECIMAL(0.5794129), FN_DECIMAL(0.01404446873), FN_DECIMAL(0.4388494993), FN_DECIMAL(0.5213612322), FN_DECIMAL(-0.5281609948), FN_DECIMAL(-0.9745306846), FN_DECIMAL(-0.9904373013), FN_DECIMAL(0.9100232252),
+	FN_DECIMAL(-0.9914057719), FN_DECIMAL(0.7892627765), FN_DECIMAL(0.3364421659), FN_DECIMAL(-0.9416099764), FN_DECIMAL(0.7802732656), FN_DECIMAL(0.886302871), FN_DECIMAL(0.6524471291), FN_DECIMAL(0.5762186726), FN_DECIMAL(-0.08987644664), FN_DECIMAL(-0.2177026782), FN_DECIMAL(-0.9720345052), FN_DECIMAL(-0.05722538858), FN_DECIMAL(0.8105983127), FN_DECIMAL(0.3410261032), FN_DECIMAL(0.6452309645), FN_DECIMAL(-0.7810612152),
+	FN_DECIMAL(0.9989395718), FN_DECIMAL(-0.808247815), FN_DECIMAL(0.6370177929), FN_DECIMAL(0.5844658772), FN_DECIMAL(0.2054070861), FN_DECIMAL(0.055960522), FN_DECIMAL(-0.995827561), FN_DECIMAL(0.893409165), FN_DECIMAL(-0.931516824), FN_DECIMAL(0.328969469), FN_DECIMAL(-0.3193837488), FN_DECIMAL(0.7314755657), FN_DECIMAL(-0.7913517714), FN_DECIMAL(-0.2204109786), FN_DECIMAL(0.9955900414), FN_DECIMAL(-0.7112353139),
+	FN_DECIMAL(-0.7935008741), FN_DECIMAL(-0.9961918204), FN_DECIMAL(-0.9714163995), FN_DECIMAL(-0.9566188669), FN_DECIMAL(0.2748495632), FN_DECIMAL(-0.4681743221), FN_DECIMAL(-0.9614449642), FN_DECIMAL(0.585194072), FN_DECIMAL(0.4532946061), FN_DECIMAL(-0.9916113176), FN_DECIMAL(0.942479587), FN_DECIMAL(-0.9813704753), FN_DECIMAL(-0.6538429571), FN_DECIMAL(0.2923335053), FN_DECIMAL(-0.2246660704), FN_DECIMAL(-0.1800781949),
+	FN_DECIMAL(-0.9581216256), FN_DECIMAL(0.552215082), FN_DECIMAL(-0.9296791922), FN_DECIMAL(0.643183699), FN_DECIMAL(0.9997325981), FN_DECIMAL(-0.4606920354), FN_DECIMAL(-0.2148721265), FN_DECIMAL(0.3482070809), FN_DECIMAL(0.3075517813), FN_DECIMAL(0.6274756393), FN_DECIMAL(0.8910881765), FN_DECIMAL(-0.6397771309), FN_DECIMAL(-0.4479080125), FN_DECIMAL(-0.5247665011), FN_DECIMAL(-0.8386507094), FN_DECIMAL(0.3901291416),
+	FN_DECIMAL(0.1458336921), FN_DECIMAL(0.01624613149), FN_DECIMAL(-0.8273199879), FN_DECIMAL(0.5611100679), FN_DECIMAL(-0.8380219841), FN_DECIMAL(-0.9856122234), FN_DECIMAL(-0.861398618), FN_DECIMAL(0.6398413916), FN_DECIMAL(0.2694510795), FN_DECIMAL(0.4327334514), FN_DECIMAL(-0.9960265354), FN_DECIMAL(-0.939570655), FN_DECIMAL(-0.8846996446), FN_DECIMAL(0.7642113189), FN_DECIMAL(-0.7002080528), FN_DECIMAL(0.664508256),
+};
+const FN_DECIMAL CELL_2D_Y[] =
+{
+	FN_DECIMAL(0.7649700911), FN_DECIMAL(0.9967722885), FN_DECIMAL(0.05734160033), FN_DECIMAL(-0.1610318741), FN_DECIMAL(0.371395799), FN_DECIMAL(-0.7961420628), FN_DECIMAL(0.6076990492), FN_DECIMAL(-0.9371723195), FN_DECIMAL(0.3324056156), FN_DECIMAL(0.07972205329), FN_DECIMAL(-0.4857529277), FN_DECIMAL(-0.9898503007), FN_DECIMAL(0.9036577593), FN_DECIMAL(0.05162417479), FN_DECIMAL(-0.02545330525), FN_DECIMAL(-0.998045976),
+	FN_DECIMAL(-0.7021653386), FN_DECIMAL(-0.4524967717), FN_DECIMAL(-0.9939885256), FN_DECIMAL(-0.4875625128), FN_DECIMAL(-0.9657481729), FN_DECIMAL(-0.7432567015), FN_DECIMAL(0.02515761212), FN_DECIMAL(0.4743044842), FN_DECIMAL(0.5771254669), FN_DECIMAL(0.4449408324), FN_DECIMAL(0.3416365773), FN_DECIMAL(0.9970960285), FN_DECIMAL(0.6994034849), FN_DECIMAL(0.4917517499), FN_DECIMAL(0.286765333), FN_DECIMAL(0.8713868327),
+	FN_DECIMAL(0.6172387009), FN_DECIMAL(0.9941540269), FN_DECIMAL(0.9632339851), FN_DECIMAL(-0.7913613129), FN_DECIMAL(0.847515538), FN_DECIMAL(0.6211056739), FN_DECIMAL(0.5171924952), FN_DECIMAL(-0.8198283277), FN_DECIMAL(-0.1832084353), FN_DECIMAL(0.7054329737), FN_DECIMAL(0.7325597678), FN_DECIMAL(0.9960630973), FN_DECIMAL(0.7419559859), FN_DECIMAL(0.9908355749), FN_DECIMAL(-0.346274329), FN_DECIMAL(0.2192641299),
+	FN_DECIMAL(-0.9060627411), FN_DECIMAL(-0.8683346653), FN_DECIMAL(0.9949314574), FN_DECIMAL(-0.6445220433), FN_DECIMAL(-0.8103794704), FN_DECIMAL(-0.2216977607), FN_DECIMAL(0.7168515217), FN_DECIMAL(0.612202264), FN_DECIMAL(-0.412428616), FN_DECIMAL(0.285325116), FN_DECIMAL(0.56227115), FN_DECIMAL(-0.9653857009), FN_DECIMAL(-0.6290361962), FN_DECIMAL(0.6491672535), FN_DECIMAL(0.443835306), FN_DECIMAL(-0.1791955706),
+	FN_DECIMAL(-0.6792690269), FN_DECIMAL(-0.9763662173), FN_DECIMAL(0.7391782104), FN_DECIMAL(0.8343693968), FN_DECIMAL(0.7238337389), FN_DECIMAL(0.4965557504), FN_DECIMAL(0.8075909592), FN_DECIMAL(-0.4459769977), FN_DECIMAL(-0.9331160806), FN_DECIMAL(-0.5710019572), FN_DECIMAL(0.9566512346), FN_DECIMAL(-0.08357920318), FN_DECIMAL(0.2146116448), FN_DECIMAL(-0.6739348049), FN_DECIMAL(0.8810115417), FN_DECIMAL(0.4467718167),
+	FN_DECIMAL(-0.7196250184), FN_DECIMAL(-0.749462481), FN_DECIMAL(0.9960561112), FN_DECIMAL(0.6600461127), FN_DECIMAL(-0.8465566164), FN_DECIMAL(-0.8525598897), FN_DECIMAL(-0.9732775654), FN_DECIMAL(0.6111293616), FN_DECIMAL(-0.9612584717), FN_DECIMAL(-0.7237870097), FN_DECIMAL(-0.9974830104), FN_DECIMAL(-0.8014006968), FN_DECIMAL(0.9528814544), FN_DECIMAL(-0.6884178931), FN_DECIMAL(-0.1691668301), FN_DECIMAL(0.9843571905),
+	FN_DECIMAL(0.7651544003), FN_DECIMAL(-0.09355982605), FN_DECIMAL(-0.5200134429), FN_DECIMAL(-0.006202125807), FN_DECIMAL(-0.9997683284), FN_DECIMAL(0.4919944954), FN_DECIMAL(-0.9928084436), FN_DECIMAL(-0.1253880012), FN_DECIMAL(-0.4165383308), FN_DECIMAL(-0.6148930171), FN_DECIMAL(-0.1034332049), FN_DECIMAL(-0.9070022917), FN_DECIMAL(-0.9503958117), FN_DECIMAL(0.9151503065), FN_DECIMAL(-0.6486716073), FN_DECIMAL(0.6397687707),
+	FN_DECIMAL(-0.9883386937), FN_DECIMAL(0.3507613761), FN_DECIMAL(0.9857642561), FN_DECIMAL(-0.9342026446), FN_DECIMAL(-0.9082419159), FN_DECIMAL(0.1560587169), FN_DECIMAL(0.4921240607), FN_DECIMAL(-0.453669308), FN_DECIMAL(0.6818037859), FN_DECIMAL(0.7976742329), FN_DECIMAL(0.9098610522), FN_DECIMAL(0.651633524), FN_DECIMAL(0.7177318024), FN_DECIMAL(-0.5528685241), FN_DECIMAL(0.5882467118), FN_DECIMAL(0.6593778956),
+	FN_DECIMAL(0.9371027648), FN_DECIMAL(-0.7168658839), FN_DECIMAL(-0.4757737632), FN_DECIMAL(0.7648291307), FN_DECIMAL(0.7503650398), FN_DECIMAL(0.1705063456), FN_DECIMAL(-0.8071558121), FN_DECIMAL(-0.9951433815), FN_DECIMAL(-0.8253280792), FN_DECIMAL(-0.7982502628), FN_DECIMAL(0.9352738503), FN_DECIMAL(0.8582254747), FN_DECIMAL(-0.3465310238), FN_DECIMAL(0.65000842), FN_DECIMAL(-0.6697422351), FN_DECIMAL(0.7441962291),
+	FN_DECIMAL(-0.9533555), FN_DECIMAL(0.5801940659), FN_DECIMAL(-0.9992862963), FN_DECIMAL(-0.659820211), FN_DECIMAL(0.2575848092), FN_DECIMAL(0.881588113), FN_DECIMAL(-0.9004043022), FN_DECIMAL(-0.7050172826), FN_DECIMAL(0.369126382), FN_DECIMAL(-0.02265088836), FN_DECIMAL(0.5568217228), FN_DECIMAL(-0.3316515286), FN_DECIMAL(0.991098079), FN_DECIMAL(-0.863212164), FN_DECIMAL(-0.9285531277), FN_DECIMAL(0.1695539323),
+	FN_DECIMAL(-0.672402505), FN_DECIMAL(-0.001928841934), FN_DECIMAL(0.9767452145), FN_DECIMAL(-0.8767960349), FN_DECIMAL(0.9103515037), FN_DECIMAL(-0.7648324016), FN_DECIMAL(0.2706960452), FN_DECIMAL(-0.9830446035), FN_DECIMAL(0.8150341657), FN_DECIMAL(-0.9999013716), FN_DECIMAL(-0.8985605806), FN_DECIMAL(0.8533360801), FN_DECIMAL(0.8491442537), FN_DECIMAL(-0.2242541966), FN_DECIMAL(-0.1379635899), FN_DECIMAL(-0.4145572694),
+	FN_DECIMAL(0.1308227633), FN_DECIMAL(0.6140555916), FN_DECIMAL(0.9417041303), FN_DECIMAL(-0.336705587), FN_DECIMAL(-0.6254387508), FN_DECIMAL(0.4631060578), FN_DECIMAL(-0.7578342456), FN_DECIMAL(-0.8172955655), FN_DECIMAL(-0.9959529228), FN_DECIMAL(-0.9760151351), FN_DECIMAL(0.2348380732), FN_DECIMAL(-0.9983612848), FN_DECIMAL(0.5856025746), FN_DECIMAL(-0.9400538266), FN_DECIMAL(-0.7639875669), FN_DECIMAL(0.6244544645),
+	FN_DECIMAL(0.04604054566), FN_DECIMAL(0.5888424828), FN_DECIMAL(0.7708490978), FN_DECIMAL(-0.8114182882), FN_DECIMAL(0.9786766212), FN_DECIMAL(-0.9984329822), FN_DECIMAL(0.09125496582), FN_DECIMAL(-0.4492438803), FN_DECIMAL(-0.3636982357), FN_DECIMAL(0.9443405575), FN_DECIMAL(-0.9476254645), FN_DECIMAL(-0.6818676535), FN_DECIMAL(-0.6113610831), FN_DECIMAL(0.9754070948), FN_DECIMAL(-0.0938108173), FN_DECIMAL(-0.7029540015),
+	FN_DECIMAL(-0.6085691109), FN_DECIMAL(-0.08718862881), FN_DECIMAL(-0.237381926), FN_DECIMAL(0.2913423132), FN_DECIMAL(0.9614872426), FN_DECIMAL(0.8836361266), FN_DECIMAL(-0.2749974196), FN_DECIMAL(-0.8108932717), FN_DECIMAL(-0.8913607575), FN_DECIMAL(0.129255541), FN_DECIMAL(-0.3342637104), FN_DECIMAL(-0.1921249337), FN_DECIMAL(-0.7566302845), FN_DECIMAL(-0.9563164339), FN_DECIMAL(-0.9744358146), FN_DECIMAL(0.9836522982),
+	FN_DECIMAL(-0.2863615732), FN_DECIMAL(0.8337016872), FN_DECIMAL(0.3683701937), FN_DECIMAL(0.7657119102), FN_DECIMAL(-0.02312427772), FN_DECIMAL(0.8875600535), FN_DECIMAL(0.976642191), FN_DECIMAL(0.9374176384), FN_DECIMAL(0.9515313457), FN_DECIMAL(-0.7786361937), FN_DECIMAL(-0.4538302125), FN_DECIMAL(-0.7685604874), FN_DECIMAL(-0.8940796454), FN_DECIMAL(-0.8512462154), FN_DECIMAL(0.5446696133), FN_DECIMAL(0.9207601495),
+	FN_DECIMAL(-0.9893091197), FN_DECIMAL(-0.9998680229), FN_DECIMAL(0.5617309299), FN_DECIMAL(-0.8277411985), FN_DECIMAL(0.545636467), FN_DECIMAL(0.1690223212), FN_DECIMAL(-0.5079295433), FN_DECIMAL(0.7685069899), FN_DECIMAL(-0.9630140787), FN_DECIMAL(0.9015219132), FN_DECIMAL(0.08905695279), FN_DECIMAL(-0.3423550559), FN_DECIMAL(-0.4661614943), FN_DECIMAL(-0.6449659371), FN_DECIMAL(0.7139388509), FN_DECIMAL(0.7472809229),
+};
+const FN_DECIMAL CELL_3D_X[] =
+{
+	FN_DECIMAL(0.3752498686), FN_DECIMAL(0.687188096), FN_DECIMAL(0.2248135212), FN_DECIMAL(0.6692006647), FN_DECIMAL(-0.4376476931), FN_DECIMAL(0.6139972552), FN_DECIMAL(0.9494563929), FN_DECIMAL(0.8065108882), FN_DECIMAL(-0.2218812853), FN_DECIMAL(0.8484661167), FN_DECIMAL(0.5551817596), FN_DECIMAL(0.2133903499), FN_DECIMAL(0.5195126593), FN_DECIMAL(-0.6440141975), FN_DECIMAL(-0.5192897331), FN_DECIMAL(-0.3697654077),
+	FN_DECIMAL(-0.07927779647), FN_DECIMAL(0.4187757321), FN_DECIMAL(-0.750078731), FN_DECIMAL(0.6579554632), FN_DECIMAL(-0.6859803838), FN_DECIMAL(-0.6878407087), FN_DECIMAL(0.9490848347), FN_DECIMAL(0.5795829433), FN_DECIMAL(-0.5325976529), FN_DECIMAL(-0.1363699466), FN_DECIMAL(0.417665879), FN_DECIMAL(-0.9108236468), FN_DECIMAL(0.4438605427), FN_DECIMAL(0.819294887), FN_DECIMAL(-0.4033873915), FN_DECIMAL(-0.2817317705),
+	FN_DECIMAL(0.3969665622), FN_DECIMAL(0.5323450134), FN_DECIMAL(-0.6833017297), FN_DECIMAL(0.3881436661), FN_DECIMAL(-0.7119144767), FN_DECIMAL(-0.2306979838), FN_DECIMAL(-0.9398873022), FN_DECIMAL(0.1701906676), FN_DECIMAL(-0.4261839496), FN_DECIMAL(-0.003712295499), FN_DECIMAL(-0.734675004), FN_DECIMAL(-0.3195046015), FN_DECIMAL(0.7345307424), FN_DECIMAL(0.9766246496), FN_DECIMAL(-0.02003735175), FN_DECIMAL(-0.4824156342),
+	FN_DECIMAL(0.4245892007), FN_DECIMAL(0.9072427669), FN_DECIMAL(0.593346808), FN_DECIMAL(-0.8911762541), FN_DECIMAL(-0.7657571834), FN_DECIMAL(-0.5268198896), FN_DECIMAL(-0.8801903279), FN_DECIMAL(-0.6296409617), FN_DECIMAL(-0.09492481344), FN_DECIMAL(-0.4920470525), FN_DECIMAL(0.7307666154), FN_DECIMAL(-0.2514540636), FN_DECIMAL(-0.3356210347), FN_DECIMAL(-0.3522787894), FN_DECIMAL(0.87847885), FN_DECIMAL(-0.7424096346),
+	FN_DECIMAL(0.5757585274), FN_DECIMAL(0.4519299338), FN_DECIMAL(0.6420368628), FN_DECIMAL(-0.1128478447), FN_DECIMAL(0.499874883), FN_DECIMAL(0.5291681739), FN_DECIMAL(-0.5098837195), FN_DECIMAL(0.5639583502), FN_DECIMAL(-0.8456386526), FN_DECIMAL(-0.9657134875), FN_DECIMAL(-0.576437342), FN_DECIMAL(-0.5666013014), FN_DECIMAL(0.5667702405), FN_DECIMAL(-0.481316582), FN_DECIMAL(0.7313389916), FN_DECIMAL(-0.3805628566),
+	FN_DECIMAL(-0.6512675909), FN_DECIMAL(-0.2787156951), FN_DECIMAL(0.8648059114), FN_DECIMAL(-0.9730216276), FN_DECIMAL(-0.8335820906), FN_DECIMAL(0.2673159641), FN_DECIMAL(0.231150148), FN_DECIMAL(0.01286214638), FN_DECIMAL(0.6774953261), FN_DECIMAL(0.6542885718), FN_DECIMAL(-0.02545450161), FN_DECIMAL(0.2101238586), FN_DECIMAL(-0.5572105885), FN_DECIMAL(0.813705672), FN_DECIMAL(-0.7546026951), FN_DECIMAL(-0.2502500006),
+	FN_DECIMAL(-0.9979289381), FN_DECIMAL(0.7024037039), FN_DECIMAL(0.08990874624), FN_DECIMAL(0.8170812432), FN_DECIMAL(0.4226980265), FN_DECIMAL(-0.2442153475), FN_DECIMAL(-0.9183326731), FN_DECIMAL(0.6068222411), FN_DECIMAL(0.818676691), FN_DECIMAL(-0.7236735282), FN_DECIMAL(-0.5383903295), FN_DECIMAL(-0.6269337242), FN_DECIMAL(-0.0939331121), FN_DECIMAL(0.9203878539), FN_DECIMAL(-0.7256396824), FN_DECIMAL(0.6292431149),
+	FN_DECIMAL(0.4234156978), FN_DECIMAL(0.006685688024), FN_DECIMAL(-0.2598694113), FN_DECIMAL(0.6408036421), FN_DECIMAL(0.05899871622), FN_DECIMAL(0.7090281418), FN_DECIMAL(-0.5905222072), FN_DECIMAL(0.3128214264), FN_DECIMAL(-0.691925826), FN_DECIMAL(0.3634019349), FN_DECIMAL(-0.6772511147), FN_DECIMAL(-0.3204583896), FN_DECIMAL(-0.3906740409), FN_DECIMAL(-0.3342190395), FN_DECIMAL(-0.517779592), FN_DECIMAL(-0.6817711267),
+	FN_DECIMAL(0.6422383105), FN_DECIMAL(0.4388482478), FN_DECIMAL(0.2968562611), FN_DECIMAL(-0.2019778353), FN_DECIMAL(0.6014865048), FN_DECIMAL(0.9519280722), FN_DECIMAL(0.3398889569), FN_DECIMAL(0.8179709354), FN_DECIMAL(0.2365522154), FN_DECIMAL(0.3262175096), FN_DECIMAL(-0.8060715954), FN_DECIMAL(-0.2068642503), FN_DECIMAL(0.6208057279), FN_DECIMAL(-0.5274282502), FN_DECIMAL(-0.3722334928), FN_DECIMAL(-0.8923412971),
+	FN_DECIMAL(0.5341834201), FN_DECIMAL(-0.3663701513), FN_DECIMAL(-0.6114600319), FN_DECIMAL(0.5026307556), FN_DECIMAL(0.8396151729), FN_DECIMAL(0.9245042467), FN_DECIMAL(-0.7994843957), FN_DECIMAL(-0.5357200589), FN_DECIMAL(-0.6283359739), FN_DECIMAL(-0.61351886), FN_DECIMAL(-0.875632008), FN_DECIMAL(-0.5278879423), FN_DECIMAL(0.9087491985), FN_DECIMAL(-0.03500215466), FN_DECIMAL(-0.261365798), FN_DECIMAL(-0.579523541),
+	FN_DECIMAL(-0.3765052689), FN_DECIMAL(-0.74398252), FN_DECIMAL(0.4257318052), FN_DECIMAL(-0.1214508921), FN_DECIMAL(0.8561809753), FN_DECIMAL(0.6802835104), FN_DECIMAL(-0.5452131039), FN_DECIMAL(-0.1997156478), FN_DECIMAL(0.4562348357), FN_DECIMAL(-0.811704301), FN_DECIMAL(0.67793962), FN_DECIMAL(-0.9237819106), FN_DECIMAL(0.6973511259), FN_DECIMAL(-0.5189506), FN_DECIMAL(0.5517320032), FN_DECIMAL(-0.396710831),
+	FN_DECIMAL(0.5493762815), FN_DECIMAL(-0.2507853002), FN_DECIMAL(0.4788634005), FN_DECIMAL(0.387333516), FN_DECIMAL(-0.2176515694), FN_DECIMAL(0.6749832419), FN_DECIMAL(0.2148283022), FN_DECIMAL(-0.7521815872), FN_DECIMAL(0.4697000159), FN_DECIMAL(0.7890593699), FN_DECIMAL(-0.7606162952), FN_DECIMAL(0.01083397843), FN_DECIMAL(0.5254091908), FN_DECIMAL(-0.6748025877), FN_DECIMAL(0.751091524), FN_DECIMAL(0.05259056135),
+	FN_DECIMAL(0.01889481232), FN_DECIMAL(-0.6037423727), FN_DECIMAL(-0.6542965129), FN_DECIMAL(0.08873301081), FN_DECIMAL(-0.6191345671), FN_DECIMAL(0.4331858488), FN_DECIMAL(-0.3858351946), FN_DECIMAL(-0.1429059747), FN_DECIMAL(0.4118221036), FN_DECIMAL(-0.6247153214), FN_DECIMAL(-0.611423014), FN_DECIMAL(0.5542939606), FN_DECIMAL(-0.9432768808), FN_DECIMAL(-0.4567870451), FN_DECIMAL(-0.7349133547), FN_DECIMAL(0.399304489),
+	FN_DECIMAL(-0.7474927672), FN_DECIMAL(0.02589419753), FN_DECIMAL(0.783915821), FN_DECIMAL(0.6138668752), FN_DECIMAL(0.4276376047), FN_DECIMAL(-0.4347886353), FN_DECIMAL(0.02947841302), FN_DECIMAL(-0.833742746), FN_DECIMAL(0.3817221742), FN_DECIMAL(-0.8743368359), FN_DECIMAL(-0.3823443796), FN_DECIMAL(-0.6829243811), FN_DECIMAL(-0.3681903049), FN_DECIMAL(-0.367626833), FN_DECIMAL(-0.434583373), FN_DECIMAL(0.235891995),
+	FN_DECIMAL(-0.6874880269), FN_DECIMAL(-0.5115661773), FN_DECIMAL(-0.5534962601), FN_DECIMAL(0.5632777056), FN_DECIMAL(0.686191532), FN_DECIMAL(-0.05095871588), FN_DECIMAL(-0.06865785057), FN_DECIMAL(-0.5975288531), FN_DECIMAL(-0.6429790056), FN_DECIMAL(-0.3729361548), FN_DECIMAL(0.2237917666), FN_DECIMAL(0.6046773225), FN_DECIMAL(-0.5041542295), FN_DECIMAL(-0.03972191174), FN_DECIMAL(0.7028828406), FN_DECIMAL(-0.5560856498),
+	FN_DECIMAL(0.5898328456), FN_DECIMAL(-0.9308076766), FN_DECIMAL(0.4617069864), FN_DECIMAL(0.3190983137), FN_DECIMAL(0.9116567753), FN_DECIMAL(-0.45029554), FN_DECIMAL(0.3346334459), FN_DECIMAL(0.8525005645), FN_DECIMAL(0.2528483381), FN_DECIMAL(-0.8306630147), FN_DECIMAL(-0.6880390622), FN_DECIMAL(0.7448684026), FN_DECIMAL(-0.1963355843), FN_DECIMAL(-0.5900257974), FN_DECIMAL(0.9097057294), FN_DECIMAL(-0.2509196808),
+};
+const FN_DECIMAL CELL_3D_Y[] =
+{
+	FN_DECIMAL(-0.6760585049), FN_DECIMAL(-0.09136176499), FN_DECIMAL(0.1681325679), FN_DECIMAL(-0.6688468686), FN_DECIMAL(-0.4822753902), FN_DECIMAL(-0.7891068824), FN_DECIMAL(-0.1877509944), FN_DECIMAL(0.548470914), FN_DECIMAL(-0.463339443), FN_DECIMAL(-0.4050542082), FN_DECIMAL(0.3218158513), FN_DECIMAL(0.2546493823), FN_DECIMAL(-0.3753271935), FN_DECIMAL(0.4745384887), FN_DECIMAL(0.481254652), FN_DECIMAL(-0.8934416489),
+	FN_DECIMAL(-0.6737085076), FN_DECIMAL(0.7469917228), FN_DECIMAL(0.3826230411), FN_DECIMAL(0.6751013678), FN_DECIMAL(-0.7248119515), FN_DECIMAL(-0.3224276742), FN_DECIMAL(-0.02076190936), FN_DECIMAL(-0.6404268166), FN_DECIMAL(-0.5292028444), FN_DECIMAL(0.7151414636), FN_DECIMAL(-0.6144655059), FN_DECIMAL(-0.369912124), FN_DECIMAL(0.6942067212), FN_DECIMAL(-0.4481558248), FN_DECIMAL(-0.6366894559), FN_DECIMAL(0.5956568471),
+	FN_DECIMAL(0.564274539), FN_DECIMAL(0.7145584688), FN_DECIMAL(0.6871918316), FN_DECIMAL(0.5657918509), FN_DECIMAL(-0.6275978114), FN_DECIMAL(0.4146983062), FN_DECIMAL(0.2638993789), FN_DECIMAL(-0.792633138), FN_DECIMAL(0.5706133514), FN_DECIMAL(0.8606546462), FN_DECIMAL(0.6490900316), FN_DECIMAL(-0.8242699196), FN_DECIMAL(0.6765819124), FN_DECIMAL(0.1959534069), FN_DECIMAL(-0.8426769757), FN_DECIMAL(-0.5917672797),
+	FN_DECIMAL(0.7517364266), FN_DECIMAL(0.03252559226), FN_DECIMAL(0.0883617105), FN_DECIMAL(0.4475064813), FN_DECIMAL(-0.1418643552), FN_DECIMAL(0.7343428473), FN_DECIMAL(0.3870192548), FN_DECIMAL(-0.7716703522), FN_DECIMAL(0.4839898327), FN_DECIMAL(0.7437439055), FN_DECIMAL(-0.5989573348), FN_DECIMAL(-0.8357068955), FN_DECIMAL(0.6086049038), FN_DECIMAL(0.9194627258), FN_DECIMAL(0.4718297238), FN_DECIMAL(-0.2650335884),
+	FN_DECIMAL(-0.6470352599), FN_DECIMAL(-0.5555181303), FN_DECIMAL(0.1222351235), FN_DECIMAL(0.7802044684), FN_DECIMAL(-0.8636947022), FN_DECIMAL(-0.2341352163), FN_DECIMAL(0.683030874), FN_DECIMAL(-0.5005858287), FN_DECIMAL(0.2334616211), FN_DECIMAL(0.2576877608), FN_DECIMAL(0.6666816727), FN_DECIMAL(-0.7663996863), FN_DECIMAL(0.794201982), FN_DECIMAL(0.6189308788), FN_DECIMAL(0.6071033261), FN_DECIMAL(-0.4206058253),
+	FN_DECIMAL(-0.3957336915), FN_DECIMAL(-0.8170257484), FN_DECIMAL(-0.1043240417), FN_DECIMAL(0.0002167596213), FN_DECIMAL(0.1816339018), FN_DECIMAL(-0.6838094939), FN_DECIMAL(-0.2495341969), FN_DECIMAL(-0.7116756954), FN_DECIMAL(-0.03361673621), FN_DECIMAL(-0.3350836431), FN_DECIMAL(0.2137186039), FN_DECIMAL(0.2557996786), FN_DECIMAL(0.7490117093), FN_DECIMAL(0.4942936549), FN_DECIMAL(-0.352686853), FN_DECIMAL(-0.3952445435),
+	FN_DECIMAL(-0.0459964767), FN_DECIMAL(-0.7115787471), FN_DECIMAL(0.08022899756), FN_DECIMAL(0.5362268157), FN_DECIMAL(-0.8258613686), FN_DECIMAL(0.1114171723), FN_DECIMAL(0.3882823051), FN_DECIMAL(-0.7915404457), FN_DECIMAL(0.3250957662), FN_DECIMAL(0.6401346464), FN_DECIMAL(-0.2662724517), FN_DECIMAL(-0.6727907114), FN_DECIMAL(-0.994730818), FN_DECIMAL(-0.3596358977), FN_DECIMAL(0.2344610069), FN_DECIMAL(-0.6645215546),
+	FN_DECIMAL(-0.7107590611), FN_DECIMAL(-0.4646617327), FN_DECIMAL(0.6717191355), FN_DECIMAL(0.5101893498), FN_DECIMAL(0.1185768238), FN_DECIMAL(0.236005093), FN_DECIMAL(-0.7811024061), FN_DECIMAL(0.5089325193), FN_DECIMAL(0.6073187658), FN_DECIMAL(-0.7930732557), FN_DECIMAL(-0.6822767155), FN_DECIMAL(0.3201532885), FN_DECIMAL(0.7545302807), FN_DECIMAL(0.1072664448), FN_DECIMAL(0.6784033173), FN_DECIMAL(-0.6595924967),
+	FN_DECIMAL(0.7276509498), FN_DECIMAL(0.5586689436), FN_DECIMAL(-0.6498636788), FN_DECIMAL(0.6789333174), FN_DECIMAL(0.7105966551), FN_DECIMAL(-0.2872214155), FN_DECIMAL(0.496746217), FN_DECIMAL(-0.3880337977), FN_DECIMAL(0.7324070604), FN_DECIMAL(-0.9326634749), FN_DECIMAL(-0.5867839255), FN_DECIMAL(0.8003043651), FN_DECIMAL(-0.1631882481), FN_DECIMAL(-0.6796374681), FN_DECIMAL(-0.8066678503), FN_DECIMAL(0.4238177418),
+	FN_DECIMAL(0.7715863549), FN_DECIMAL(0.5455367347), FN_DECIMAL(-0.03205115397), FN_DECIMAL(-0.6005545066), FN_DECIMAL(-0.5423640002), FN_DECIMAL(0.3569205906), FN_DECIMAL(-0.582071752), FN_DECIMAL(0.6407354361), FN_DECIMAL(0.7777142984), FN_DECIMAL(-0.09956428618), FN_DECIMAL(0.1100002681), FN_DECIMAL(0.8136349123), FN_DECIMAL(0.2923431904), FN_DECIMAL(0.9735794425), FN_DECIMAL(0.8324974864), FN_DECIMAL(-0.6179617717),
+	FN_DECIMAL(-0.9248386523), FN_DECIMAL(-0.6448780771), FN_DECIMAL(-0.5274402761), FN_DECIMAL(-0.7862170565), FN_DECIMAL(0.2682099744), FN_DECIMAL(-0.5848777694), FN_DECIMAL(-0.6364561467), FN_DECIMAL(-0.7167402514), FN_DECIMAL(-0.8677012494), FN_DECIMAL(0.4205286707), FN_DECIMAL(-0.7007832749), FN_DECIMAL(0.243272451), FN_DECIMAL(-0.1899846085), FN_DECIMAL(-0.6146124977), FN_DECIMAL(-0.8093357692), FN_DECIMAL(-0.03545096987),
+	FN_DECIMAL(-0.7191590868), FN_DECIMAL(0.7478645848), FN_DECIMAL(0.3623517328), FN_DECIMAL(0.8436992512), FN_DECIMAL(-0.2445711729), FN_DECIMAL(0.6897356637), FN_DECIMAL(-0.1708070787), FN_DECIMAL(0.4639272368), FN_DECIMAL(-0.7917186656), FN_DECIMAL(0.02980025428), FN_DECIMAL(0.6334156172), FN_DECIMAL(-0.9815544807), FN_DECIMAL(-0.2307217304), FN_DECIMAL(0.1080823318), FN_DECIMAL(0.5167601798), FN_DECIMAL(-0.845120016),
+	FN_DECIMAL(0.441572562), FN_DECIMAL(0.5876789172), FN_DECIMAL(-0.6365908737), FN_DECIMAL(0.68350166), FN_DECIMAL(0.5849723959), FN_DECIMAL(0.1164114357), FN_DECIMAL(-0.7379813884), FN_DECIMAL(-0.9613237178), FN_DECIMAL(-0.9071943084), FN_DECIMAL(-0.7682111105), FN_DECIMAL(0.639074459), FN_DECIMAL(-0.619358298), FN_DECIMAL(0.2807257131), FN_DECIMAL(-0.01800868791), FN_DECIMAL(0.3776607289), FN_DECIMAL(0.7207567823),
+	FN_DECIMAL(0.5536661486), FN_DECIMAL(-0.9974053117), FN_DECIMAL(-0.02047200006), FN_DECIMAL(-0.6739453804), FN_DECIMAL(-0.5607471297), FN_DECIMAL(0.8815553192), FN_DECIMAL(0.8275977415), FN_DECIMAL(0.3928902456), FN_DECIMAL(0.550991396), FN_DECIMAL(0.4247623676), FN_DECIMAL(-0.3436948871), FN_DECIMAL(-0.3653537677), FN_DECIMAL(0.3181702902), FN_DECIMAL(-0.6067173171), FN_DECIMAL(-0.8984128477), FN_DECIMAL(0.4220839766),
+	FN_DECIMAL(0.7238407199), FN_DECIMAL(-0.7766913695), FN_DECIMAL(0.6460037842), FN_DECIMAL(0.2544775664), FN_DECIMAL(0.6488840578), FN_DECIMAL(0.805016833), FN_DECIMAL(-0.9183807036), FN_DECIMAL(0.4144046357), FN_DECIMAL(0.270587208), FN_DECIMAL(-0.8813684494), FN_DECIMAL(0.6985971877), FN_DECIMAL(-0.7795603017), FN_DECIMAL(-0.8624480731), FN_DECIMAL(0.5532697017), FN_DECIMAL(0.711179521), FN_DECIMAL(-0.7798160574),
+	FN_DECIMAL(0.5225859041), FN_DECIMAL(0.1261859368), FN_DECIMAL(0.3398033582), FN_DECIMAL(-0.7472173667), FN_DECIMAL(-0.4032647119), FN_DECIMAL(-0.4246578154), FN_DECIMAL(0.8481212377), FN_DECIMAL(-0.2144838537), FN_DECIMAL(0.3431714491), FN_DECIMAL(0.5310188231), FN_DECIMAL(0.6682978632), FN_DECIMAL(0.3110433206), FN_DECIMAL(0.9263293599), FN_DECIMAL(-0.6155600569), FN_DECIMAL(0.07169784399), FN_DECIMAL(0.8985888773),
+};
+const FN_DECIMAL CELL_3D_Z[] = // Precomputed table, 16 values per row. Presumably the Z components of the 3D cellular offset vectors -- confirm at the use site.
+{
+	FN_DECIMAL(-0.6341391283), FN_DECIMAL(-0.7207118346), FN_DECIMAL(0.9597866014), FN_DECIMAL(0.3237504235), FN_DECIMAL(-0.7588642466), FN_DECIMAL(-0.01782410481), FN_DECIMAL(0.2515593809), FN_DECIMAL(0.2207257205), FN_DECIMAL(-0.8579541106), FN_DECIMAL(0.3406410681), FN_DECIMAL(0.7669470462), FN_DECIMAL(-0.9431957648), FN_DECIMAL(0.7676171537), FN_DECIMAL(-0.6000491115), FN_DECIMAL(-0.7062096948), FN_DECIMAL(0.2550207115),
+	FN_DECIMAL(0.7347325213), FN_DECIMAL(0.5163625202), FN_DECIMAL(-0.5394270162), FN_DECIMAL(0.3336656285), FN_DECIMAL(-0.0638635111), FN_DECIMAL(-0.6503195787), FN_DECIMAL(0.3143356798), FN_DECIMAL(-0.5039217245), FN_DECIMAL(0.6605180464), FN_DECIMAL(-0.6855479011), FN_DECIMAL(-0.6693185756), FN_DECIMAL(0.1832083647), FN_DECIMAL(-0.5666258437), FN_DECIMAL(0.3576482138), FN_DECIMAL(-0.6571949095), FN_DECIMAL(-0.7522101635),
+	FN_DECIMAL(-0.7238865886), FN_DECIMAL(0.4538887323), FN_DECIMAL(0.2467106257), FN_DECIMAL(0.7274778869), FN_DECIMAL(0.3151170655), FN_DECIMAL(-0.8802293764), FN_DECIMAL(-0.2167232729), FN_DECIMAL(0.5854637865), FN_DECIMAL(0.7019741052), FN_DECIMAL(0.5091756071), FN_DECIMAL(0.1973189533), FN_DECIMAL(0.46743546), FN_DECIMAL(0.05197599597), FN_DECIMAL(0.088354718), FN_DECIMAL(0.5380464843), FN_DECIMAL(-0.6458224544),
+	FN_DECIMAL(-0.5045952393), FN_DECIMAL(0.419347884), FN_DECIMAL(0.8000823542), FN_DECIMAL(-0.07445020656), FN_DECIMAL(-0.6272881641), FN_DECIMAL(-0.428020311), FN_DECIMAL(-0.2747382083), FN_DECIMAL(-0.08987283726), FN_DECIMAL(0.8699098354), FN_DECIMAL(0.4524761885), FN_DECIMAL(-0.3274603257), FN_DECIMAL(0.4882262167), FN_DECIMAL(-0.7189983256), FN_DECIMAL(0.1746079907), FN_DECIMAL(0.0751772698), FN_DECIMAL(-0.6152927202),
+	FN_DECIMAL(0.4998474673), FN_DECIMAL(-0.6979677227), FN_DECIMAL(0.7568667263), FN_DECIMAL(-0.6152612058), FN_DECIMAL(0.06447140991), FN_DECIMAL(-0.8155744872), FN_DECIMAL(-0.5229602449), FN_DECIMAL(0.6567836838), FN_DECIMAL(-0.4799905631), FN_DECIMAL(0.03153534591), FN_DECIMAL(0.4724992466), FN_DECIMAL(-0.3026458097), FN_DECIMAL(-0.2191225827), FN_DECIMAL(-0.620692287), FN_DECIMAL(0.3107552588), FN_DECIMAL(0.8235670294),
+	FN_DECIMAL(0.6474915988), FN_DECIMAL(-0.5047637941), FN_DECIMAL(0.4911488878), FN_DECIMAL(-0.2307138167), FN_DECIMAL(-0.5216800015), FN_DECIMAL(0.6789305939), FN_DECIMAL(0.9403734863), FN_DECIMAL(0.702390397), FN_DECIMAL(0.7347584625), FN_DECIMAL(0.6779567958), FN_DECIMAL(0.9765635805), FN_DECIMAL(-0.9436177661), FN_DECIMAL(-0.358465925), FN_DECIMAL(-0.3058706624), FN_DECIMAL(0.5533414464), FN_DECIMAL(-0.8838306897),
+	FN_DECIMAL(0.04496841812), FN_DECIMAL(0.01687374963), FN_DECIMAL(-0.9927133148), FN_DECIMAL(-0.211752318), FN_DECIMAL(0.3732015249), FN_DECIMAL(0.9632990593), FN_DECIMAL(-0.07682417004), FN_DECIMAL(-0.07232213047), FN_DECIMAL(0.4733721775), FN_DECIMAL(0.2579229713), FN_DECIMAL(0.7995216286), FN_DECIMAL(0.3928189967), FN_DECIMAL(0.04107517667), FN_DECIMAL(0.1534542912), FN_DECIMAL(0.6468965045), FN_DECIMAL(0.4030684878),
+	FN_DECIMAL(-0.5617300988), FN_DECIMAL(-0.885463029), FN_DECIMAL(0.693729985), FN_DECIMAL(-0.5736527866), FN_DECIMAL(-0.9911905409), FN_DECIMAL(-0.66451538), FN_DECIMAL(0.2028855685), FN_DECIMAL(0.8019541421), FN_DECIMAL(-0.3903877149), FN_DECIMAL(-0.4888495114), FN_DECIMAL(-0.2753714057), FN_DECIMAL(-0.8915202143), FN_DECIMAL(0.5273119089), FN_DECIMAL(0.9363714773), FN_DECIMAL(-0.5212228249), FN_DECIMAL(-0.31642672),
+	FN_DECIMAL(0.2409440761), FN_DECIMAL(-0.703776404), FN_DECIMAL(-0.6996810411), FN_DECIMAL(-0.7058714505), FN_DECIMAL(-0.3650566783), FN_DECIMAL(0.1064744278), FN_DECIMAL(0.7985729102), FN_DECIMAL(0.424680257), FN_DECIMAL(-0.6384535592), FN_DECIMAL(0.1540161646), FN_DECIMAL(-0.07702731943), FN_DECIMAL(-0.5627789132), FN_DECIMAL(-0.7667919169), FN_DECIMAL(-0.509815999), FN_DECIMAL(0.4590525092), FN_DECIMAL(0.1552595611),
+	FN_DECIMAL(0.345402042), FN_DECIMAL(0.7537656024), FN_DECIMAL(0.7906259247), FN_DECIMAL(-0.6218493452), FN_DECIMAL(0.02979350071), FN_DECIMAL(-0.1337893489), FN_DECIMAL(-0.1483818606), FN_DECIMAL(0.549965562), FN_DECIMAL(0.01882482408), FN_DECIMAL(-0.7833783002), FN_DECIMAL(0.4702855809), FN_DECIMAL(0.2435827372), FN_DECIMAL(0.2978428332), FN_DECIMAL(0.2256499906), FN_DECIMAL(0.4885036897), FN_DECIMAL(0.5312962584),
+	FN_DECIMAL(0.05401156992), FN_DECIMAL(0.1749922158), FN_DECIMAL(-0.7352273018), FN_DECIMAL(0.6058980284), FN_DECIMAL(0.4416079111), FN_DECIMAL(0.4417378638), FN_DECIMAL(0.5455879807), FN_DECIMAL(-0.6681295324), FN_DECIMAL(0.1973431441), FN_DECIMAL(0.4053292055), FN_DECIMAL(0.2220375492), FN_DECIMAL(0.2957118467), FN_DECIMAL(0.6910913512), FN_DECIMAL(0.5940890106), FN_DECIMAL(-0.2014135283), FN_DECIMAL(-0.9172588213),
+	FN_DECIMAL(-0.4254361401), FN_DECIMAL(-0.6146586825), FN_DECIMAL(-0.7996193253), FN_DECIMAL(-0.3716777111), FN_DECIMAL(-0.9448876842), FN_DECIMAL(-0.2620349924), FN_DECIMAL(0.9615995749), FN_DECIMAL(-0.4679683524), FN_DECIMAL(0.3905937144), FN_DECIMAL(0.613593722), FN_DECIMAL(0.1422937358), FN_DECIMAL(0.1908754211), FN_DECIMAL(0.8189704912), FN_DECIMAL(-0.7300408736), FN_DECIMAL(-0.4108776451), FN_DECIMAL(-0.5319834504),
+	FN_DECIMAL(-0.8970265651), FN_DECIMAL(-0.5386359045), FN_DECIMAL(0.4082255906), FN_DECIMAL(0.7245356676), FN_DECIMAL(0.5239080873), FN_DECIMAL(-0.8937552226), FN_DECIMAL(-0.553637673), FN_DECIMAL(0.2354455182), FN_DECIMAL(-0.0860293075), FN_DECIMAL(-0.1399373318), FN_DECIMAL(-0.4666323327), FN_DECIMAL(0.5560157407), FN_DECIMAL(0.1772619533), FN_DECIMAL(-0.8893937725), FN_DECIMAL(-0.5632714576), FN_DECIMAL(-0.5666264959),
+	FN_DECIMAL(-0.3670263736), FN_DECIMAL(-0.06717242579), FN_DECIMAL(0.6205295181), FN_DECIMAL(-0.4110536264), FN_DECIMAL(0.7090054553), FN_DECIMAL(0.183899597), FN_DECIMAL(-0.5605470555), FN_DECIMAL(0.3879565548), FN_DECIMAL(0.7420893903), FN_DECIMAL(-0.2347595118), FN_DECIMAL(-0.8577217497), FN_DECIMAL(0.6325590203), FN_DECIMAL(-0.8736152276), FN_DECIMAL(0.7048011129), FN_DECIMAL(-0.06317948268), FN_DECIMAL(0.8753285574),
+	FN_DECIMAL(-0.05843650473), FN_DECIMAL(-0.3674922622), FN_DECIMAL(-0.5256624401), FN_DECIMAL(0.7861039337), FN_DECIMAL(0.3287714416), FN_DECIMAL(0.5910593099), FN_DECIMAL(-0.3896960134), FN_DECIMAL(0.6864605361), FN_DECIMAL(0.7164918431), FN_DECIMAL(-0.290014277), FN_DECIMAL(-0.6796169617), FN_DECIMAL(0.1632515592), FN_DECIMAL(0.04485347486), FN_DECIMAL(0.8320545697), FN_DECIMAL(0.01339408056), FN_DECIMAL(-0.2874989857),
+	FN_DECIMAL(0.615630723), FN_DECIMAL(0.3430367014), FN_DECIMAL(0.8193658136), FN_DECIMAL(-0.5829600957), FN_DECIMAL(0.07911697781), FN_DECIMAL(0.7854296063), FN_DECIMAL(-0.4107442306), FN_DECIMAL(0.4766964066), FN_DECIMAL(-0.9045999527), FN_DECIMAL(-0.1673856787), FN_DECIMAL(0.2828077348), FN_DECIMAL(-0.5902737632), FN_DECIMAL(-0.321506229), FN_DECIMAL(-0.5224513133), FN_DECIMAL(-0.4090169985), FN_DECIMAL(-0.3599685311),
+};
+
+static int FastFloor(FN_DECIMAL f) { const int truncated = (int)f; return (f >= 0) ? truncated : truncated - 1; } // Truncates, then steps down one for any negative input (so a negative whole number maps one below itself).
+static int FastRound(FN_DECIMAL f) { const FN_DECIMAL half = FN_DECIMAL(0.5); return (int)((f >= 0) ? f + half : f - half); } // Rounds to nearest by pushing half a unit away from zero before truncating.
+static int FastAbs(int i) { return (i < 0) ? -i : i; } // Integer magnitude.
+static FN_DECIMAL FastAbs(FN_DECIMAL f) { const FN_DECIMAL magnitude = fabs(f); return magnitude; } // Floating-point magnitude via fabs.
+static FN_DECIMAL Lerp(FN_DECIMAL a, FN_DECIMAL b, FN_DECIMAL t) { const FN_DECIMAL span = b - a; return a + t * span; } // Linear blend: a at t = 0, b at t = 1.
+static FN_DECIMAL InterpHermiteFunc(FN_DECIMAL t) { const FN_DECIMAL tSquared = t * t; return tSquared * (3 - 2 * t); } // Cubic ease curve 3t^2 - 2t^3.
+static FN_DECIMAL InterpQuinticFunc(FN_DECIMAL t) { const FN_DECIMAL tCubed = t * t * t; return tCubed * (t * (t * 6 - 15) + 10); } // Quintic ease curve 6t^5 - 15t^4 + 10t^3.
+static FN_DECIMAL CubicLerp(FN_DECIMAL a, FN_DECIMAL b, FN_DECIMAL c, FN_DECIMAL d, FN_DECIMAL t)
+{	// Cubic polynomial in t through four samples: evaluates to b at t = 0 and to c at t = 1.
+	const FN_DECIMAL ab = a - b, cubic = (d - c) - ab;
+	return t * t * t * cubic + t * t * (ab - cubic) + t * (c - a) + b;
+}
+
+void FastNoise::SetSeed(int seed)
+{
+	// Stores the seed and rebuilds m_perm / m_perm12 by shuffling 0..255 with a generator seeded from it.
+	m_seed = seed;
+	std::mt19937_64 engine(seed);
+
+	for (int slot = 0; slot < 256; ++slot)
+		m_perm[slot] = slot;
+
+	for (int slot = 0; slot < 256; ++slot)
+	{
+		// Swap in a partner drawn from [slot, 255]; entries 256..511 mirror entries 0..255.
+		const int partner = slot + (int)(engine() % (256 - slot));
+		const int displaced = m_perm[slot];
+		m_perm[slot] = m_perm[slot + 256] = m_perm[partner];
+		m_perm[partner] = displaced;
+		m_perm12[slot] = m_perm12[slot + 256] = m_perm[slot] % 12;
+	}
+}
+
+void FastNoise::CalculateFractalBounding()
+{
+	// Caches in m_fractalBounding the reciprocal of the summed octave amplitudes
+	// 1 + gain + gain^2 + ... (m_octaves terms; just 1 when m_octaves <= 1).
+	FN_DECIMAL octaveAmp = m_gain;
+	FN_DECIMAL amplitudeSum = 1.0f;
+
+	for (int octave = 1; octave < m_octaves; ++octave, octaveAmp *= m_gain)
+		amplitudeSum += octaveAmp;
+	m_fractalBounding = 1.0f / amplitudeSum;
+}
+
+void FastNoise::SetCellularDistance2Indices(int cellularDistanceIndex0, int cellularDistanceIndex1)
+{
+	// Stores the two indices in ascending order, each clamped to [0, FN_CELLULAR_INDEX_MAX].
+	const int lower = std::min(cellularDistanceIndex0, cellularDistanceIndex1);
+	const int upper = std::max(cellularDistanceIndex0, cellularDistanceIndex1);
+	m_cellularDistanceIndex0 = std::min(std::max(lower, 0), FN_CELLULAR_INDEX_MAX);
+	m_cellularDistanceIndex1 = std::min(std::max(upper, 0), FN_CELLULAR_INDEX_MAX);
+}
+
+void FastNoise::GetCellularDistance2Indices(int& cellularDistanceIndex0, int& cellularDistanceIndex1) const
+{ // Copies the two stored cellular distance indices into the caller's out-parameters.
+	cellularDistanceIndex0 = m_cellularDistanceIndex0;
+	cellularDistanceIndex1 = m_cellularDistanceIndex1; // written last, so it wins if both arguments alias
+}
+
+unsigned char FastNoise::Index2D_12(unsigned char offset, int x, int y) const
+{	// Chains the low 8 bits of y, then x, through m_perm and reads the final slot from m_perm12.
+	return m_perm12[m_perm[offset + (y & 0xff)] + (x & 0xff)];
+}
+unsigned char FastNoise::Index3D_12(unsigned char offset, int x, int y, int z) const
+{	// Chains the low 8 bits of z, y, then x through m_perm and reads the final slot from m_perm12.
+	return m_perm12[m_perm[m_perm[offset + (z & 0xff)] + (y & 0xff)] + (x & 0xff)];
+}
+unsigned char FastNoise::Index4D_32(unsigned char offset, int x, int y, int z, int w) const
+{	// Chains the low 8 bits of w, z, y, then x through m_perm and keeps the low 5 bits (0..31).
+	return 31 & m_perm[m_perm[m_perm[m_perm[offset + (w & 0xff)] + (z & 0xff)] + (y & 0xff)] + (x & 0xff)];
+}
+unsigned char FastNoise::Index2D_256(unsigned char offset, int x, int y) const
+{	// Chains the low 8 bits of y, then x, through m_perm and returns the resulting entry.
+	return m_perm[m_perm[offset + (y & 0xff)] + (x & 0xff)];
+}
+unsigned char FastNoise::Index3D_256(unsigned char offset, int x, int y, int z) const
+{	// Chains the low 8 bits of z, y, then x through m_perm and returns the resulting entry.
+	return m_perm[m_perm[m_perm[offset + (z & 0xff)] + (y & 0xff)] + (x & 0xff)];
+}
+unsigned char FastNoise::Index4D_256(unsigned char offset, int x, int y, int z, int w) const
+{	// Chains the low 8 bits of w, z, y, then x through m_perm and returns the resulting entry.
+	return m_perm[m_perm[m_perm[m_perm[offset + (w & 0xff)] + (z & 0xff)] + (y & 0xff)] + (x & 0xff)];
+}
+
+// Hashing: per-axis multipliers used by the ValCoord* coordinate hashes below
+#define X_PRIME 1619 // multiplied with x before being XOR-ed into the hash
+#define Y_PRIME 31337 // multiplied with y before being XOR-ed into the hash
+#define Z_PRIME 6971 // multiplied with z before being XOR-ed into the hash
+#define W_PRIME 1013 // multiplied with w before being XOR-ed into the hash
+
+static FN_DECIMAL ValCoord2D(int seed, int x, int y)
+{
+	// Hashes (seed, x, y) to a value of roughly [-1, 1]. The mixing runs in unsigned
+	// arithmetic: wrap-around is intended here, and signed overflow is undefined behaviour.
+	unsigned int n = (unsigned int)seed;
+	n ^= X_PRIME * (unsigned int)x;
+	n ^= Y_PRIME * (unsigned int)y;
+	return (FN_DECIMAL)(int)(n * n * n * 60493u) / FN_DECIMAL(2147483648); // reinterpret as signed, then scale by 2^31
+}
+static FN_DECIMAL ValCoord3D(int seed, int x, int y, int z)
+{
+	// Hashes (seed, x, y, z) to roughly [-1, 1]; unsigned math because wrap-around is intended (signed overflow is undefined).
+	unsigned int n = (unsigned int)seed;
+	n ^= X_PRIME * (unsigned int)x;
+	n ^= Y_PRIME * (unsigned int)y;
+	n ^= Z_PRIME * (unsigned int)z;
+	return (FN_DECIMAL)(int)(n * n * n * 60493u) / FN_DECIMAL(2147483648); // reinterpret as signed, then scale by 2^31
+}
+static FN_DECIMAL ValCoord4D(int seed, int x, int y, int z, int w)
+{
+	// Hashes (seed, x, y, z, w) to roughly [-1, 1]; unsigned math because wrap-around is intended (signed overflow is undefined).
+	unsigned int n = (unsigned int)seed;
+	n ^= X_PRIME * (unsigned int)x;
+	n ^= Y_PRIME * (unsigned int)y;
+	n ^= Z_PRIME * (unsigned int)z;
+	n ^= W_PRIME * (unsigned int)w;
+	return (FN_DECIMAL)(int)(n * n * n * 60493u) / FN_DECIMAL(2147483648); // reinterpret as signed, then scale by 2^31
+}
+
+FN_DECIMAL FastNoise::ValCoord2DFast(unsigned char offset, int x, int y) const
+{ // Table-driven lattice value: the permutation index for (offset, x, y) selects an entry of VAL_LUT.
+	return VAL_LUT[Index2D_256(offset, x, y)];
+}
+FN_DECIMAL FastNoise::ValCoord3DFast(unsigned char offset, int x, int y, int z) const
+{ // Table-driven lattice value: the permutation index for (offset, x, y, z) selects an entry of VAL_LUT.
+	return VAL_LUT[Index3D_256(offset, x, y, z)];
+}
+
+FN_DECIMAL FastNoise::GradCoord2D(unsigned char offset, int x, int y, FN_DECIMAL xd, FN_DECIMAL yd) const
+{
+	// Dot product of (xd, yd) with the gradient table entry selected for lattice point (x, y).
+	const unsigned char gradIndex = Index2D_12(offset, x, y);
+	return GRAD_X[gradIndex] * xd + GRAD_Y[gradIndex] * yd;
+}
+FN_DECIMAL FastNoise::GradCoord3D(unsigned char offset, int x, int y, int z, FN_DECIMAL xd, FN_DECIMAL yd, FN_DECIMAL zd) const
+{
+	// Dot product of (xd, yd, zd) with the gradient table entry selected for lattice point (x, y, z).
+	const unsigned char gradIndex = Index3D_12(offset, x, y, z);
+	return GRAD_X[gradIndex] * xd + GRAD_Y[gradIndex] * yd + GRAD_Z[gradIndex] * zd;
+}
+FN_DECIMAL FastNoise::GradCoord4D(unsigned char offset, int x, int y, int z, int w, FN_DECIMAL xd, FN_DECIMAL yd, FN_DECIMAL zd, FN_DECIMAL wd) const
+{
+	// The lattice index is shifted left by 2 so it addresses four consecutive GRAD_4D components, dotted with (xd, yd, zd, wd).
+	const unsigned char base = Index4D_32(offset, x, y, z, w) << 2;
+	return GRAD_4D[base] * xd + GRAD_4D[base + 1] * yd + GRAD_4D[base + 2] * zd + GRAD_4D[base + 3] * wd;
+}
+
+FN_DECIMAL FastNoise::GetNoise(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const
+{
+	// Evaluates the noise chosen by m_noiseType at the point scaled by m_frequency.
+	// Fractal types branch again on m_fractalType, Cellular on m_cellularReturnType;
+	// any unrecognised noise or fractal type yields 0.
+	x *= m_frequency;
+	y *= m_frequency;
+	z *= m_frequency;
+
+	switch (m_noiseType)
+	{
+	// Value noise
+	case Value:
+		return SingleValue(0, x, y, z);
+	case ValueFractal:
+		switch (m_fractalType)
+		{
+		case FBM:        return SingleValueFractalFBM(x, y, z);
+		case Billow:     return SingleValueFractalBillow(x, y, z);
+		case RigidMulti: return SingleValueFractalRigidMulti(x, y, z);
+		default:         return 0;
+		}
+
+	// Perlin noise
+	case Perlin:
+		return SinglePerlin(0, x, y, z);
+	case PerlinFractal:
+		switch (m_fractalType)
+		{
+		case FBM:        return SinglePerlinFractalFBM(x, y, z);
+		case Billow:     return SinglePerlinFractalBillow(x, y, z);
+		case RigidMulti: return SinglePerlinFractalRigidMulti(x, y, z);
+		default:         return 0;
+		}
+
+	// Simplex noise
+	case Simplex:
+		return SingleSimplex(0, x, y, z);
+	case SimplexFractal:
+		switch (m_fractalType)
+		{
+		case FBM:        return SingleSimplexFractalFBM(x, y, z);
+		case Billow:     return SingleSimplexFractalBillow(x, y, z);
+		case RigidMulti: return SingleSimplexFractalRigidMulti(x, y, z);
+		default:         return 0;
+		}
+
+	// Cellular noise
+	case Cellular:
+		switch (m_cellularReturnType)
+		{
+		case CellValue:
+		case NoiseLookup:
+		case Distance:
+			return SingleCellular(x, y, z);
+		default:
+			return SingleCellular2Edge(x, y, z);
+		}
+
+	// White noise
+	case WhiteNoise:
+		return GetWhiteNoise(x, y, z);
+
+	// Cubic noise
+	case Cubic:
+		return SingleCubic(0, x, y, z);
+	case CubicFractal:
+		switch (m_fractalType)
+		{
+		case FBM:        return SingleCubicFractalFBM(x, y, z);
+		case Billow:     return SingleCubicFractalBillow(x, y, z);
+		case RigidMulti: return SingleCubicFractalRigidMulti(x, y, z);
+		default:         return 0;
+		}
+	default:
+		return 0;
+	}
+}
+
+FN_DECIMAL FastNoise::GetNoise(FN_DECIMAL x, FN_DECIMAL y) const
+{
+	// Two-coordinate counterpart of GetNoise(x, y, z): scales the point by m_frequency,
+	// then evaluates the noise chosen by m_noiseType. Fractal types branch on m_fractalType,
+	// Cellular on m_cellularReturnType; any unrecognised noise or fractal type yields 0.
+	x *= m_frequency;
+	y *= m_frequency;
+
+	switch (m_noiseType)
+	{
+	case Value:
+		return SingleValue(0, x, y);
+	case ValueFractal:
+		switch (m_fractalType)
+		{
+		case FBM:        return SingleValueFractalFBM(x, y);
+		case Billow:     return SingleValueFractalBillow(x, y);
+		case RigidMulti: return SingleValueFractalRigidMulti(x, y);
+		default:         return 0; // without this, control fell through into `case Perlin`
+		}
+
+	case Perlin:
+		return SinglePerlin(0, x, y);
+	case PerlinFractal:
+		switch (m_fractalType)
+		{
+		case FBM:        return SinglePerlinFractalFBM(x, y);
+		case Billow:     return SinglePerlinFractalBillow(x, y);
+		case RigidMulti: return SinglePerlinFractalRigidMulti(x, y);
+		default:         return 0; // without this, control fell through into `case Simplex`
+		}
+
+	case Simplex:
+		return SingleSimplex(0, x, y);
+	case SimplexFractal:
+		switch (m_fractalType)
+		{
+		case FBM:        return SingleSimplexFractalFBM(x, y);
+		case Billow:     return SingleSimplexFractalBillow(x, y);
+		case RigidMulti: return SingleSimplexFractalRigidMulti(x, y);
+		default:         return 0; // without this, control fell through into `case Cellular`
+		}
+
+	case Cellular:
+		switch (m_cellularReturnType)
+		{
+		case CellValue:
+		case NoiseLookup:
+		case Distance:
+			return SingleCellular(x, y);
+		default:
+			return SingleCellular2Edge(x, y);
+		}
+
+	case WhiteNoise:
+		return GetWhiteNoise(x, y);
+	case Cubic:
+		return SingleCubic(0, x, y);
+	case CubicFractal:
+		switch (m_fractalType)
+		{
+		case FBM:        return SingleCubicFractalFBM(x, y);
+		case Billow:     return SingleCubicFractalBillow(x, y);
+		case RigidMulti: return SingleCubicFractalRigidMulti(x, y);
+		default:         return 0;
+		}
+	}
+
+	return 0;
+}
+
+// White Noise
+FN_DECIMAL FastNoise::GetWhiteNoise(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z, FN_DECIMAL w) const
+{
+	// Hashes the stored bits of each coordinate (not its numeric value) together with m_seed:
+	// every coordinate is read as an int and XOR-ed with itself shifted right by 16.
+	// NOTE(review): the int read covers only sizeof(int) bytes of FN_DECIMAL -- confirm this is intended if FN_DECIMAL is wider.
+	const auto foldBits = [](FN_DECIMAL v) { const int bits = *reinterpret_cast<int*>(&v); return bits ^ (bits >> 16); };
+	return ValCoord4D(m_seed, foldBits(x), foldBits(y), foldBits(z), foldBits(w));
+}
+
+FN_DECIMAL FastNoise::GetWhiteNoise(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const
+{
+	// Hashes the stored bits of each coordinate (not its numeric value) together with m_seed:
+	// every coordinate is read as an int and XOR-ed with itself shifted right by 16.
+	const auto foldBits = [](FN_DECIMAL v) { const int bits = *reinterpret_cast<int*>(&v); return bits ^ (bits >> 16); };
+	return ValCoord3D(m_seed, foldBits(x), foldBits(y), foldBits(z));
+}
+
+FN_DECIMAL FastNoise::GetWhiteNoise(FN_DECIMAL x, FN_DECIMAL y) const
+{
+	// Hashes the stored bits of x and y (each read as an int, XOR-ed with itself >> 16) together with m_seed.
+	const auto foldBits = [](FN_DECIMAL v) { const int bits = *reinterpret_cast<int*>(&v); return bits ^ (bits >> 16); };
+	return ValCoord2D(m_seed, foldBits(x), foldBits(y));
+}
+
+FN_DECIMAL FastNoise::GetWhiteNoiseInt(int x, int y, int z, int w) const
+{ // White noise for integer coordinates: hashes (x, y, z, w) with m_seed, with no frequency scaling.
+	return ValCoord4D(m_seed, x, y, z, w);
+}
+
+FN_DECIMAL FastNoise::GetWhiteNoiseInt(int x, int y, int z) const
+{ // White noise for integer coordinates: hashes (x, y, z) with m_seed, with no frequency scaling.
+	return ValCoord3D(m_seed, x, y, z);
+}
+
+FN_DECIMAL FastNoise::GetWhiteNoiseInt(int x, int y) const
+{ // White noise for integer coordinates: hashes (x, y) with m_seed, with no frequency scaling.
+	return ValCoord2D(m_seed, x, y);
+}
+
+// Value Noise
+FN_DECIMAL FastNoise::GetValueFractal(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const
+{
+	// Fractal value noise at the point scaled by m_frequency; the combiner is
+	// chosen by m_fractalType.
+	const FN_DECIMAL fx = x * m_frequency;
+	const FN_DECIMAL fy = y * m_frequency;
+	const FN_DECIMAL fz = z * m_frequency;
+
+	if (m_fractalType == FBM)
+		return SingleValueFractalFBM(fx, fy, fz);
+	if (m_fractalType == Billow)
+		return SingleValueFractalBillow(fx, fy, fz);
+	if (m_fractalType == RigidMulti)
+		return SingleValueFractalRigidMulti(fx, fy, fz);
+
+	// Any other fractal type yields 0.
+	return 0;
+}
+
+FN_DECIMAL FastNoise::SingleValueFractalFBM(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const
+{
+	// Sums m_octaves layers of value noise and scales the total by m_fractalBounding; no frequency scaling here.
+	FN_DECIMAL total = SingleValue(m_perm[0], x, y, z);
+	FN_DECIMAL amplitude = 1;
+
+	for (int octave = 1; octave < m_octaves; ++octave)
+	{
+		// Each layer stretches the coordinates by m_lacunarity and weights by m_gain; m_perm[octave] is its lattice offset.
+		x *= m_lacunarity;
+		y *= m_lacunarity;
+		z *= m_lacunarity;
+		amplitude *= m_gain;
+		total += SingleValue(m_perm[octave], x, y, z) * amplitude;
+	}
+
+	return total * m_fractalBounding;
+}
+
+FN_DECIMAL FastNoise::SingleValueFractalBillow(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const
+{
+	// Like the FBM sum, but every layer is folded to |n| * 2 - 1 before weighting; scaled by m_fractalBounding.
+	FN_DECIMAL total = FastAbs(SingleValue(m_perm[0], x, y, z)) * 2 - 1;
+	FN_DECIMAL amplitude = 1;
+
+	for (int octave = 1; octave < m_octaves; ++octave)
+	{
+		// Each layer stretches the coordinates by m_lacunarity and weights by m_gain; m_perm[octave] is its lattice offset.
+		x *= m_lacunarity;
+		y *= m_lacunarity;
+		z *= m_lacunarity;
+		amplitude *= m_gain;
+		total += (FastAbs(SingleValue(m_perm[octave], x, y, z)) * 2 - 1) * amplitude;
+	}
+
+	return total * m_fractalBounding;
+}
+
+FN_DECIMAL FastNoise::SingleValueFractalRigidMulti(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const
+{
+	// Starts from 1 - |n| of the first layer and subtracts each later layer's weighted 1 - |n|; not rescaled by m_fractalBounding.
+	FN_DECIMAL total = 1 - FastAbs(SingleValue(m_perm[0], x, y, z));
+	FN_DECIMAL amplitude = 1;
+
+	for (int octave = 1; octave < m_octaves; ++octave)
+	{
+		// Each layer stretches the coordinates by m_lacunarity and weights by m_gain; m_perm[octave] is its lattice offset.
+		x *= m_lacunarity;
+		y *= m_lacunarity;
+		z *= m_lacunarity;
+		amplitude *= m_gain;
+		total -= (1 - FastAbs(SingleValue(m_perm[octave], x, y, z))) * amplitude;
+	}
+
+	return total;
+}
+
+FN_DECIMAL FastNoise::GetValue(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const
+{	// Single-layer value noise at the point scaled by m_frequency, using lattice offset 0.
+	return SingleValue(0, m_frequency * x, m_frequency * y, m_frequency * z);
+}
+
+FN_DECIMAL FastNoise::SingleValue(unsigned char offset, FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const
+{
+	// Blends the eight lattice values surrounding (x, y, z); `offset` is forwarded to the
+	// lattice lookup. Coordinates are used as passed (no frequency scaling here).
+	const int x0 = FastFloor(x);
+	const int y0 = FastFloor(y);
+	const int z0 = FastFloor(z);
+	const int x1 = x0 + 1;
+	const int y1 = y0 + 1;
+	const int z1 = z0 + 1;
+
+	// Start from the linear fractions so the weights are always initialised, even if
+	// m_interp holds an unexpected value; Hermite/Quintic then reshape them.
+	FN_DECIMAL xs = x - (FN_DECIMAL)x0;
+	FN_DECIMAL ys = y - (FN_DECIMAL)y0;
+	FN_DECIMAL zs = z - (FN_DECIMAL)z0;
+	switch (m_interp)
+	{
+	case Hermite:
+		xs = InterpHermiteFunc(xs);
+		ys = InterpHermiteFunc(ys);
+		zs = InterpHermiteFunc(zs);
+		break;
+	case Quintic:
+		xs = InterpQuinticFunc(xs);
+		ys = InterpQuinticFunc(ys);
+		zs = InterpQuinticFunc(zs);
+		break;
+	default: break; // Linear (and any unknown mode): keep the raw fractions
+	}
+
+	const FN_DECIMAL xf00 = Lerp(ValCoord3DFast(offset, x0, y0, z0), ValCoord3DFast(offset, x1, y0, z0), xs);
+	const FN_DECIMAL xf10 = Lerp(ValCoord3DFast(offset, x0, y1, z0), ValCoord3DFast(offset, x1, y1, z0), xs);
+	const FN_DECIMAL xf01 = Lerp(ValCoord3DFast(offset, x0, y0, z1), ValCoord3DFast(offset, x1, y0, z1), xs);
+	const FN_DECIMAL xf11 = Lerp(ValCoord3DFast(offset, x0, y1, z1), ValCoord3DFast(offset, x1, y1, z1), xs);
+
+	// Blend along y, then z.
+	return Lerp(Lerp(xf00, xf10, ys), Lerp(xf01, xf11, ys), zs);
+}
+
+FN_DECIMAL FastNoise::GetValueFractal(FN_DECIMAL x, FN_DECIMAL y) const
+{
+	// Fractal value noise at the point scaled by m_frequency; the combiner is
+	// chosen by m_fractalType.
+	const FN_DECIMAL fx = x * m_frequency;
+	const FN_DECIMAL fy = y * m_frequency;
+
+	if (m_fractalType == FBM)
+		return SingleValueFractalFBM(fx, fy);
+	if (m_fractalType == Billow)
+		return SingleValueFractalBillow(fx, fy);
+	if (m_fractalType == RigidMulti)
+		return SingleValueFractalRigidMulti(fx, fy);
+
+	// Any other fractal type yields 0.
+	return 0;
+}
+
+FN_DECIMAL FastNoise::SingleValueFractalFBM(FN_DECIMAL x, FN_DECIMAL y) const
+{
+	// Sums m_octaves layers of value noise and scales the total by m_fractalBounding; no frequency scaling here.
+	FN_DECIMAL total = SingleValue(m_perm[0], x, y);
+	FN_DECIMAL amplitude = 1;
+
+	for (int octave = 1; octave < m_octaves; ++octave)
+	{
+		// Each layer stretches the coordinates by m_lacunarity and weights by m_gain; m_perm[octave] is its lattice offset.
+		x *= m_lacunarity;
+		y *= m_lacunarity;
+		amplitude *= m_gain;
+		total += SingleValue(m_perm[octave], x, y) * amplitude;
+	}
+
+	return total * m_fractalBounding;
+}
+
+FN_DECIMAL FastNoise::SingleValueFractalBillow(FN_DECIMAL x, FN_DECIMAL y) const
+{
+	// Like the FBM sum, but every layer is folded to |n| * 2 - 1 before weighting; scaled by m_fractalBounding.
+	FN_DECIMAL total = FastAbs(SingleValue(m_perm[0], x, y)) * 2 - 1;
+	FN_DECIMAL amplitude = 1;
+
+	for (int octave = 1; octave < m_octaves; ++octave)
+	{
+		x *= m_lacunarity;
+		y *= m_lacunarity;
+		amplitude *= m_gain;
+		total += (FastAbs(SingleValue(m_perm[octave], x, y)) * 2 - 1) * amplitude;
+	}
+
+	return total * m_fractalBounding;
+}
+
+FN_DECIMAL FastNoise::SingleValueFractalRigidMulti(FN_DECIMAL x, FN_DECIMAL y) const
+{
+	// Starts from 1 - |n| of the first layer and subtracts each later layer's weighted 1 - |n|; not rescaled by m_fractalBounding.
+	FN_DECIMAL total = 1 - FastAbs(SingleValue(m_perm[0], x, y));
+	FN_DECIMAL amplitude = 1;
+
+	for (int octave = 1; octave < m_octaves; ++octave)
+	{
+		// Each layer stretches the coordinates by m_lacunarity and weights by m_gain; m_perm[octave] is its lattice offset.
+		x *= m_lacunarity;
+		y *= m_lacunarity;
+		amplitude *= m_gain;
+		total -= (1 - FastAbs(SingleValue(m_perm[octave], x, y))) * amplitude;
+	}
+
+	return total;
+}
+
+FN_DECIMAL FastNoise::GetValue(FN_DECIMAL x, FN_DECIMAL y) const
+{	// Single-layer value noise at the point scaled by m_frequency, using lattice offset 0.
+	return SingleValue(0, m_frequency * x, m_frequency * y);
+}
+
+FN_DECIMAL FastNoise::SingleValue(unsigned char offset, FN_DECIMAL x, FN_DECIMAL y) const
+{
+	// Blends the four lattice values surrounding (x, y); `offset` is forwarded to the lattice lookup. No frequency scaling here.
+	const int x0 = FastFloor(x);
+	const int y0 = FastFloor(y);
+	const int x1 = x0 + 1;
+	const int y1 = y0 + 1;
+
+	// Start from the linear fractions so the weights are always initialised, even if
+	// m_interp holds an unexpected value; Hermite/Quintic then reshape them.
+	FN_DECIMAL xs = x - (FN_DECIMAL)x0;
+	FN_DECIMAL ys = y - (FN_DECIMAL)y0;
+	switch (m_interp)
+	{
+	case Hermite:
+		xs = InterpHermiteFunc(xs);
+		ys = InterpHermiteFunc(ys);
+		break;
+	case Quintic:
+		xs = InterpQuinticFunc(xs);
+		ys = InterpQuinticFunc(ys);
+		break;
+	default: break; // Linear (and any unknown mode): keep the raw fractions
+	}
+
+	const FN_DECIMAL xf0 = Lerp(ValCoord2DFast(offset, x0, y0), ValCoord2DFast(offset, x1, y0), xs);
+	const FN_DECIMAL xf1 = Lerp(ValCoord2DFast(offset, x0, y1), ValCoord2DFast(offset, x1, y1), xs);
+	return Lerp(xf0, xf1, ys);
+}
+
+// Perlin Noise
+FN_DECIMAL FastNoise::GetPerlinFractal(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const
+{
+	// Fractal Perlin noise at the point scaled by m_frequency; the combiner is
+	// chosen by m_fractalType.
+	const FN_DECIMAL fx = x * m_frequency;
+	const FN_DECIMAL fy = y * m_frequency;
+	const FN_DECIMAL fz = z * m_frequency;
+
+	if (m_fractalType == FBM)
+		return SinglePerlinFractalFBM(fx, fy, fz);
+	if (m_fractalType == Billow)
+		return SinglePerlinFractalBillow(fx, fy, fz);
+	if (m_fractalType == RigidMulti)
+		return SinglePerlinFractalRigidMulti(fx, fy, fz);
+
+	// Any other fractal type yields 0.
+	return 0;
+}
+
+FN_DECIMAL FastNoise::SinglePerlinFractalFBM(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const
+{
+	// Sums m_octaves layers of Perlin noise and scales the total by m_fractalBounding; no frequency scaling here.
+	FN_DECIMAL total = SinglePerlin(m_perm[0], x, y, z);
+	FN_DECIMAL amplitude = 1;
+
+	for (int octave = 1; octave < m_octaves; ++octave)
+	{
+		// Each layer stretches the coordinates by m_lacunarity and weights by m_gain; m_perm[octave] is its lattice offset.
+		x *= m_lacunarity;
+		y *= m_lacunarity;
+		z *= m_lacunarity;
+		amplitude *= m_gain;
+		total += SinglePerlin(m_perm[octave], x, y, z) * amplitude;
+	}
+
+	return total * m_fractalBounding;
+}
+
+FN_DECIMAL FastNoise::SinglePerlinFractalBillow(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const
+{
+	// Like the FBM sum, but every layer is folded to |n| * 2 - 1 before weighting; scaled by m_fractalBounding.
+	FN_DECIMAL total = FastAbs(SinglePerlin(m_perm[0], x, y, z)) * 2 - 1;
+	FN_DECIMAL amplitude = 1;
+
+	for (int octave = 1; octave < m_octaves; ++octave)
+	{
+		// Each layer stretches the coordinates by m_lacunarity and weights by m_gain; m_perm[octave] is its lattice offset.
+		x *= m_lacunarity;
+		y *= m_lacunarity;
+		z *= m_lacunarity;
+		amplitude *= m_gain;
+		total += (FastAbs(SinglePerlin(m_perm[octave], x, y, z)) * 2 - 1) * amplitude;
+	}
+
+	return total * m_fractalBounding;
+}
+
+FN_DECIMAL FastNoise::SinglePerlinFractalRigidMulti(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const
+{
+	// Starts from 1 - |n| of the first layer and subtracts each later layer's weighted 1 - |n|; not rescaled by m_fractalBounding.
+	FN_DECIMAL total = 1 - FastAbs(SinglePerlin(m_perm[0], x, y, z));
+	FN_DECIMAL amplitude = 1;
+
+	for (int octave = 1; octave < m_octaves; ++octave)
+	{
+		// Each layer stretches the coordinates by m_lacunarity and weights by m_gain; m_perm[octave] is its lattice offset.
+		x *= m_lacunarity;
+		y *= m_lacunarity;
+		z *= m_lacunarity;
+		amplitude *= m_gain;
+		total -= (1 - FastAbs(SinglePerlin(m_perm[octave], x, y, z))) * amplitude;
+	}
+
+	return total;
+}
+
+FN_DECIMAL FastNoise::GetPerlin(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const
+{	// Single-layer Perlin noise at the point scaled by m_frequency, using lattice offset 0.
+	return SinglePerlin(0, m_frequency * x, m_frequency * y, m_frequency * z);
+}
+
+FN_DECIMAL FastNoise::SinglePerlin(unsigned char offset, FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const
+{
+	// Blends the gradient contributions of the eight lattice corners surrounding (x, y, z);
+	// `offset` is forwarded to the gradient lookup. No frequency scaling here.
+	const int x0 = FastFloor(x);
+	const int y0 = FastFloor(y);
+	const int z0 = FastFloor(z);
+	const int x1 = x0 + 1;
+	const int y1 = y0 + 1;
+	const int z1 = z0 + 1;
+
+	// Offsets from the point to the lower (d0) and upper (d1) corner along each axis.
+	const FN_DECIMAL xd0 = x - (FN_DECIMAL)x0;
+	const FN_DECIMAL yd0 = y - (FN_DECIMAL)y0;
+	const FN_DECIMAL zd0 = z - (FN_DECIMAL)z0;
+	const FN_DECIMAL xd1 = xd0 - 1;
+	const FN_DECIMAL yd1 = yd0 - 1;
+	const FN_DECIMAL zd1 = zd0 - 1;
+
+	// Start from the linear fractions so the weights are always initialised, even if
+	// m_interp holds an unexpected value; Hermite/Quintic then reshape them.
+	FN_DECIMAL xs = xd0;
+	FN_DECIMAL ys = yd0;
+	FN_DECIMAL zs = zd0;
+	switch (m_interp)
+	{
+	case Hermite:
+		xs = InterpHermiteFunc(xd0);
+		ys = InterpHermiteFunc(yd0);
+		zs = InterpHermiteFunc(zd0);
+		break;
+	case Quintic:
+		xs = InterpQuinticFunc(xd0);
+		ys = InterpQuinticFunc(yd0);
+		zs = InterpQuinticFunc(zd0);
+		break;
+	default: break; // Linear (and any unknown mode): keep the raw fractions
+	}
+
+	const FN_DECIMAL xf00 = Lerp(GradCoord3D(offset, x0, y0, z0, xd0, yd0, zd0), GradCoord3D(offset, x1, y0, z0, xd1, yd0, zd0), xs);
+	const FN_DECIMAL xf10 = Lerp(GradCoord3D(offset, x0, y1, z0, xd0, yd1, zd0), GradCoord3D(offset, x1, y1, z0, xd1, yd1, zd0), xs);
+	const FN_DECIMAL xf01 = Lerp(GradCoord3D(offset, x0, y0, z1, xd0, yd0, zd1), GradCoord3D(offset, x1, y0, z1, xd1, yd0, zd1), xs);
+	const FN_DECIMAL xf11 = Lerp(GradCoord3D(offset, x0, y1, z1, xd0, yd1, zd1), GradCoord3D(offset, x1, y1, z1, xd1, yd1, zd1), xs);
+
+	return Lerp(Lerp(xf00, xf10, ys), Lerp(xf01, xf11, ys), zs);
+}
+
+FN_DECIMAL FastNoise::GetPerlinFractal(FN_DECIMAL x, FN_DECIMAL y) const
+{
+	// Fractal Perlin noise at the point scaled by m_frequency; the combiner is
+	// chosen by m_fractalType.
+	const FN_DECIMAL fx = x * m_frequency;
+	const FN_DECIMAL fy = y * m_frequency;
+
+	if (m_fractalType == FBM)
+		return SinglePerlinFractalFBM(fx, fy);
+	if (m_fractalType == Billow)
+		return SinglePerlinFractalBillow(fx, fy);
+	if (m_fractalType == RigidMulti)
+		return SinglePerlinFractalRigidMulti(fx, fy);
+
+	// Any other fractal type yields 0.
+	return 0;
+}
+
+FN_DECIMAL FastNoise::SinglePerlinFractalFBM(FN_DECIMAL x, FN_DECIMAL y) const
+{
+	// Sums m_octaves layers of Perlin noise and scales the total by m_fractalBounding; no frequency scaling here.
+	FN_DECIMAL total = SinglePerlin(m_perm[0], x, y);
+	FN_DECIMAL amplitude = 1;
+
+	for (int octave = 1; octave < m_octaves; ++octave)
+	{
+		// Each layer stretches the coordinates by m_lacunarity and weights by m_gain; m_perm[octave] is its lattice offset.
+		x *= m_lacunarity;
+		y *= m_lacunarity;
+		amplitude *= m_gain;
+		total += SinglePerlin(m_perm[octave], x, y) * amplitude;
+	}
+
+	return total * m_fractalBounding;
+}
+
+FN_DECIMAL FastNoise::SinglePerlinFractalBillow(FN_DECIMAL x, FN_DECIMAL y) const
+{
+	// Like the FBM sum, but every layer is folded to |n| * 2 - 1 before weighting; scaled by m_fractalBounding.
+	FN_DECIMAL total = FastAbs(SinglePerlin(m_perm[0], x, y)) * 2 - 1;
+	FN_DECIMAL amplitude = 1;
+
+	for (int octave = 1; octave < m_octaves; ++octave)
+	{
+		// Each layer stretches the coordinates by m_lacunarity and weights by m_gain; m_perm[octave] is its lattice offset.
+		x *= m_lacunarity;
+		y *= m_lacunarity;
+		amplitude *= m_gain;
+		total += (FastAbs(SinglePerlin(m_perm[octave], x, y)) * 2 - 1) * amplitude;
+	}
+
+	return total * m_fractalBounding;
+}
+
+FN_DECIMAL FastNoise::SinglePerlinFractalRigidMulti(FN_DECIMAL x, FN_DECIMAL y) const
+{
+	// Starts from 1 - |n| of the first layer and subtracts each later layer's weighted 1 - |n|; not rescaled by m_fractalBounding.
+	FN_DECIMAL total = 1 - FastAbs(SinglePerlin(m_perm[0], x, y));
+	FN_DECIMAL amplitude = 1;
+
+	for (int octave = 1; octave < m_octaves; ++octave)
+	{
+		// Each layer stretches the coordinates by m_lacunarity and weights by m_gain; m_perm[octave] is its lattice offset.
+		x *= m_lacunarity;
+		y *= m_lacunarity;
+		amplitude *= m_gain;
+		total -= (1 - FastAbs(SinglePerlin(m_perm[octave], x, y))) * amplitude;
+	}
+
+	return total;
+}
+
+FN_DECIMAL FastNoise::GetPerlin(FN_DECIMAL x, FN_DECIMAL y) const
+{	// Single-layer Perlin noise at the point scaled by m_frequency, using lattice offset 0.
+	return SinglePerlin(0, m_frequency * x, m_frequency * y);
+}
+
+FN_DECIMAL FastNoise::SinglePerlin(unsigned char offset, FN_DECIMAL x, FN_DECIMAL y) const
+{
+	// Blends the gradient contributions of the four lattice corners surrounding (x, y); `offset` goes to the gradient lookup.
+	const int x0 = FastFloor(x);
+	const int y0 = FastFloor(y);
+	const int x1 = x0 + 1;
+	const int y1 = y0 + 1;
+
+	// Offsets from the point to the lower (d0) and upper (d1) corner along each axis.
+	const FN_DECIMAL xd0 = x - (FN_DECIMAL)x0;
+	const FN_DECIMAL yd0 = y - (FN_DECIMAL)y0;
+	const FN_DECIMAL xd1 = xd0 - 1;
+	const FN_DECIMAL yd1 = yd0 - 1;
+
+	// Start from the linear fractions so the weights are always initialised, even for an unexpected m_interp.
+	FN_DECIMAL xs = xd0;
+	FN_DECIMAL ys = yd0;
+	switch (m_interp)
+	{
+	case Hermite:
+		xs = InterpHermiteFunc(xd0);
+		ys = InterpHermiteFunc(yd0);
+		break;
+	case Quintic:
+		xs = InterpQuinticFunc(xd0);
+		ys = InterpQuinticFunc(yd0);
+		break;
+	default: break; // Linear (and any unknown mode): keep the raw fractions
+	}
+
+	const FN_DECIMAL xf0 = Lerp(GradCoord2D(offset, x0, y0, xd0, yd0), GradCoord2D(offset, x1, y0, xd1, yd0), xs);
+	const FN_DECIMAL xf1 = Lerp(GradCoord2D(offset, x0, y1, xd0, yd1), GradCoord2D(offset, x1, y1, xd1, yd1), xs);
+	return Lerp(xf0, xf1, ys);
+}
+
+// Simplex Noise
+
+FN_DECIMAL FastNoise::GetSimplexFractal(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const
+{
+	// Fractal simplex noise at the point scaled by m_frequency; the combiner is
+	// chosen by m_fractalType.
+	const FN_DECIMAL fx = x * m_frequency;
+	const FN_DECIMAL fy = y * m_frequency;
+	const FN_DECIMAL fz = z * m_frequency;
+
+	if (m_fractalType == FBM)
+		return SingleSimplexFractalFBM(fx, fy, fz);
+	if (m_fractalType == Billow)
+		return SingleSimplexFractalBillow(fx, fy, fz);
+	if (m_fractalType == RigidMulti)
+		return SingleSimplexFractalRigidMulti(fx, fy, fz);
+
+	// Any other fractal type yields 0.
+	return 0;
+}
+
+FN_DECIMAL FastNoise::SingleSimplexFractalFBM(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const
+{
+	// Octave 0 is taken as-is; each later octave samples at coordinates scaled
+	// by m_lacunarity and is added with a weight scaled by m_gain.
+	FN_DECIMAL total = SingleSimplex(m_perm[0], x, y, z);
+	FN_DECIMAL weight = 1;
+
+	for (int octave = 1; octave < m_octaves; ++octave)
+	{
+		x *= m_lacunarity;
+		y *= m_lacunarity;
+		z *= m_lacunarity;
+		weight *= m_gain;
+		total += SingleSimplex(m_perm[octave], x, y, z) * weight;
+	}
+
+	return total * m_fractalBounding;
+}
+
+FN_DECIMAL FastNoise::SingleSimplexFractalBillow(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const
+{
+	// Every octave contributes |noise| * 2 - 1; octaves after the first are
+	// sampled at lacunarity-scaled coordinates and weighted by the running gain.
+	FN_DECIMAL total = FastAbs(SingleSimplex(m_perm[0], x, y, z)) * 2 - 1;
+	FN_DECIMAL weight = 1;
+
+	for (int octave = 1; octave < m_octaves; ++octave)
+	{
+		x *= m_lacunarity;
+		y *= m_lacunarity;
+		z *= m_lacunarity;
+		weight *= m_gain;
+		total += (FastAbs(SingleSimplex(m_perm[octave], x, y, z)) * 2 - 1) * weight;
+	}
+
+	return total * m_fractalBounding;
+}
+
+FN_DECIMAL FastNoise::SingleSimplexFractalRigidMulti(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const
+{
+	// Octave 0 contributes 1 - |noise|; each later octave subtracts its own
+	// 1 - |noise| weighted by the running amplitude. No final bounding scale.
+	FN_DECIMAL total = 1 - FastAbs(SingleSimplex(m_perm[0], x, y, z));
+	FN_DECIMAL weight = 1;
+
+	for (int octave = 1; octave < m_octaves; ++octave)
+	{
+		x *= m_lacunarity;
+		y *= m_lacunarity;
+		z *= m_lacunarity;
+		weight *= m_gain;
+		total -= (1 - FastAbs(SingleSimplex(m_perm[octave], x, y, z))) * weight;
+	}
+
+	return total;
+}
+
+FN_DECIMAL FastNoise::GetSimplex(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const // Single 3D simplex sample (no fractal octaves).
+{
+	return SingleSimplex(0, x * m_frequency, y * m_frequency, z * m_frequency); // inputs scaled by m_frequency; offset argument is 0
+}
+
+static const FN_DECIMAL F3 = 1 / FN_DECIMAL(3); // 3D SingleSimplex: factor on (x + y + z) added to each coordinate before flooring
+static const FN_DECIMAL G3 = 1 / FN_DECIMAL(6); // 3D SingleSimplex: factor on (i + j + k) used to map cell corners back to input space
+
+FN_DECIMAL FastNoise::SingleSimplex(unsigned char offset, FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const // One 3D simplex sample; `offset` is forwarded to GradCoord3D.
+{
+	FN_DECIMAL t = (x + y + z) * F3; // shift added to every coordinate before flooring
+	int i = FastFloor(x + t);
+	int j = FastFloor(y + t);
+	int k = FastFloor(z + t);
+	// (i, j, k) is the integer cell; its origin is mapped back by subtracting (i + j + k) * G3.
+	t = (i + j + k) * G3;
+	FN_DECIMAL X0 = i - t;
+	FN_DECIMAL Y0 = j - t;
+	FN_DECIMAL Z0 = k - t;
+	// Offset of the input point from that origin (first corner).
+	FN_DECIMAL x0 = x - X0;
+	FN_DECIMAL y0 = y - Y0;
+	FN_DECIMAL z0 = z - Z0;
+	// Integer steps from (i, j, k) to the second (i1, j1, k1) and third (i2, j2, k2) corners.
+	int i1, j1, k1;
+	int i2, j2, k2;
+	// The relative order of x0, y0 and z0 selects which axes those steps go along.
+	if (x0 >= y0)
+	{
+		if (y0 >= z0)
+		{
+			i1 = 1; j1 = 0; k1 = 0; i2 = 1; j2 = 1; k2 = 0;
+		}
+		else if (x0 >= z0)
+		{
+			i1 = 1; j1 = 0; k1 = 0; i2 = 1; j2 = 0; k2 = 1;
+		}
+		else // x0 < z0
+		{
+			i1 = 0; j1 = 0; k1 = 1; i2 = 1; j2 = 0; k2 = 1;
+		}
+	}
+	else // x0 < y0
+	{
+		if (y0 < z0)
+		{
+			i1 = 0; j1 = 0; k1 = 1; i2 = 0; j2 = 1; k2 = 1;
+		}
+		else if (x0 < z0)
+		{
+			i1 = 0; j1 = 1; k1 = 0; i2 = 0; j2 = 1; k2 = 1;
+		}
+		else // x0 >= z0
+		{
+			i1 = 0; j1 = 1; k1 = 0; i2 = 1; j2 = 1; k2 = 0;
+		}
+	}
+	// Offsets from the input point to the second, third and fourth (i + 1, j + 1, k + 1) corners.
+	FN_DECIMAL x1 = x0 - i1 + G3;
+	FN_DECIMAL y1 = y0 - j1 + G3;
+	FN_DECIMAL z1 = z0 - k1 + G3;
+	FN_DECIMAL x2 = x0 - i2 + 2*G3;
+	FN_DECIMAL y2 = y0 - j2 + 2*G3;
+	FN_DECIMAL z2 = z0 - k2 + 2*G3;
+	FN_DECIMAL x3 = x0 - 1 + 3*G3;
+	FN_DECIMAL y3 = y0 - 1 + 3*G3;
+	FN_DECIMAL z3 = z0 - 1 + 3*G3;
+
+	FN_DECIMAL n0, n1, n2, n3;
+	// Per corner: t = 0.6 - squared offset length; a negative t contributes 0, otherwise t^4 * GradCoord3D(...).
+	t = FN_DECIMAL(0.6) - x0*x0 - y0*y0 - z0*z0;
+	if (t < 0) n0 = 0;
+	else
+	{
+		t *= t;
+		n0 = t*t*GradCoord3D(offset, i, j, k, x0, y0, z0);
+	}
+
+	t = FN_DECIMAL(0.6) - x1*x1 - y1*y1 - z1*z1;
+	if (t < 0) n1 = 0;
+	else
+	{
+		t *= t;
+		n1 = t*t*GradCoord3D(offset, i + i1, j + j1, k + k1, x1, y1, z1);
+	}
+
+	t = FN_DECIMAL(0.6) - x2*x2 - y2*y2 - z2*z2;
+	if (t < 0) n2 = 0;
+	else
+	{
+		t *= t;
+		n2 = t*t*GradCoord3D(offset, i + i2, j + j2, k + k2, x2, y2, z2);
+	}
+
+	t = FN_DECIMAL(0.6) - x3*x3 - y3*y3 - z3*z3;
+	if (t < 0) n3 = 0;
+	else
+	{
+		t *= t;
+		n3 = t*t*GradCoord3D(offset, i + 1, j + 1, k + 1, x3, y3, z3);
+	}
+	// Sum of the four corner contributions, scaled by 32.
+	return 32 * (n0 + n1 + n2 + n3);
+}
+
+FN_DECIMAL FastNoise::GetSimplexFractal(FN_DECIMAL x, FN_DECIMAL y) const
+{
+	// Scale the sample point by the base frequency, then delegate to the
+	// accumulator selected by m_fractalType.
+	const FN_DECIMAL fx = x * m_frequency;
+	const FN_DECIMAL fy = y * m_frequency;
+
+	if (m_fractalType == FBM)
+		return SingleSimplexFractalFBM(fx, fy);
+	if (m_fractalType == Billow)
+		return SingleSimplexFractalBillow(fx, fy);
+	if (m_fractalType == RigidMulti)
+		return SingleSimplexFractalRigidMulti(fx, fy);
+
+	// Any other fractal type yields 0.
+	return 0;
+}
+
+FN_DECIMAL FastNoise::SingleSimplexFractalFBM(FN_DECIMAL x, FN_DECIMAL y) const
+{
+	// Octave 0 is taken as-is; each later octave samples at coordinates scaled
+	// by m_lacunarity and is added with a weight scaled by m_gain.
+	FN_DECIMAL total = SingleSimplex(m_perm[0], x, y);
+	FN_DECIMAL weight = 1;
+
+	for (int octave = 1; octave < m_octaves; ++octave)
+	{
+		x *= m_lacunarity;
+		y *= m_lacunarity;
+		weight *= m_gain;
+		total += SingleSimplex(m_perm[octave], x, y) * weight;
+	}
+
+	return total * m_fractalBounding;
+}
+
+FN_DECIMAL FastNoise::SingleSimplexFractalBillow(FN_DECIMAL x, FN_DECIMAL y) const
+{
+	// Every octave contributes |noise| * 2 - 1; octaves after the first are
+	// sampled at lacunarity-scaled coordinates and weighted by the running gain.
+	FN_DECIMAL total = FastAbs(SingleSimplex(m_perm[0], x, y)) * 2 - 1;
+	FN_DECIMAL weight = 1;
+
+	for (int octave = 1; octave < m_octaves; ++octave)
+	{
+		x *= m_lacunarity;
+		y *= m_lacunarity;
+		weight *= m_gain;
+		total += (FastAbs(SingleSimplex(m_perm[octave], x, y)) * 2 - 1) * weight;
+	}
+
+	return total * m_fractalBounding;
+}
+
+FN_DECIMAL FastNoise::SingleSimplexFractalRigidMulti(FN_DECIMAL x, FN_DECIMAL y) const
+{
+	// Octave 0 contributes 1 - |noise|; each later octave subtracts its own
+	// 1 - |noise| weighted by the running amplitude. No final bounding scale.
+	FN_DECIMAL total = 1 - FastAbs(SingleSimplex(m_perm[0], x, y));
+	FN_DECIMAL weight = 1;
+
+	for (int octave = 1; octave < m_octaves; ++octave)
+	{
+		x *= m_lacunarity;
+		y *= m_lacunarity;
+		weight *= m_gain;
+		total -= (1 - FastAbs(SingleSimplex(m_perm[octave], x, y))) * weight;
+	}
+
+	return total;
+}
+
+FN_DECIMAL FastNoise::SingleSimplexFractalBlend(FN_DECIMAL x, FN_DECIMAL y) const
+{
+	// Multiplicative variant: the running value starts as octave 0 and is
+	// multiplied by (noise * weight + 1) for every later octave.
+	FN_DECIMAL total = SingleSimplex(m_perm[0], x, y);
+	FN_DECIMAL weight = 1;
+
+	for (int octave = 1; octave < m_octaves; ++octave)
+	{
+		x *= m_lacunarity;
+		y *= m_lacunarity;
+		weight *= m_gain;
+		total *= SingleSimplex(m_perm[octave], x, y) * weight + 1;
+	}
+
+	return total * m_fractalBounding;
+}
+
+FN_DECIMAL FastNoise::GetSimplex(FN_DECIMAL x, FN_DECIMAL y) const // Single 2D simplex sample (no fractal octaves).
+{
+	return SingleSimplex(0, x * m_frequency, y * m_frequency); // inputs scaled by m_frequency; offset argument is 0
+}
+
+//static const FN_DECIMAL F2 = 1 / FN_DECIMAL(2);
+//static const FN_DECIMAL G2 = 1 / FN_DECIMAL(4);
+// The two definitions above are commented out; the 2D SingleSimplex uses the constants below.
+static const FN_DECIMAL SQRT3 = FN_DECIMAL(1.7320508075688772935274463415059);
+static const FN_DECIMAL F2 = FN_DECIMAL(0.5) * (SQRT3 - FN_DECIMAL(1.0)); // factor on (x + y) added to each coordinate before flooring
+static const FN_DECIMAL G2 = (FN_DECIMAL(3.0) - SQRT3) / FN_DECIMAL(6.0); // factor on (i + j) used to map cell corners back to input space
+
+FN_DECIMAL FastNoise::SingleSimplex(unsigned char offset, FN_DECIMAL x, FN_DECIMAL y) const // One 2D simplex sample; `offset` is forwarded to GradCoord2D.
+{
+	FN_DECIMAL t = (x + y) * F2; // shift added to both coordinates before flooring
+	int i = FastFloor(x + t);
+	int j = FastFloor(y + t);
+	// (i, j) is the integer cell; its origin is mapped back by subtracting (i + j) * G2.
+	t = (i + j) * G2;
+	FN_DECIMAL X0 = i - t;
+	FN_DECIMAL Y0 = j - t;
+	// Offset of the input point from that origin (first corner).
+	FN_DECIMAL x0 = x - X0;
+	FN_DECIMAL y0 = y - Y0;
+	// Second corner: one step along x when x0 > y0, otherwise one step along y.
+	int i1, j1;
+	if (x0 > y0)
+	{
+		i1 = 1; j1 = 0;
+	}
+	else
+	{
+		i1 = 0; j1 = 1;
+	}
+	// Offsets from the input point to the second corner and to the (i + 1, j + 1) corner.
+	FN_DECIMAL x1 = x0 - (FN_DECIMAL)i1 + G2;
+	FN_DECIMAL y1 = y0 - (FN_DECIMAL)j1 + G2;
+	FN_DECIMAL x2 = x0 - 1 + 2*G2;
+	FN_DECIMAL y2 = y0 - 1 + 2*G2;
+
+	FN_DECIMAL n0, n1, n2;
+	// Per corner: t = 0.5 - squared offset length; a negative t contributes 0, otherwise t^4 * GradCoord2D(...).
+	t = FN_DECIMAL(0.5) - x0*x0 - y0*y0;
+	if (t < 0) n0 = 0;
+	else
+	{
+		t *= t;
+		n0 = t * t * GradCoord2D(offset, i, j, x0, y0);
+	}
+
+	t = FN_DECIMAL(0.5) - x1*x1 - y1*y1;
+	if (t < 0) n1 = 0;
+	else
+	{
+		t *= t;
+		n1 = t*t*GradCoord2D(offset, i + i1, j + j1, x1, y1);
+	}
+
+	t = FN_DECIMAL(0.5) - x2*x2 - y2*y2;
+	if (t < 0) n2 = 0;
+	else
+	{
+		t *= t;
+		n2 = t*t*GradCoord2D(offset, i + 1, j + 1, x2, y2);
+	}
+	// Sum of the three corner contributions, scaled by 70.
+	return 70 * (n0 + n1 + n2);
+}
+
+FN_DECIMAL FastNoise::GetSimplex(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z, FN_DECIMAL w) const // Single 4D simplex sample (no fractal octaves).
+{
+	return SingleSimplex(0, x * m_frequency, y * m_frequency, z * m_frequency, w * m_frequency); // inputs scaled by m_frequency; offset argument is 0
+}
+
+static const FN_DECIMAL F4 = (sqrt(FN_DECIMAL(5)) - 1) / 4; // 4D SingleSimplex: factor on (x + y + z + w) added to each coordinate before flooring
+static const FN_DECIMAL G4 = (5 - sqrt(FN_DECIMAL(5))) / 20; // 4D SingleSimplex: factor on (i + j + k + l) used to map cell corners back to input space
+
+FN_DECIMAL FastNoise::SingleSimplex(unsigned char offset, FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z, FN_DECIMAL w) const // One 4D simplex sample; `offset` is forwarded to GradCoord4D.
+{
+	FN_DECIMAL n0, n1, n2, n3, n4; // contributions of the five corners
+	FN_DECIMAL t = (x + y + z + w) * F4; // shift added to every coordinate before flooring
+	int i = FastFloor(x + t);
+	int j = FastFloor(y + t);
+	int k = FastFloor(z + t);
+	int l = FastFloor(w + t);
+	t = (i + j + k + l) * G4; // maps the integer cell (i, j, k, l) back to input space
+	FN_DECIMAL X0 = i - t;
+	FN_DECIMAL Y0 = j - t;
+	FN_DECIMAL Z0 = k - t;
+	FN_DECIMAL W0 = l - t;
+	FN_DECIMAL x0 = x - X0; // offset of the input point from the first corner
+	FN_DECIMAL y0 = y - Y0;
+	FN_DECIMAL z0 = z - Z0;
+	FN_DECIMAL w0 = w - W0;
+	// Rank each offset by pairwise comparison: the larger of a pair gains one, a tie credits the second of the pair.
+	int rankx = 0;
+	int ranky = 0;
+	int rankz = 0;
+	int rankw = 0;
+
+	if (x0 > y0) rankx++; else ranky++;
+	if (x0 > z0) rankx++; else rankz++;
+	if (x0 > w0) rankx++; else rankw++;
+	if (y0 > z0) ranky++; else rankz++;
+	if (y0 > w0) ranky++; else rankw++;
+	if (z0 > w0) rankz++; else rankw++;
+	// Corner 1 steps along axes with rank >= 3, corner 2 along rank >= 2, corner 3 along rank >= 1.
+	int i1 = rankx >= 3 ? 1 : 0;
+	int j1 = ranky >= 3 ? 1 : 0;
+	int k1 = rankz >= 3 ? 1 : 0;
+	int l1 = rankw >= 3 ? 1 : 0;
+
+	int i2 = rankx >= 2 ? 1 : 0;
+	int j2 = ranky >= 2 ? 1 : 0;
+	int k2 = rankz >= 2 ? 1 : 0;
+	int l2 = rankw >= 2 ? 1 : 0;
+
+	int i3 = rankx >= 1 ? 1 : 0;
+	int j3 = ranky >= 1 ? 1 : 0;
+	int k3 = rankz >= 1 ? 1 : 0;
+	int l3 = rankw >= 1 ? 1 : 0;
+	// Offsets from the input point to corners 1-3 and to the (i + 1, j + 1, k + 1, l + 1) corner.
+	FN_DECIMAL x1 = x0 - i1 + G4;
+	FN_DECIMAL y1 = y0 - j1 + G4;
+	FN_DECIMAL z1 = z0 - k1 + G4;
+	FN_DECIMAL w1 = w0 - l1 + G4;
+	FN_DECIMAL x2 = x0 - i2 + 2*G4;
+	FN_DECIMAL y2 = y0 - j2 + 2*G4;
+	FN_DECIMAL z2 = z0 - k2 + 2*G4;
+	FN_DECIMAL w2 = w0 - l2 + 2*G4;
+	FN_DECIMAL x3 = x0 - i3 + 3*G4;
+	FN_DECIMAL y3 = y0 - j3 + 3*G4;
+	FN_DECIMAL z3 = z0 - k3 + 3*G4;
+	FN_DECIMAL w3 = w0 - l3 + 3*G4;
+	FN_DECIMAL x4 = x0 - 1 + 4*G4;
+	FN_DECIMAL y4 = y0 - 1 + 4*G4;
+	FN_DECIMAL z4 = z0 - 1 + 4*G4;
+	FN_DECIMAL w4 = w0 - 1 + 4*G4;
+	// Per corner: t = 0.6 - squared offset length; a negative t contributes 0, otherwise t^4 * GradCoord4D(...).
+	t = FN_DECIMAL(0.6) - x0*x0 - y0*y0 - z0*z0 - w0*w0;
+	if (t < 0) n0 = 0;
+	else {
+		t *= t;
+		n0 = t * t * GradCoord4D(offset, i, j, k, l, x0, y0, z0, w0);
+	}
+	t = FN_DECIMAL(0.6) - x1*x1 - y1*y1 - z1*z1 - w1*w1;
+	if (t < 0) n1 = 0;
+	else {
+		t *= t;
+		n1 = t * t * GradCoord4D(offset, i + i1, j + j1, k + k1, l + l1, x1, y1, z1, w1);
+	}
+	t = FN_DECIMAL(0.6) - x2*x2 - y2*y2 - z2*z2 - w2*w2;
+	if (t < 0) n2 = 0;
+	else {
+		t *= t;
+		n2 = t * t * GradCoord4D(offset, i + i2, j + j2, k + k2, l + l2, x2, y2, z2, w2);
+	}
+	t = FN_DECIMAL(0.6) - x3*x3 - y3*y3 - z3*z3 - w3*w3;
+	if (t < 0) n3 = 0;
+	else {
+		t *= t;
+		n3 = t * t * GradCoord4D(offset, i + i3, j + j3, k + k3, l + l3, x3, y3, z3, w3);
+	}
+	t = FN_DECIMAL(0.6) - x4*x4 - y4*y4 - z4*z4 - w4*w4;
+	if (t < 0) n4 = 0;
+	else {
+		t *= t;
+		n4 = t * t * GradCoord4D(offset, i + 1, j + 1, k + 1, l + 1, x4, y4, z4, w4);
+	}
+	// Sum of the five corner contributions, scaled by 27.
+	return 27 * (n0 + n1 + n2 + n3 + n4);
+}
+
+// Cubic Noise
+FN_DECIMAL FastNoise::GetCubicFractal(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const
+{
+	// Scale the sample point by the base frequency, then delegate to the
+	// accumulator selected by m_fractalType.
+	const FN_DECIMAL fx = x * m_frequency;
+	const FN_DECIMAL fy = y * m_frequency;
+	const FN_DECIMAL fz = z * m_frequency;
+
+	if (m_fractalType == FBM)
+		return SingleCubicFractalFBM(fx, fy, fz);
+	if (m_fractalType == Billow)
+		return SingleCubicFractalBillow(fx, fy, fz);
+	if (m_fractalType == RigidMulti)
+		return SingleCubicFractalRigidMulti(fx, fy, fz);
+
+	// Any other fractal type yields 0.
+	return 0;
+}
+
+FN_DECIMAL FastNoise::SingleCubicFractalFBM(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const
+{
+	// Octave 0 is taken as-is; each later octave samples at coordinates scaled
+	// by m_lacunarity and is added with a weight scaled by m_gain.
+	FN_DECIMAL total = SingleCubic(m_perm[0], x, y, z);
+	FN_DECIMAL weight = 1;
+
+	for (int octave = 1; octave < m_octaves; ++octave)
+	{
+		x *= m_lacunarity;
+		y *= m_lacunarity;
+		z *= m_lacunarity;
+		weight *= m_gain;
+		total += SingleCubic(m_perm[octave], x, y, z) * weight;
+	}
+
+	return total * m_fractalBounding;
+}
+
+FN_DECIMAL FastNoise::SingleCubicFractalBillow(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const
+{
+	// Every octave contributes |noise| * 2 - 1; octaves after the first are
+	// sampled at lacunarity-scaled coordinates and weighted by the running gain.
+	FN_DECIMAL total = FastAbs(SingleCubic(m_perm[0], x, y, z)) * 2 - 1;
+	FN_DECIMAL weight = 1;
+
+	for (int octave = 1; octave < m_octaves; ++octave)
+	{
+		x *= m_lacunarity;
+		y *= m_lacunarity;
+		z *= m_lacunarity;
+		weight *= m_gain;
+		total += (FastAbs(SingleCubic(m_perm[octave], x, y, z)) * 2 - 1) * weight;
+	}
+
+	return total * m_fractalBounding;
+}
+
+FN_DECIMAL FastNoise::SingleCubicFractalRigidMulti(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const
+{
+	// Octave 0 contributes 1 - |noise|; each later octave subtracts its own
+	// 1 - |noise| weighted by the running amplitude. No final bounding scale.
+	FN_DECIMAL total = 1 - FastAbs(SingleCubic(m_perm[0], x, y, z));
+	FN_DECIMAL weight = 1;
+
+	for (int octave = 1; octave < m_octaves; ++octave)
+	{
+		x *= m_lacunarity;
+		y *= m_lacunarity;
+		z *= m_lacunarity;
+		weight *= m_gain;
+		total -= (1 - FastAbs(SingleCubic(m_perm[octave], x, y, z))) * weight;
+	}
+
+	return total;
+}
+
+FN_DECIMAL FastNoise::GetCubic(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const // Single 3D cubic sample (no fractal octaves).
+{
+	return SingleCubic(0, x * m_frequency, y * m_frequency, z * m_frequency); // inputs scaled by m_frequency; offset argument is 0
+}
+
+const FN_DECIMAL CUBIC_3D_BOUNDING = 1 / (FN_DECIMAL(1.5) * FN_DECIMAL(1.5) * FN_DECIMAL(1.5)); // 1 / 1.5^3; the 3D SingleCubic multiplies its result by this
+
+FN_DECIMAL FastNoise::SingleCubic(unsigned char offset, FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const // One 3D cubic sample; `offset` is forwarded to ValCoord3DFast.
+{
+	int x1 = FastFloor(x);
+	int y1 = FastFloor(y);
+	int z1 = FastFloor(z);
+	// Four lattice coordinates per axis: one below the floored value (0), the floored value (1), and two above (2, 3).
+	int x0 = x1 - 1;
+	int y0 = y1 - 1;
+	int z0 = z1 - 1;
+	int x2 = x1 + 1;
+	int y2 = y1 + 1;
+	int z2 = z1 + 1;
+	int x3 = x1 + 2;
+	int y3 = y1 + 2;
+	int z3 = z1 + 2;
+	// Position of the sample relative to the floored coordinate, used as the CubicLerp parameter.
+	FN_DECIMAL xs = x - (FN_DECIMAL)x1;
+	FN_DECIMAL ys = y - (FN_DECIMAL)y1;
+	FN_DECIMAL zs = z - (FN_DECIMAL)z1;
+	// 4x4x4 lattice values reduced by nested CubicLerp: along x per (y, z) row, along y per z layer, then along z.
+	return CubicLerp(
+		CubicLerp(
+		CubicLerp(ValCoord3DFast(offset, x0, y0, z0), ValCoord3DFast(offset, x1, y0, z0), ValCoord3DFast(offset, x2, y0, z0), ValCoord3DFast(offset, x3, y0, z0), xs),
+		CubicLerp(ValCoord3DFast(offset, x0, y1, z0), ValCoord3DFast(offset, x1, y1, z0), ValCoord3DFast(offset, x2, y1, z0), ValCoord3DFast(offset, x3, y1, z0), xs),
+		CubicLerp(ValCoord3DFast(offset, x0, y2, z0), ValCoord3DFast(offset, x1, y2, z0), ValCoord3DFast(offset, x2, y2, z0), ValCoord3DFast(offset, x3, y2, z0), xs),
+		CubicLerp(ValCoord3DFast(offset, x0, y3, z0), ValCoord3DFast(offset, x1, y3, z0), ValCoord3DFast(offset, x2, y3, z0), ValCoord3DFast(offset, x3, y3, z0), xs),
+		ys),
+		CubicLerp(
+		CubicLerp(ValCoord3DFast(offset, x0, y0, z1), ValCoord3DFast(offset, x1, y0, z1), ValCoord3DFast(offset, x2, y0, z1), ValCoord3DFast(offset, x3, y0, z1), xs),
+		CubicLerp(ValCoord3DFast(offset, x0, y1, z1), ValCoord3DFast(offset, x1, y1, z1), ValCoord3DFast(offset, x2, y1, z1), ValCoord3DFast(offset, x3, y1, z1), xs),
+		CubicLerp(ValCoord3DFast(offset, x0, y2, z1), ValCoord3DFast(offset, x1, y2, z1), ValCoord3DFast(offset, x2, y2, z1), ValCoord3DFast(offset, x3, y2, z1), xs),
+		CubicLerp(ValCoord3DFast(offset, x0, y3, z1), ValCoord3DFast(offset, x1, y3, z1), ValCoord3DFast(offset, x2, y3, z1), ValCoord3DFast(offset, x3, y3, z1), xs),
+		ys),
+		CubicLerp(
+		CubicLerp(ValCoord3DFast(offset, x0, y0, z2), ValCoord3DFast(offset, x1, y0, z2), ValCoord3DFast(offset, x2, y0, z2), ValCoord3DFast(offset, x3, y0, z2), xs),
+		CubicLerp(ValCoord3DFast(offset, x0, y1, z2), ValCoord3DFast(offset, x1, y1, z2), ValCoord3DFast(offset, x2, y1, z2), ValCoord3DFast(offset, x3, y1, z2), xs),
+		CubicLerp(ValCoord3DFast(offset, x0, y2, z2), ValCoord3DFast(offset, x1, y2, z2), ValCoord3DFast(offset, x2, y2, z2), ValCoord3DFast(offset, x3, y2, z2), xs),
+		CubicLerp(ValCoord3DFast(offset, x0, y3, z2), ValCoord3DFast(offset, x1, y3, z2), ValCoord3DFast(offset, x2, y3, z2), ValCoord3DFast(offset, x3, y3, z2), xs),
+		ys),
+		CubicLerp(
+		CubicLerp(ValCoord3DFast(offset, x0, y0, z3), ValCoord3DFast(offset, x1, y0, z3), ValCoord3DFast(offset, x2, y0, z3), ValCoord3DFast(offset, x3, y0, z3), xs),
+		CubicLerp(ValCoord3DFast(offset, x0, y1, z3), ValCoord3DFast(offset, x1, y1, z3), ValCoord3DFast(offset, x2, y1, z3), ValCoord3DFast(offset, x3, y1, z3), xs),
+		CubicLerp(ValCoord3DFast(offset, x0, y2, z3), ValCoord3DFast(offset, x1, y2, z3), ValCoord3DFast(offset, x2, y2, z3), ValCoord3DFast(offset, x3, y2, z3), xs),
+		CubicLerp(ValCoord3DFast(offset, x0, y3, z3), ValCoord3DFast(offset, x1, y3, z3), ValCoord3DFast(offset, x2, y3, z3), ValCoord3DFast(offset, x3, y3, z3), xs),
+		ys),
+		zs) * CUBIC_3D_BOUNDING;
+}
+
+
+FN_DECIMAL FastNoise::GetCubicFractal(FN_DECIMAL x, FN_DECIMAL y) const
+{
+	// Scale the sample point by the base frequency, then delegate to the
+	// accumulator selected by m_fractalType.
+	const FN_DECIMAL fx = x * m_frequency;
+	const FN_DECIMAL fy = y * m_frequency;
+
+	if (m_fractalType == FBM)
+		return SingleCubicFractalFBM(fx, fy);
+	if (m_fractalType == Billow)
+		return SingleCubicFractalBillow(fx, fy);
+	if (m_fractalType == RigidMulti)
+		return SingleCubicFractalRigidMulti(fx, fy);
+
+	// Any other fractal type yields 0.
+	return 0;
+}
+
+FN_DECIMAL FastNoise::SingleCubicFractalFBM(FN_DECIMAL x, FN_DECIMAL y) const
+{
+	// Octave 0 is taken as-is; each later octave samples at coordinates scaled
+	// by m_lacunarity and is added with a weight scaled by m_gain.
+	FN_DECIMAL total = SingleCubic(m_perm[0], x, y);
+	FN_DECIMAL weight = 1;
+
+	for (int octave = 1; octave < m_octaves; ++octave)
+	{
+		x *= m_lacunarity;
+		y *= m_lacunarity;
+		weight *= m_gain;
+		total += SingleCubic(m_perm[octave], x, y) * weight;
+	}
+
+	return total * m_fractalBounding;
+}
+
+FN_DECIMAL FastNoise::SingleCubicFractalBillow(FN_DECIMAL x, FN_DECIMAL y) const
+{
+	// Every octave contributes |noise| * 2 - 1; octaves after the first are
+	// sampled at lacunarity-scaled coordinates and weighted by the running gain.
+	FN_DECIMAL total = FastAbs(SingleCubic(m_perm[0], x, y)) * 2 - 1;
+	FN_DECIMAL weight = 1;
+
+	for (int octave = 1; octave < m_octaves; ++octave)
+	{
+		x *= m_lacunarity;
+		y *= m_lacunarity;
+		weight *= m_gain;
+		total += (FastAbs(SingleCubic(m_perm[octave], x, y)) * 2 - 1) * weight;
+	}
+
+	return total * m_fractalBounding;
+}
+
+FN_DECIMAL FastNoise::SingleCubicFractalRigidMulti(FN_DECIMAL x, FN_DECIMAL y) const
+{
+	// Octave 0 contributes 1 - |noise|; each later octave subtracts its own
+	// 1 - |noise| weighted by the running amplitude. No final bounding scale.
+	FN_DECIMAL total = 1 - FastAbs(SingleCubic(m_perm[0], x, y));
+	FN_DECIMAL weight = 1;
+
+	for (int octave = 1; octave < m_octaves; ++octave)
+	{
+		x *= m_lacunarity;
+		y *= m_lacunarity;
+		weight *= m_gain;
+		total -= (1 - FastAbs(SingleCubic(m_perm[octave], x, y))) * weight;
+	}
+
+	return total;
+}
+
+FN_DECIMAL FastNoise::GetCubic(FN_DECIMAL x, FN_DECIMAL y) const
+{
+	// Single 2D cubic sample (no fractal octaves): both inputs are scaled by
+	// m_frequency and the offset argument is 0, mirroring the other
+	// single-sample getters in this file.
+	return SingleCubic(0, x * m_frequency, y * m_frequency);
+}
+
+const FN_DECIMAL CUBIC_2D_BOUNDING = 1 / (FN_DECIMAL(1.5) * FN_DECIMAL(1.5)); // 1 / 1.5^2; the 2D SingleCubic multiplies its result by this
+
+FN_DECIMAL FastNoise::SingleCubic(unsigned char offset, FN_DECIMAL x, FN_DECIMAL y) const // One 2D cubic sample; `offset` is forwarded to ValCoord2DFast.
+{
+	int x1 = FastFloor(x);
+	int y1 = FastFloor(y);
+	// Four lattice coordinates per axis: one below the floored value (0), the floored value (1), and two above (2, 3).
+	int x0 = x1 - 1;
+	int y0 = y1 - 1;
+	int x2 = x1 + 1;
+	int y2 = y1 + 1;
+	int x3 = x1 + 2;
+	int y3 = y1 + 2;
+	// Position of the sample relative to the floored coordinate, used as the CubicLerp parameter.
+	FN_DECIMAL xs = x - (FN_DECIMAL)x1;
+	FN_DECIMAL ys = y - (FN_DECIMAL)y1;
+	// 4x4 lattice values reduced by nested CubicLerp: along x for each row, then along y.
+	return CubicLerp(
+		CubicLerp(ValCoord2DFast(offset, x0, y0), ValCoord2DFast(offset, x1, y0), ValCoord2DFast(offset, x2, y0), ValCoord2DFast(offset, x3, y0), xs),
+		CubicLerp(ValCoord2DFast(offset, x0, y1), ValCoord2DFast(offset, x1, y1), ValCoord2DFast(offset, x2, y1), ValCoord2DFast(offset, x3, y1), xs),
+		CubicLerp(ValCoord2DFast(offset, x0, y2), ValCoord2DFast(offset, x1, y2), ValCoord2DFast(offset, x2, y2), ValCoord2DFast(offset, x3, y2), xs),
+		CubicLerp(ValCoord2DFast(offset, x0, y3), ValCoord2DFast(offset, x1, y3), ValCoord2DFast(offset, x2, y3), ValCoord2DFast(offset, x3, y3), xs),
+		ys) * CUBIC_2D_BOUNDING;
+}
+
+// Cellular Noise
+FN_DECIMAL FastNoise::GetCellular(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const
+{
+	const FN_DECIMAL fx = x * m_frequency;
+	const FN_DECIMAL fy = y * m_frequency;
+	const FN_DECIMAL fz = z * m_frequency;
+
+	// CellValue, NoiseLookup and Distance are answered by SingleCellular;
+	// every other return type goes through SingleCellular2Edge.
+	const bool usesSingleCellular = m_cellularReturnType == CellValue
+		|| m_cellularReturnType == NoiseLookup
+		|| m_cellularReturnType == Distance;
+
+	if (usesSingleCellular)
+		return SingleCellular(fx, fy, fz);
+	return SingleCellular2Edge(fx, fy, fz);
+}
+
+FN_DECIMAL FastNoise::SingleCellular(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const
+{
+	int xr = FastRound(x);
+	int yr = FastRound(y);
+	int zr = FastRound(z);
+	// Smallest distance found so far (starts at a large sentinel) and the cell that produced it.
+	FN_DECIMAL distance = 999999;
+	int xc = xr, yc = yr, zc = zr; // initialised so the return switch never reads indeterminate values
+	// Scan the 3x3x3 cells around the rounded input; each cell's point is offset by CELL_3D_* * m_cellularJitter.
+	// `default` shares the Euclidean branch, matching the 2D SingleCellular overload.
+	switch (m_cellularDistanceFunction)
+	{
+	default:
+	case Euclidean:
+		for (int xi = xr - 1; xi <= xr + 1; xi++)
+		{
+			for (int yi = yr - 1; yi <= yr + 1; yi++)
+			{
+				for (int zi = zr - 1; zi <= zr + 1; zi++)
+				{
+					unsigned char lutPos = Index3D_256(0, xi, yi, zi);
+
+					FN_DECIMAL vecX = xi - x + CELL_3D_X[lutPos] * m_cellularJitter;
+					FN_DECIMAL vecY = yi - y + CELL_3D_Y[lutPos] * m_cellularJitter;
+					FN_DECIMAL vecZ = zi - z + CELL_3D_Z[lutPos] * m_cellularJitter;
+
+					FN_DECIMAL newDistance = vecX * vecX + vecY * vecY + vecZ * vecZ; // squared length, no sqrt
+
+					if (newDistance < distance)
+					{
+						distance = newDistance;
+						xc = xi;
+						yc = yi;
+						zc = zi;
+					}
+				}
+			}
+		}
+		break;
+	case Manhattan:
+		for (int xi = xr - 1; xi <= xr + 1; xi++)
+		{
+			for (int yi = yr - 1; yi <= yr + 1; yi++)
+			{
+				for (int zi = zr - 1; zi <= zr + 1; zi++)
+				{
+					unsigned char lutPos = Index3D_256(0, xi, yi, zi);
+
+					FN_DECIMAL vecX = xi - x + CELL_3D_X[lutPos] * m_cellularJitter;
+					FN_DECIMAL vecY = yi - y + CELL_3D_Y[lutPos] * m_cellularJitter;
+					FN_DECIMAL vecZ = zi - z + CELL_3D_Z[lutPos] * m_cellularJitter;
+
+					FN_DECIMAL newDistance = FastAbs(vecX) + FastAbs(vecY) + FastAbs(vecZ); // sum of absolute components
+
+					if (newDistance < distance)
+					{
+						distance = newDistance;
+						xc = xi;
+						yc = yi;
+						zc = zi;
+					}
+				}
+			}
+		}
+		break;
+	case Natural:
+		for (int xi = xr - 1; xi <= xr + 1; xi++)
+		{
+			for (int yi = yr - 1; yi <= yr + 1; yi++)
+			{
+				for (int zi = zr - 1; zi <= zr + 1; zi++)
+				{
+					unsigned char lutPos = Index3D_256(0, xi, yi, zi);
+
+					FN_DECIMAL vecX = xi - x + CELL_3D_X[lutPos] * m_cellularJitter;
+					FN_DECIMAL vecY = yi - y + CELL_3D_Y[lutPos] * m_cellularJitter;
+					FN_DECIMAL vecZ = zi - z + CELL_3D_Z[lutPos] * m_cellularJitter;
+
+					// Sum of the Manhattan and squared-Euclidean measures.
+					FN_DECIMAL newDistance = (FastAbs(vecX) + FastAbs(vecY) + FastAbs(vecZ)) + (vecX * vecX + vecY * vecY + vecZ * vecZ);
+					if (newDistance < distance)
+					{
+						distance = newDistance;
+						xc = xi;
+						yc = yi;
+						zc = zi;
+					}
+				}
+			}
+		}
+		break;
+	}
+	// Turn the winning cell / distance into the value selected by m_cellularReturnType.
+	unsigned char lutPos;
+	switch (m_cellularReturnType)
+	{
+	case CellValue:
+		return ValCoord3D(m_seed, xc, yc, zc);
+
+	case NoiseLookup:
+		assert(m_cellularNoiseLookup);
+
+		lutPos = Index3D_256(0, xc, yc, zc);
+		return m_cellularNoiseLookup->GetNoise(xc + CELL_3D_X[lutPos] * m_cellularJitter, yc + CELL_3D_Y[lutPos] * m_cellularJitter, zc + CELL_3D_Z[lutPos] * m_cellularJitter);
+
+	case Distance:
+		return distance;
+	default:
+		return 0;
+	}
+}
+
+FN_DECIMAL FastNoise::SingleCellular2Edge(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const
+{
+	int xr = FastRound(x);
+	int yr = FastRound(y);
+	int zr = FastRound(z);
+	// distance[0..m_cellularDistanceIndex1] holds the smallest distances seen so far, in ascending order.
+	FN_DECIMAL distance[FN_CELLULAR_INDEX_MAX+1] = { 999999,999999,999999,999999 };
+	// Scan the 3x3x3 cells around the rounded input; each cell's point is offset by CELL_3D_* * m_cellularJitter.
+	// `default` shares the Euclidean branch, matching the 2D SingleCellular2Edge overload.
+	switch (m_cellularDistanceFunction)
+	{
+	default:
+	case Euclidean:
+		for (int xi = xr - 1; xi <= xr + 1; xi++)
+		{
+			for (int yi = yr - 1; yi <= yr + 1; yi++)
+			{
+				for (int zi = zr - 1; zi <= zr + 1; zi++)
+				{
+					unsigned char lutPos = Index3D_256(0, xi, yi, zi);
+
+					FN_DECIMAL vecX = xi - x + CELL_3D_X[lutPos] * m_cellularJitter;
+					FN_DECIMAL vecY = yi - y + CELL_3D_Y[lutPos] * m_cellularJitter;
+					FN_DECIMAL vecZ = zi - z + CELL_3D_Z[lutPos] * m_cellularJitter;
+
+					FN_DECIMAL newDistance = vecX * vecX + vecY * vecY + vecZ * vecZ; // squared length, no sqrt
+					// Insert newDistance into the sorted list, working from the largest tracked slot down.
+					for (int i = m_cellularDistanceIndex1; i > 0; i--)
+						distance[i] = fmax(fmin(distance[i], newDistance), distance[i - 1]);
+					distance[0] = fmin(distance[0], newDistance);
+				}
+			}
+		}
+		break;
+	case Manhattan:
+		for (int xi = xr - 1; xi <= xr + 1; xi++)
+		{
+			for (int yi = yr - 1; yi <= yr + 1; yi++)
+			{
+				for (int zi = zr - 1; zi <= zr + 1; zi++)
+				{
+					unsigned char lutPos = Index3D_256(0, xi, yi, zi);
+
+					FN_DECIMAL vecX = xi - x + CELL_3D_X[lutPos] * m_cellularJitter;
+					FN_DECIMAL vecY = yi - y + CELL_3D_Y[lutPos] * m_cellularJitter;
+					FN_DECIMAL vecZ = zi - z + CELL_3D_Z[lutPos] * m_cellularJitter;
+
+					FN_DECIMAL newDistance = FastAbs(vecX) + FastAbs(vecY) + FastAbs(vecZ); // sum of absolute components
+
+					for (int i = m_cellularDistanceIndex1; i > 0; i--)
+						distance[i] = fmax(fmin(distance[i], newDistance), distance[i - 1]);
+					distance[0] = fmin(distance[0], newDistance);
+				}
+			}
+		}
+		break;
+	case Natural:
+		for (int xi = xr - 1; xi <= xr + 1; xi++)
+		{
+			for (int yi = yr - 1; yi <= yr + 1; yi++)
+			{
+				for (int zi = zr - 1; zi <= zr + 1; zi++)
+				{
+					unsigned char lutPos = Index3D_256(0, xi, yi, zi);
+
+					FN_DECIMAL vecX = xi - x + CELL_3D_X[lutPos] * m_cellularJitter;
+					FN_DECIMAL vecY = yi - y + CELL_3D_Y[lutPos] * m_cellularJitter;
+					FN_DECIMAL vecZ = zi - z + CELL_3D_Z[lutPos] * m_cellularJitter;
+
+					// Sum of the Manhattan and squared-Euclidean measures.
+					FN_DECIMAL newDistance = (FastAbs(vecX) + FastAbs(vecY) + FastAbs(vecZ)) + (vecX * vecX + vecY * vecY + vecZ * vecZ);
+					for (int i = m_cellularDistanceIndex1; i > 0; i--)
+						distance[i] = fmax(fmin(distance[i], newDistance), distance[i - 1]);
+					distance[0] = fmin(distance[0], newDistance);
+				}
+			}
+		}
+		break;
+	}
+	// Combine the entries at m_cellularDistanceIndex0 and m_cellularDistanceIndex1 as the return type asks.
+	switch (m_cellularReturnType)
+	{
+	case Distance2:
+		return distance[m_cellularDistanceIndex1];
+	case Distance2Add:
+		return distance[m_cellularDistanceIndex1] + distance[m_cellularDistanceIndex0];
+	case Distance2Sub:
+		return distance[m_cellularDistanceIndex1] - distance[m_cellularDistanceIndex0];
+	case Distance2Mul:
+		return distance[m_cellularDistanceIndex1] * distance[m_cellularDistanceIndex0];
+	case Distance2Div:
+		return distance[m_cellularDistanceIndex0] / distance[m_cellularDistanceIndex1];
+	default:
+		return 0;
+	}
+}
+
+FN_DECIMAL FastNoise::GetCellular(FN_DECIMAL x, FN_DECIMAL y) const
+{
+	const FN_DECIMAL fx = x * m_frequency;
+	const FN_DECIMAL fy = y * m_frequency;
+
+	// CellValue, NoiseLookup and Distance are answered by SingleCellular;
+	// every other return type goes through SingleCellular2Edge.
+	const bool usesSingleCellular = m_cellularReturnType == CellValue
+		|| m_cellularReturnType == NoiseLookup
+		|| m_cellularReturnType == Distance;
+
+	if (usesSingleCellular)
+		return SingleCellular(fx, fy);
+	return SingleCellular2Edge(fx, fy);
+}
+
+FN_DECIMAL FastNoise::SingleCellular(FN_DECIMAL x, FN_DECIMAL y) const // 2D cellular noise for the CellValue / NoiseLookup / Distance return types.
+{
+	int xr = FastRound(x);
+	int yr = FastRound(y);
+	// Smallest distance found so far (starts at a large sentinel) and the cell that produced it.
+	FN_DECIMAL distance = 999999;
+	int xc, yc;
+	// Scan the 3x3 cells around the rounded input; each cell's point is offset by CELL_2D_* * m_cellularJitter.
+	switch (m_cellularDistanceFunction)
+	{
+	default: // unrecognised distance functions fall through to Euclidean
+	case Euclidean:
+		for (int xi = xr - 1; xi <= xr + 1; xi++)
+		{
+			for (int yi = yr - 1; yi <= yr + 1; yi++)
+			{
+				unsigned char lutPos = Index2D_256(0, xi, yi);
+
+				FN_DECIMAL vecX = xi - x + CELL_2D_X[lutPos] * m_cellularJitter;
+				FN_DECIMAL vecY = yi - y + CELL_2D_Y[lutPos] * m_cellularJitter;
+				// Squared length; no square root is taken.
+				FN_DECIMAL newDistance = vecX * vecX + vecY * vecY;
+
+				if (newDistance < distance)
+				{
+					distance = newDistance;
+					xc = xi;
+					yc = yi;
+				}
+			}
+		}
+		break;
+	case Manhattan:
+		for (int xi = xr - 1; xi <= xr + 1; xi++)
+		{
+			for (int yi = yr - 1; yi <= yr + 1; yi++)
+			{
+				unsigned char lutPos = Index2D_256(0, xi, yi);
+
+				FN_DECIMAL vecX = xi - x + CELL_2D_X[lutPos] * m_cellularJitter;
+				FN_DECIMAL vecY = yi - y + CELL_2D_Y[lutPos] * m_cellularJitter;
+
+				FN_DECIMAL newDistance = (FastAbs(vecX) + FastAbs(vecY)); // sum of absolute components
+
+				if (newDistance < distance)
+				{
+					distance = newDistance;
+					xc = xi;
+					yc = yi;
+				}
+			}
+		}
+		break;
+	case Natural:
+		for (int xi = xr - 1; xi <= xr + 1; xi++)
+		{
+			for (int yi = yr - 1; yi <= yr + 1; yi++)
+			{
+				unsigned char lutPos = Index2D_256(0, xi, yi);
+
+				FN_DECIMAL vecX = xi - x + CELL_2D_X[lutPos] * m_cellularJitter;
+				FN_DECIMAL vecY = yi - y + CELL_2D_Y[lutPos] * m_cellularJitter;
+				// Sum of the Manhattan and squared-Euclidean measures.
+				FN_DECIMAL newDistance = (FastAbs(vecX) + FastAbs(vecY)) + (vecX * vecX + vecY * vecY);
+
+				if (newDistance < distance)
+				{
+					distance = newDistance;
+					xc = xi;
+					yc = yi;
+				}
+			}
+		}
+		break;
+	}
+	// Turn the winning cell / distance into the value selected by m_cellularReturnType.
+	unsigned char lutPos;
+	switch (m_cellularReturnType)
+	{
+	case CellValue:
+		return ValCoord2D(m_seed, xc, yc);
+
+	case NoiseLookup:
+		assert(m_cellularNoiseLookup);
+
+		lutPos = Index2D_256(0, xc, yc);
+		return m_cellularNoiseLookup->GetNoise(xc + CELL_2D_X[lutPos] * m_cellularJitter, yc + CELL_2D_Y[lutPos] * m_cellularJitter);
+
+	case Distance:
+		return distance;
+	default:
+		return 0;
+	}
+}
+
+FN_DECIMAL FastNoise::SingleCellular2Edge(FN_DECIMAL x, FN_DECIMAL y) const // 2D cellular noise for the Distance2* return types.
+{
+	int xr = FastRound(x);
+	int yr = FastRound(y);
+	// distance[0..m_cellularDistanceIndex1] holds the smallest distances seen so far, in ascending order.
+	FN_DECIMAL distance[FN_CELLULAR_INDEX_MAX + 1] = { 999999,999999,999999,999999 };
+	// Scan the 3x3 cells around the rounded input; each cell's point is offset by CELL_2D_* * m_cellularJitter.
+	switch (m_cellularDistanceFunction)
+	{
+	default: // unrecognised distance functions fall through to Euclidean
+	case Euclidean:
+		for (int xi = xr - 1; xi <= xr + 1; xi++)
+		{
+			for (int yi = yr - 1; yi <= yr + 1; yi++)
+			{
+				unsigned char lutPos = Index2D_256(0, xi, yi);
+
+				FN_DECIMAL vecX = xi - x + CELL_2D_X[lutPos] * m_cellularJitter;
+				FN_DECIMAL vecY = yi - y + CELL_2D_Y[lutPos] * m_cellularJitter;
+
+				FN_DECIMAL newDistance = vecX * vecX + vecY * vecY; // squared length, no sqrt
+				// Insert newDistance into the sorted list, working from the largest tracked slot down.
+				for (int i = m_cellularDistanceIndex1; i > 0; i--)
+					distance[i] = fmax(fmin(distance[i], newDistance), distance[i - 1]);
+				distance[0] = fmin(distance[0], newDistance);
+			}
+		}
+		break;
+	case Manhattan:
+		for (int xi = xr - 1; xi <= xr + 1; xi++)
+		{
+			for (int yi = yr - 1; yi <= yr + 1; yi++)
+			{
+				unsigned char lutPos = Index2D_256(0, xi, yi);
+
+				FN_DECIMAL vecX = xi - x + CELL_2D_X[lutPos] * m_cellularJitter;
+				FN_DECIMAL vecY = yi - y + CELL_2D_Y[lutPos] * m_cellularJitter;
+
+				FN_DECIMAL newDistance = FastAbs(vecX) + FastAbs(vecY); // sum of absolute components
+
+				for (int i = m_cellularDistanceIndex1; i > 0; i--)
+					distance[i] = fmax(fmin(distance[i], newDistance), distance[i - 1]);
+				distance[0] = fmin(distance[0], newDistance);
+			}
+		}
+		break;
+	case Natural:
+		for (int xi = xr - 1; xi <= xr + 1; xi++)
+		{
+			for (int yi = yr - 1; yi <= yr + 1; yi++)
+			{
+				unsigned char lutPos = Index2D_256(0, xi, yi);
+
+				FN_DECIMAL vecX = xi - x + CELL_2D_X[lutPos] * m_cellularJitter;
+				FN_DECIMAL vecY = yi - y + CELL_2D_Y[lutPos] * m_cellularJitter;
+				// Sum of the Manhattan and squared-Euclidean measures.
+				FN_DECIMAL newDistance = (FastAbs(vecX) + FastAbs(vecY)) + (vecX * vecX + vecY * vecY);
+
+				for (int i = m_cellularDistanceIndex1; i > 0; i--)
+					distance[i] = fmax(fmin(distance[i], newDistance), distance[i - 1]);
+				distance[0] = fmin(distance[0], newDistance);
+			}
+		}
+		break;
+	}
+	// Combine the entries at m_cellularDistanceIndex0 and m_cellularDistanceIndex1 as the return type asks.
+	switch (m_cellularReturnType)
+	{
+	case Distance2:
+		return distance[m_cellularDistanceIndex1];
+	case Distance2Add:
+		return distance[m_cellularDistanceIndex1] + distance[m_cellularDistanceIndex0];
+	case Distance2Sub:
+		return distance[m_cellularDistanceIndex1] - distance[m_cellularDistanceIndex0];
+	case Distance2Mul:
+		return distance[m_cellularDistanceIndex1] * distance[m_cellularDistanceIndex0];
+	case Distance2Div:
+		return distance[m_cellularDistanceIndex0] / distance[m_cellularDistanceIndex1];
+	default:
+		return 0;
+	}
+}
+
+void FastNoise::GradientPerturb(FN_DECIMAL& x, FN_DECIMAL& y, FN_DECIMAL& z) const // Single-pass 3D warp; x, y and z are modified in place.
+{
+	SingleGradientPerturb(0, m_gradientPerturbAmp, m_frequency, x, y, z); // offset 0, configured amplitude and base frequency
+}
+
+void FastNoise::GradientPerturbFractal(FN_DECIMAL& x, FN_DECIMAL& y, FN_DECIMAL& z) const
+{
+	// The first pass always runs, at the base frequency; each further octave scales the
+	// frequency by m_lacunarity and the amplitude by m_gain. x, y, z are updated in place.
+	FN_DECIMAL warpAmp = m_gradientPerturbAmp * m_fractalBounding;
+	FN_DECIMAL warpFreq = m_frequency;
+
+	SingleGradientPerturb(m_perm[0], warpAmp, warpFreq, x, y, z);
+	for (int octave = 1; octave < m_octaves; ++octave)
+	{
+		warpFreq *= m_lacunarity;
+		warpAmp *= m_gain;
+		SingleGradientPerturb(m_perm[octave], warpAmp, warpFreq, x, y, z);
+	}
+}
+
+void FastNoise::SingleGradientPerturb(unsigned char offset, FN_DECIMAL warpAmp, FN_DECIMAL frequency, FN_DECIMAL& x, FN_DECIMAL& y, FN_DECIMAL& z) const // Displaces (x, y, z) in place by an interpolated lookup vector scaled by warpAmp.
+{
+	FN_DECIMAL xf = x * frequency;
+	FN_DECIMAL yf = y * frequency;
+	FN_DECIMAL zf = z * frequency;
+	// Integer cell around the frequency-scaled point: (x0, y0, z0) from FastFloor, (x1, y1, z1) one step further.
+	int x0 = FastFloor(xf);
+	int y0 = FastFloor(yf);
+	int z0 = FastFloor(zf);
+	int x1 = x0 + 1;
+	int y1 = y0 + 1;
+	int z1 = z0 + 1;
+	// Interpolation weights from the position inside the cell; unrecognised m_interp values use Linear.
+	FN_DECIMAL xs, ys, zs;
+	switch (m_interp)
+	{
+	default:
+	case Linear:
+		xs = xf - (FN_DECIMAL)x0;
+		ys = yf - (FN_DECIMAL)y0;
+		zs = zf - (FN_DECIMAL)z0;
+		break;
+	case Hermite:
+		xs = InterpHermiteFunc(xf - (FN_DECIMAL)x0);
+		ys = InterpHermiteFunc(yf - (FN_DECIMAL)y0);
+		zs = InterpHermiteFunc(zf - (FN_DECIMAL)z0);
+		break;
+	case Quintic:
+		xs = InterpQuinticFunc(xf - (FN_DECIMAL)x0);
+		ys = InterpQuinticFunc(yf - (FN_DECIMAL)y0);
+		zs = InterpQuinticFunc(zf - (FN_DECIMAL)z0);
+		break;
+	}
+	// z0 layer, y0 row: blend the CELL_3D_* vectors of the two x-neighbours.
+	int lutPos0 = Index3D_256(offset, x0, y0, z0);
+	int lutPos1 = Index3D_256(offset, x1, y0, z0);
+
+	FN_DECIMAL lx0x = Lerp(CELL_3D_X[lutPos0], CELL_3D_X[lutPos1], xs);
+	FN_DECIMAL ly0x = Lerp(CELL_3D_Y[lutPos0], CELL_3D_Y[lutPos1], xs);
+	FN_DECIMAL lz0x = Lerp(CELL_3D_Z[lutPos0], CELL_3D_Z[lutPos1], xs);
+	// z0 layer, y1 row.
+	lutPos0 = Index3D_256(offset, x0, y1, z0);
+	lutPos1 = Index3D_256(offset, x1, y1, z0);
+
+	FN_DECIMAL lx1x = Lerp(CELL_3D_X[lutPos0], CELL_3D_X[lutPos1], xs);
+	FN_DECIMAL ly1x = Lerp(CELL_3D_Y[lutPos0], CELL_3D_Y[lutPos1], xs);
+	FN_DECIMAL lz1x = Lerp(CELL_3D_Z[lutPos0], CELL_3D_Z[lutPos1], xs);
+	// Blend the two rows of the z0 layer along y.
+	FN_DECIMAL lx0y = Lerp(lx0x, lx1x, ys);
+	FN_DECIMAL ly0y = Lerp(ly0x, ly1x, ys);
+	FN_DECIMAL lz0y = Lerp(lz0x, lz1x, ys);
+	// z1 layer, y0 row (the l?0x temporaries are reused).
+	lutPos0 = Index3D_256(offset, x0, y0, z1);
+	lutPos1 = Index3D_256(offset, x1, y0, z1);
+
+	lx0x = Lerp(CELL_3D_X[lutPos0], CELL_3D_X[lutPos1], xs);
+	ly0x = Lerp(CELL_3D_Y[lutPos0], CELL_3D_Y[lutPos1], xs);
+	lz0x = Lerp(CELL_3D_Z[lutPos0], CELL_3D_Z[lutPos1], xs);
+	// z1 layer, y1 row (the l?1x temporaries are reused).
+	lutPos0 = Index3D_256(offset, x0, y1, z1);
+	lutPos1 = Index3D_256(offset, x1, y1, z1);
+
+	lx1x = Lerp(CELL_3D_X[lutPos0], CELL_3D_X[lutPos1], xs);
+	ly1x = Lerp(CELL_3D_Y[lutPos0], CELL_3D_Y[lutPos1], xs);
+	lz1x = Lerp(CELL_3D_Z[lutPos0], CELL_3D_Z[lutPos1], xs);
+	// Blend the z1 rows along y, then the two layers along z, and add the scaled result to the inputs.
+	x += Lerp(lx0y, Lerp(lx0x, lx1x, ys), zs) * warpAmp;
+	y += Lerp(ly0y, Lerp(ly0x, ly1x, ys), zs) * warpAmp;
+	z += Lerp(lz0y, Lerp(lz0x, lz1x, ys), zs) * warpAmp;
+}
+
+void FastNoise::GradientPerturb(FN_DECIMAL& x, FN_DECIMAL& y) const // Single-pass 2D warp; x and y are modified in place.
+{
+	SingleGradientPerturb(0, m_gradientPerturbAmp, m_frequency, x, y); // offset 0, configured amplitude and base frequency
+}
+
+void FastNoise::GradientPerturbFractal(FN_DECIMAL& x, FN_DECIMAL& y) const
+{
+	// The first pass always runs, at the base frequency; each further octave scales the
+	// frequency by m_lacunarity and the amplitude by m_gain. x and y are updated in place.
+	FN_DECIMAL warpAmp = m_gradientPerturbAmp * m_fractalBounding;
+	FN_DECIMAL warpFreq = m_frequency;
+
+	SingleGradientPerturb(m_perm[0], warpAmp, warpFreq, x, y);
+	for (int octave = 1; octave < m_octaves; ++octave)
+	{
+		warpFreq *= m_lacunarity;
+		warpAmp *= m_gain;
+		SingleGradientPerturb(m_perm[octave], warpAmp, warpFreq, x, y);
+	}
+}
+
+void FastNoise::SingleGradientPerturb(unsigned char offset, FN_DECIMAL warpAmp, FN_DECIMAL frequency, FN_DECIMAL& x, FN_DECIMAL& y) const // Displaces (x, y) in place by an interpolated lookup vector scaled by warpAmp.
+{
+	FN_DECIMAL xf = x * frequency;
+	FN_DECIMAL yf = y * frequency;
+	// Integer cell around the frequency-scaled point: (x0, y0) from FastFloor, (x1, y1) one step further.
+	int x0 = FastFloor(xf);
+	int y0 = FastFloor(yf);
+	int x1 = x0 + 1;
+	int y1 = y0 + 1;
+	// Interpolation weights from the position inside the cell; unrecognised m_interp values use Linear.
+	FN_DECIMAL xs, ys;
+	switch (m_interp)
+	{
+	default:
+	case Linear:
+		xs = xf - (FN_DECIMAL)x0;
+		ys = yf - (FN_DECIMAL)y0;
+		break;
+	case Hermite:
+		xs = InterpHermiteFunc(xf - (FN_DECIMAL)x0);
+		ys = InterpHermiteFunc(yf - (FN_DECIMAL)y0);
+		break;
+	case Quintic:
+		xs = InterpQuinticFunc(xf - (FN_DECIMAL)x0);
+		ys = InterpQuinticFunc(yf - (FN_DECIMAL)y0);
+		break;
+	}
+	// y0 row: blend the CELL_2D_* vectors of the two x-neighbours.
+	int lutPos0 = Index2D_256(offset, x0, y0);
+	int lutPos1 = Index2D_256(offset, x1, y0);
+
+	FN_DECIMAL lx0x = Lerp(CELL_2D_X[lutPos0], CELL_2D_X[lutPos1], xs);
+	FN_DECIMAL ly0x = Lerp(CELL_2D_Y[lutPos0], CELL_2D_Y[lutPos1], xs);
+	// y1 row.
+	lutPos0 = Index2D_256(offset, x0, y1);
+	lutPos1 = Index2D_256(offset, x1, y1);
+
+	FN_DECIMAL lx1x = Lerp(CELL_2D_X[lutPos0], CELL_2D_X[lutPos1], xs);
+	FN_DECIMAL ly1x = Lerp(CELL_2D_Y[lutPos0], CELL_2D_Y[lutPos1], xs);
+	// Blend the two rows along y and add the scaled result to the inputs.
+	x += Lerp(lx0x, lx1x, ys) * warpAmp;
+	y += Lerp(ly0x, ly1x, ys) * warpAmp;
+}
diff --git a/src/ThirdParty/fastnoise.hpp b/src/ThirdParty/fastnoise.hpp
new file mode 100644
index 0000000..f6c8d21
--- /dev/null
+++ b/src/ThirdParty/fastnoise.hpp
@@ -0,0 +1,311 @@
+// FastNoise.h
+//
+// MIT License
+//
+// Copyright(c) 2017 Jordan Peck
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files(the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions :
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//
+// The developer's email is jorzixdan.me2@gzixmail.com (for great email, take
+// off every 'zix'.)
+//
+
+// VERSION: 0.4.1
+
+#ifndef FASTNOISE_H
+#define FASTNOISE_H
+
+// Uncomment the line below to use doubles throughout FastNoise instead of floats
+//#define FN_USE_DOUBLES
+
+#define FN_CELLULAR_INDEX_MAX 3 // NOTE(review): presumably the largest index SetCellularDistance2Indices() accepts ("index1 must be < 4") - confirm in fastnoise.cpp
+// Floating-point type used for coordinates, parameters and return values throughout the FastNoise API.
+#ifdef FN_USE_DOUBLES
+typedef double FN_DECIMAL;
+#else
+typedef float FN_DECIMAL;
+#endif
+
+class FastNoise // Noise generator: configure it with the Set*() methods, sample it with the Get*() / GradientPerturb*() methods
+{
+public:
+	explicit FastNoise(int seed = 1337) { SetSeed(seed); CalculateFractalBounding(); }
+	// Option enums; each one is consumed by the setter of the matching name below.
+	enum NoiseType { Value, ValueFractal, Perlin, PerlinFractal, Simplex, SimplexFractal, Cellular, WhiteNoise, Cubic, CubicFractal };
+	enum Interp { Linear, Hermite, Quintic };
+	enum FractalType { FBM, Billow, RigidMulti };
+	enum CellularDistanceFunction { Euclidean, Manhattan, Natural };
+	enum CellularReturnType { CellValue, NoiseLookup, Distance, Distance2, Distance2Add, Distance2Sub, Distance2Mul, Distance2Div };
+
+	// Sets seed used for all noise types
+	// Default: 1337
+	void SetSeed(int seed);
+
+	// Returns seed used for all noise types
+	int GetSeed() const { return m_seed; }
+
+	// Sets frequency for all noise types
+	// Default: 0.01
+	void SetFrequency(FN_DECIMAL frequency) { m_frequency = frequency; }
+
+	// Returns frequency used for all noise types
+	FN_DECIMAL GetFrequency() const { return m_frequency; }
+
+	// Changes the interpolation method used to smooth between noise values
+	// Possible interpolation methods (lowest to highest quality) :
+	// - Linear
+	// - Hermite
+	// - Quintic
+	// Used in Value, Perlin Noise and Position Warping
+	// Default: Quintic
+	void SetInterp(Interp interp) { m_interp = interp; }
+
+	// Returns interpolation method used for supported noise types
+	Interp GetInterp() const { return m_interp; }
+
+	// Sets noise return type of GetNoise(...)
+	// Default: Simplex
+	void SetNoiseType(NoiseType noiseType) { m_noiseType = noiseType; }
+
+	// Returns the noise type used by GetNoise
+	NoiseType GetNoiseType() const { return m_noiseType; }
+
+	// Sets octave count for all fractal noise types
+	// Default: 3
+	void SetFractalOctaves(int octaves) { m_octaves = octaves; CalculateFractalBounding(); }
+
+	// Returns octave count for all fractal noise types
+	int GetFractalOctaves() const { return m_octaves; }
+	
+	// Sets octave lacunarity for all fractal noise types
+	// Default: 2.0
+	void SetFractalLacunarity(FN_DECIMAL lacunarity) { m_lacunarity = lacunarity; }
+
+	// Returns octave lacunarity for all fractal noise types
+	FN_DECIMAL GetFractalLacunarity() const { return m_lacunarity; }
+
+	// Sets octave gain for all fractal noise types
+	// Default: 0.5
+	void SetFractalGain(FN_DECIMAL gain) { m_gain = gain; CalculateFractalBounding(); }
+
+	// Returns octave gain for all fractal noise types
+	FN_DECIMAL GetFractalGain() const { return m_gain; }
+
+	// Sets method for combining octaves in all fractal noise types
+	// Default: FBM
+	void SetFractalType(FractalType fractalType) { m_fractalType = fractalType; }
+
+	// Returns method for combining octaves in all fractal noise types
+	FractalType GetFractalType() const { return m_fractalType; }
+
+	// ---- Cellular noise settings ----
+	// Sets distance function used in cellular noise calculations
+	// Default: Euclidean
+	void SetCellularDistanceFunction(CellularDistanceFunction cellularDistanceFunction) { m_cellularDistanceFunction = cellularDistanceFunction; }
+
+	// Returns the distance function used in cellular noise calculations
+	CellularDistanceFunction GetCellularDistanceFunction() const { return m_cellularDistanceFunction; }
+
+	// Sets return type from cellular noise calculations
+	// Note: NoiseLookup requires another FastNoise object be set with SetCellularNoiseLookup() to function
+	// Default: CellValue
+	void SetCellularReturnType(CellularReturnType cellularReturnType) { m_cellularReturnType = cellularReturnType; }
+
+	// Returns the return type from cellular noise calculations
+	CellularReturnType GetCellularReturnType() const { return m_cellularReturnType; }
+
+	// Noise used to calculate a cell value if cellular return type is NoiseLookup
+	// The lookup value is acquired through GetNoise(), so call SetNoiseType() on the lookup object; Value, Perlin or Simplex is recommended
+	void SetCellularNoiseLookup(FastNoise* noise) { m_cellularNoiseLookup = noise; }
+
+	// Returns the noise used to calculate a cell value if the cellular return type is NoiseLookup
+	FastNoise* GetCellularNoiseLookup() const { return m_cellularNoiseLookup; }
+
+	// Sets the 2 distance indices used for distance2 return types
+	// Default: 0, 1
+	// Note: index0 should be lower than index1
+	// Both indices must be >= 0, index1 must be < 4
+	void SetCellularDistance2Indices(int cellularDistanceIndex0, int cellularDistanceIndex1);
+
+	// Returns the 2 distance indices used for distance2 return types
+	void GetCellularDistance2Indices(int& cellularDistanceIndex0, int& cellularDistanceIndex1) const;
+
+	// Sets the maximum distance a cellular point can move from its grid position
+	// Setting this high will make artifacts more common
+	// Default: 0.45
+	void SetCellularJitter(FN_DECIMAL cellularJitter) { m_cellularJitter = cellularJitter; }
+
+	// Returns the maximum distance a cellular point can move from its grid position
+	FN_DECIMAL GetCellularJitter() const { return m_cellularJitter; }
+
+	// Sets the maximum warp distance from original location when using GradientPerturb{Fractal}(...)
+	// Default: 1.0
+	void SetGradientPerturbAmp(FN_DECIMAL gradientPerturbAmp) { m_gradientPerturbAmp = gradientPerturbAmp; }
+
+	// Returns the maximum warp distance from original location when using GradientPerturb{Fractal}(...)
+	FN_DECIMAL GetGradientPerturbAmp() const { return m_gradientPerturbAmp; }
+
+	// 2D: noise samplers, plus GradientPerturb*() which warp the passed coordinates in place
+	FN_DECIMAL GetValue(FN_DECIMAL x, FN_DECIMAL y) const;
+	FN_DECIMAL GetValueFractal(FN_DECIMAL x, FN_DECIMAL y) const;
+
+	FN_DECIMAL GetPerlin(FN_DECIMAL x, FN_DECIMAL y) const;
+	FN_DECIMAL GetPerlinFractal(FN_DECIMAL x, FN_DECIMAL y) const;
+
+	FN_DECIMAL GetSimplex(FN_DECIMAL x, FN_DECIMAL y) const;
+	FN_DECIMAL GetSimplexFractal(FN_DECIMAL x, FN_DECIMAL y) const;
+
+	FN_DECIMAL GetCellular(FN_DECIMAL x, FN_DECIMAL y) const;
+
+	FN_DECIMAL GetWhiteNoise(FN_DECIMAL x, FN_DECIMAL y) const;
+	FN_DECIMAL GetWhiteNoiseInt(int x, int y) const;
+
+	FN_DECIMAL GetCubic(FN_DECIMAL x, FN_DECIMAL y) const;
+	FN_DECIMAL GetCubicFractal(FN_DECIMAL x, FN_DECIMAL y) const;
+
+	FN_DECIMAL GetNoise(FN_DECIMAL x, FN_DECIMAL y) const;
+
+	void GradientPerturb(FN_DECIMAL& x, FN_DECIMAL& y) const;
+	void GradientPerturbFractal(FN_DECIMAL& x, FN_DECIMAL& y) const;
+
+	// 3D: same set of samplers and in-place warps as the 2D group
+	FN_DECIMAL GetValue(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const;
+	FN_DECIMAL GetValueFractal(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const;
+
+	FN_DECIMAL GetPerlin(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const;
+	FN_DECIMAL GetPerlinFractal(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const;
+
+	FN_DECIMAL GetSimplex(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const;
+	FN_DECIMAL GetSimplexFractal(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const;
+
+	FN_DECIMAL GetCellular(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const;
+
+	FN_DECIMAL GetWhiteNoise(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const;
+	FN_DECIMAL GetWhiteNoiseInt(int x, int y, int z) const;
+
+	FN_DECIMAL GetCubic(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const;
+	FN_DECIMAL GetCubicFractal(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const;
+
+	FN_DECIMAL GetNoise(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const;
+
+	void GradientPerturb(FN_DECIMAL& x, FN_DECIMAL& y, FN_DECIMAL& z) const;
+	void GradientPerturbFractal(FN_DECIMAL& x, FN_DECIMAL& y, FN_DECIMAL& z) const;
+
+	// 4D: only Simplex and white noise are declared for four dimensions
+	FN_DECIMAL GetSimplex(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z, FN_DECIMAL w) const;
+
+	FN_DECIMAL GetWhiteNoise(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z, FN_DECIMAL w) const;
+	FN_DECIMAL GetWhiteNoiseInt(int x, int y, int z, int w) const;
+
+private:
+	unsigned char m_perm[512]; // m_perm[octave] is the per-octave hash offset in GradientPerturbFractal(); NOTE(review): presumably filled by SetSeed() - confirm
+	unsigned char m_perm12[512];
+
+	int m_seed = 1337;
+	FN_DECIMAL m_frequency = FN_DECIMAL(0.01);
+	Interp m_interp = Quintic;
+	NoiseType m_noiseType = Simplex;
+
+	int m_octaves = 3;
+	FN_DECIMAL m_lacunarity = FN_DECIMAL(2);
+	FN_DECIMAL m_gain = FN_DECIMAL(0.5);
+	FractalType m_fractalType = FBM;
+	FN_DECIMAL m_fractalBounding; // no in-class default; NOTE(review): presumably assigned by CalculateFractalBounding() - confirm
+
+	CellularDistanceFunction m_cellularDistanceFunction = Euclidean;
+	CellularReturnType m_cellularReturnType = CellValue;
+	FastNoise* m_cellularNoiseLookup = nullptr;
+	int m_cellularDistanceIndex0 = 0;
+	int m_cellularDistanceIndex1 = 1;
+	FN_DECIMAL m_cellularJitter = FN_DECIMAL(0.45);
+
+	FN_DECIMAL m_gradientPerturbAmp = FN_DECIMAL(1);
+
+	void CalculateFractalBounding();
+
+	// 2D implementation helpers (private)
+	FN_DECIMAL SingleValueFractalFBM(FN_DECIMAL x, FN_DECIMAL y) const;
+	FN_DECIMAL SingleValueFractalBillow(FN_DECIMAL x, FN_DECIMAL y) const;
+	FN_DECIMAL SingleValueFractalRigidMulti(FN_DECIMAL x, FN_DECIMAL y) const;
+	FN_DECIMAL SingleValue(unsigned char offset, FN_DECIMAL x, FN_DECIMAL y) const;
+
+	FN_DECIMAL SinglePerlinFractalFBM(FN_DECIMAL x, FN_DECIMAL y) const;
+	FN_DECIMAL SinglePerlinFractalBillow(FN_DECIMAL x, FN_DECIMAL y) const;
+	FN_DECIMAL SinglePerlinFractalRigidMulti(FN_DECIMAL x, FN_DECIMAL y) const;
+	FN_DECIMAL SinglePerlin(unsigned char offset, FN_DECIMAL x, FN_DECIMAL y) const;
+
+	FN_DECIMAL SingleSimplexFractalFBM(FN_DECIMAL x, FN_DECIMAL y) const;
+	FN_DECIMAL SingleSimplexFractalBillow(FN_DECIMAL x, FN_DECIMAL y) const;
+	FN_DECIMAL SingleSimplexFractalRigidMulti(FN_DECIMAL x, FN_DECIMAL y) const;
+	FN_DECIMAL SingleSimplexFractalBlend(FN_DECIMAL x, FN_DECIMAL y) const;
+	FN_DECIMAL SingleSimplex(unsigned char offset, FN_DECIMAL x, FN_DECIMAL y) const;
+
+	FN_DECIMAL SingleCubicFractalFBM(FN_DECIMAL x, FN_DECIMAL y) const;
+	FN_DECIMAL SingleCubicFractalBillow(FN_DECIMAL x, FN_DECIMAL y) const;
+	FN_DECIMAL SingleCubicFractalRigidMulti(FN_DECIMAL x, FN_DECIMAL y) const;
+	FN_DECIMAL SingleCubic(unsigned char offset, FN_DECIMAL x, FN_DECIMAL y) const;
+
+	FN_DECIMAL SingleCellular(FN_DECIMAL x, FN_DECIMAL y) const;
+	FN_DECIMAL SingleCellular2Edge(FN_DECIMAL x, FN_DECIMAL y) const;
+
+	void SingleGradientPerturb(unsigned char offset, FN_DECIMAL warpAmp, FN_DECIMAL frequency, FN_DECIMAL& x, FN_DECIMAL& y) const;
+
+	// 3D implementation helpers (private)
+	FN_DECIMAL SingleValueFractalFBM(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const;
+	FN_DECIMAL SingleValueFractalBillow(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const;
+	FN_DECIMAL SingleValueFractalRigidMulti(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const;
+	FN_DECIMAL SingleValue(unsigned char offset, FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const;
+
+	FN_DECIMAL SinglePerlinFractalFBM(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const;
+	FN_DECIMAL SinglePerlinFractalBillow(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const;
+	FN_DECIMAL SinglePerlinFractalRigidMulti(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const;
+	FN_DECIMAL SinglePerlin(unsigned char offset, FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const;
+
+	FN_DECIMAL SingleSimplexFractalFBM(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const;
+	FN_DECIMAL SingleSimplexFractalBillow(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const;
+	FN_DECIMAL SingleSimplexFractalRigidMulti(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const;
+	FN_DECIMAL SingleSimplex(unsigned char offset, FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const;
+
+	FN_DECIMAL SingleCubicFractalFBM(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const;
+	FN_DECIMAL SingleCubicFractalBillow(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const;
+	FN_DECIMAL SingleCubicFractalRigidMulti(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const;
+	FN_DECIMAL SingleCubic(unsigned char offset, FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const;
+
+	FN_DECIMAL SingleCellular(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const;
+	FN_DECIMAL SingleCellular2Edge(FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z) const;
+
+	void SingleGradientPerturb(unsigned char offset, FN_DECIMAL warpAmp, FN_DECIMAL frequency, FN_DECIMAL& x, FN_DECIMAL& y, FN_DECIMAL& z) const;
+
+	// 4D implementation helper (private)
+	FN_DECIMAL SingleSimplex(unsigned char offset, FN_DECIMAL x, FN_DECIMAL y, FN_DECIMAL z, FN_DECIMAL w) const;
+	// Coordinate-hash helpers; Index2D_256 yields the CELL_2D_X/CELL_2D_Y table index used by the 2D SingleGradientPerturb.
+	inline unsigned char Index2D_12(unsigned char offset, int x, int y) const;
+	inline unsigned char Index3D_12(unsigned char offset, int x, int y, int z) const;
+	inline unsigned char Index4D_32(unsigned char offset, int x, int y, int z, int w) const;
+	inline unsigned char Index2D_256(unsigned char offset, int x, int y) const;
+	inline unsigned char Index3D_256(unsigned char offset, int x, int y, int z) const;
+	inline unsigned char Index4D_256(unsigned char offset, int x, int y, int z, int w) const;
+
+	inline FN_DECIMAL ValCoord2DFast(unsigned char offset, int x, int y) const;
+	inline FN_DECIMAL ValCoord3DFast(unsigned char offset, int x, int y, int z) const;
+	inline FN_DECIMAL GradCoord2D(unsigned char offset, int x, int y, FN_DECIMAL xd, FN_DECIMAL yd) const;
+	inline FN_DECIMAL GradCoord3D(unsigned char offset, int x, int y, int z, FN_DECIMAL xd, FN_DECIMAL yd, FN_DECIMAL zd) const;
+	inline FN_DECIMAL GradCoord4D(unsigned char offset, int x, int y, int z, int w, FN_DECIMAL xd, FN_DECIMAL yd, FN_DECIMAL zd, FN_DECIMAL wd) const;
+};
+#endif
diff --git a/src/ThirdParty/glad.c b/src/ThirdParty/glad.c
new file mode 100644
index 0000000..5e04eda
--- /dev/null
+++ b/src/ThirdParty/glad.c
@@ -0,0 +1,1843 @@
+/*
+
+	OpenGL loader generated by glad 0.1.28 on Tue Jan 15 14:49:19 2019.
+
+	Language/Generator: C/C++
+	Specification: gl
+	APIs: gl=3.3
+	Profile: compatibility
+	Extensions:
+
+	Loader: True
+	Local files: False
+	Omit khrplatform: False
+	Reproducible: False
+
+	Commandline:
+		--profile="compatibility" --api="gl=3.3" --generator="c" --spec="gl" --extensions=""
+	Online:
+		https://glad.dav1d.de/#profile=compatibility&language=c&specification=gl&loader=on&api=gl%3D3.3
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <glad/glad.h>
+
+static void* get_proc(const char* namez); /* forward declaration; defined after the platform-specific open_gl()/close_gl() */
+
+#if defined(_WIN32) || defined(__CYGWIN__)
+#include <windows.h>
+static HMODULE libGL;
+/* libGL: handle of opengl32.dll, NULL while not loaded. The typedef below is the resolver signature open_gl() stores. */
+typedef void* (APIENTRYP PFNWGLGETPROCADDRESSPROC_PRIVATE)(const char*);
+static PFNWGLGETPROCADDRESSPROC_PRIVATE gladGetProcAddressPtr;
+/* Define HAVE_WINAPIFAMILY when <winapifamily.h> can be included (checked for MSVC builds only). */
+#ifdef _MSC_VER
+#ifdef __has_include
+#if __has_include(<winapifamily.h>)
+#define HAVE_WINAPIFAMILY 1
+#endif
+#elif _MSC_VER >= 1700 && !_USING_V110_SDK71_
+#define HAVE_WINAPIFAMILY 1
+#endif
+#endif
+/* IS_UWP: built for the app partition but not the desktop one; open_gl() then loads nothing and returns 0. */
+#ifdef HAVE_WINAPIFAMILY
+#include <winapifamily.h>
+#if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) && WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP)
+#define IS_UWP 1
+#endif
+#endif
+
+static
+int open_gl(void) {
+#ifdef IS_UWP
+	return 0; /* nothing is loaded when IS_UWP is defined */
+#else
+	void (*wgl_entry)(void);
+	libGL = LoadLibraryW(L"opengl32.dll");
+	if (libGL == NULL) return 0;
+	/* Route the GetProcAddress result through a generic function-pointer type before the final cast. */
+	wgl_entry = (void(*)(void)) GetProcAddress(libGL, "wglGetProcAddress");
+	gladGetProcAddressPtr = (PFNWGLGETPROCADDRESSPROC_PRIVATE)wgl_entry;
+	return gladGetProcAddressPtr != NULL;
+#endif
+}
+
+static
+void close_gl(void) {
+	if (libGL == NULL) return; /* no library handle is held */
+	/* Unload opengl32.dll and clear the handle so later get_proc() calls return NULL. */
+	FreeLibrary((HMODULE)libGL);
+	libGL = NULL;
+}
+#else
+#include <dlfcn.h>
+static void* libGL; /* dlopen() handle of the GL library; NULL while none is open */
+/* Resolver signature for the glXGetProcAddressARB pointer that open_gl() looks up; not declared on Apple or Haiku. */
+#if !defined(__APPLE__) && !defined(__HAIKU__)
+typedef void* (APIENTRYP PFNGLXGETPROCADDRESSPROC_PRIVATE)(const char*);
+static PFNGLXGETPROCADDRESSPROC_PRIVATE gladGetProcAddressPtr;
+#endif
+
+static
+int open_gl(void) {
+	/* Candidate library paths, tried in order until one can be opened. */
+#ifdef __APPLE__
+	static const char* NAMES[] = {
+		"../Frameworks/OpenGL.framework/OpenGL",
+		"/Library/Frameworks/OpenGL.framework/OpenGL",
+		"/System/Library/Frameworks/OpenGL.framework/OpenGL",
+		"/System/Library/Frameworks/OpenGL.framework/Versions/Current/OpenGL"
+	};
+#else
+	static const char* NAMES[] = { "libGL.so.1", "libGL.so" };
+#endif
+	const size_t name_count = sizeof(NAMES) / sizeof(NAMES[0]);
+	size_t i;
+
+	for (i = 0; i < name_count; i++) {
+		libGL = dlopen(NAMES[i], RTLD_NOW | RTLD_GLOBAL);
+		if (libGL == NULL) continue;
+#if defined(__APPLE__) || defined(__HAIKU__)
+		return 1;
+#else
+		gladGetProcAddressPtr =
+			(PFNGLXGETPROCADDRESSPROC_PRIVATE)dlsym(libGL, "glXGetProcAddressARB");
+		return gladGetProcAddressPtr != NULL;
+#endif
+	}
+
+	return 0;
+}
+
+static
+void close_gl(void) {
+	if (libGL == NULL) return; /* no library handle is held */
+	/* Close the dlopen() handle and clear it so later get_proc() calls return NULL. */
+	dlclose(libGL);
+	libGL = NULL;
+}
+#endif
+
+static
+void* get_proc(const char* namez) {
+	void* proc = NULL;
+
+	/* No library handle is held, so there is nothing to resolve against. */
+	if (libGL == NULL) return NULL;
+
+#if !defined(__APPLE__) && !defined(__HAIKU__)
+	/* Prefer the GL-specific resolver that open_gl() looked up, when available. */
+	if (gladGetProcAddressPtr != NULL) proc = gladGetProcAddressPtr(namez);
+#endif
+	if (proc != NULL) return proc;
+
+	/* Otherwise fall back to a plain symbol lookup in the library itself. */
+#if defined(_WIN32) || defined(__CYGWIN__)
+	return (void*)GetProcAddress((HMODULE)libGL, namez);
+#else
+	return dlsym(libGL, namez);
+#endif
+}
+
+int gladLoadGL(void) {
+	int status;
+	/* Report 0 when open_gl() could not provide a usable GL library. */
+	if (!open_gl()) return 0;
+
+	/* Hand get_proc to the generic loader, then drop the library handle again. */
+	status = gladLoadGLLoader(&get_proc);
+	close_gl();
+	return status;
+}
+
+struct gladGLversionStruct GLVersion = { 0, 0 };
+/* _GLAD_IS_SOME_NEW_VERSION enables the indexed (glGetStringi) extension path in get_exts() and has_ext(). */
+#if defined(GL_ES_VERSION_3_0) || defined(GL_VERSION_3_0)
+#define _GLAD_IS_SOME_NEW_VERSION 1
+#endif
+/* NOTE(review): presumably the highest GL version the loader resolved; assigned outside this excerpt - confirm. */
+static int max_loaded_major;
+static int max_loaded_minor;
+/* Extension cache: exts is the GL_EXTENSIONS string (major < 3); exts_i holds num_exts_i copied names otherwise. */
+static const char* exts = NULL;
+static int num_exts_i = 0;
+static char** exts_i = NULL;
+
+/* Caches the current context's extension list for has_ext(): the single
+ * GL_EXTENSIONS string when max_loaded_major < 3 (or the indexed path is not
+ * compiled in), otherwise a malloc'ed copy of every glGetStringi() entry.
+ * Returns 1 on success, 0 when the indexed path ends up without a table. */
+static int get_exts(void) {
+#ifdef _GLAD_IS_SOME_NEW_VERSION
+	if (max_loaded_major < 3) {
+#endif
+		exts = (const char*)glGetString(GL_EXTENSIONS);
+#ifdef _GLAD_IS_SOME_NEW_VERSION
+	}
+	else {
+		unsigned int index;
+
+		num_exts_i = 0;
+		glGetIntegerv(GL_NUM_EXTENSIONS, &num_exts_i);
+		if (num_exts_i > 0) {
+			/* Grow via a temporary so the old table is not lost if realloc fails. */
+			char** tmp_exts_i = (char**)realloc((void*)exts_i, (size_t)num_exts_i * (sizeof * exts_i));
+			if (tmp_exts_i == NULL) return 0;
+			exts_i = tmp_exts_i;
+		}
+		if (exts_i == NULL) return 0;
+
+		for (index = 0; index < (unsigned)num_exts_i; index++) {
+			/* glGetStringi() returns NULL on error; store a NULL slot (which has_ext()
+			 * and free_exts() tolerate) instead of handing NULL to strlen(). */
+			const char* gl_str_tmp = (const char*)glGetStringi(GL_EXTENSIONS, index);
+			size_t len = (gl_str_tmp != NULL) ? strlen(gl_str_tmp) : 0;
+			char* local_str = (gl_str_tmp != NULL) ? (char*)malloc((len + 1) * sizeof(char)) : NULL;
+
+			if (local_str != NULL) memcpy(local_str, gl_str_tmp, (len + 1) * sizeof(char));
+			exts_i[index] = local_str;
+		}
+	}
+#endif
+	return 1;
+}
+
+static void free_exts(void) {
+	int i;
+	if (exts_i == NULL) return; /* nothing to free */
+	/* Release every copied name (free() accepts the NULL slots), then the table itself. */
+	for (i = 0; i < num_exts_i; i++) {
+		free((char*)exts_i[i]);
+	}
+	free((void*)exts_i);
+	exts_i = NULL;
+}
+
+static int has_ext(const char* ext) {
+#ifdef _GLAD_IS_SOME_NEW_VERSION
+	if (max_loaded_major < 3) {
+#endif
+		/* String path: scan the space-separated GL_EXTENSIONS list cached in exts. */
+		const char* cursor = exts;
+		size_t ext_len;
+
+		if (cursor == NULL || ext == NULL) return 0;
+		ext_len = strlen(ext);
+
+		for (;;) {
+			const char* hit = strstr(cursor, ext);
+			const char* after;
+			int starts_token;
+
+			if (hit == NULL) return 0;
+
+			/* Accept the hit only when it is a whole token, not part of a longer name. */
+			after = hit + ext_len;
+			starts_token = (hit == cursor || *(hit - 1) == ' ');
+			if (starts_token && (*after == ' ' || *after == '\0')) return 1;
+
+			cursor = after; /* resume the search just past this partial match */
+		}
+#ifdef _GLAD_IS_SOME_NEW_VERSION
+	}
+	else {
+		/* Indexed path: exact comparison against each name copied by get_exts(). */
+		int i;
+
+		if (exts_i == NULL) return 0;
+		for (i = 0; i < num_exts_i; i++) {
+			const char* name = exts_i[i];
+			if (name != NULL && strcmp(name, ext) == 0) return 1;
+		}
+	}
+#endif
+
+	/* Reached only when the indexed path found no match. */
+	return 0;
+}
+int GLAD_GL_VERSION_1_0 = 0; /* One flag per GL version, all starting at 0. NOTE(review): presumably set by the version-detection code outside this excerpt - confirm. */
+int GLAD_GL_VERSION_1_1 = 0;
+int GLAD_GL_VERSION_1_2 = 0;
+int GLAD_GL_VERSION_1_3 = 0;
+int GLAD_GL_VERSION_1_4 = 0;
+int GLAD_GL_VERSION_1_5 = 0;
+int GLAD_GL_VERSION_2_0 = 0;
+int GLAD_GL_VERSION_2_1 = 0;
+int GLAD_GL_VERSION_3_0 = 0;
+int GLAD_GL_VERSION_3_1 = 0;
+int GLAD_GL_VERSION_3_2 = 0;
+int GLAD_GL_VERSION_3_3 = 0;
+PFNGLACCUMPROC glad_glAccum = NULL;
+PFNGLACTIVETEXTUREPROC glad_glActiveTexture = NULL;
+PFNGLALPHAFUNCPROC glad_glAlphaFunc = NULL;
+PFNGLARETEXTURESRESIDENTPROC glad_glAreTexturesResident = NULL;
+PFNGLARRAYELEMENTPROC glad_glArrayElement = NULL;
+PFNGLATTACHSHADERPROC glad_glAttachShader = NULL;
+PFNGLBEGINPROC glad_glBegin = NULL;
+PFNGLBEGINCONDITIONALRENDERPROC glad_glBeginConditionalRender = NULL;
+PFNGLBEGINQUERYPROC glad_glBeginQuery = NULL;
+PFNGLBEGINTRANSFORMFEEDBACKPROC glad_glBeginTransformFeedback = NULL;
+PFNGLBINDATTRIBLOCATIONPROC glad_glBindAttribLocation = NULL;
+PFNGLBINDBUFFERPROC glad_glBindBuffer = NULL;
+PFNGLBINDBUFFERBASEPROC glad_glBindBufferBase = NULL;
+PFNGLBINDBUFFERRANGEPROC glad_glBindBufferRange = NULL;
+PFNGLBINDFRAGDATALOCATIONPROC glad_glBindFragDataLocation = NULL;
+PFNGLBINDFRAGDATALOCATIONINDEXEDPROC glad_glBindFragDataLocationIndexed = NULL;
+PFNGLBINDFRAMEBUFFERPROC glad_glBindFramebuffer = NULL;
+PFNGLBINDRENDERBUFFERPROC glad_glBindRenderbuffer = NULL;
+PFNGLBINDSAMPLERPROC glad_glBindSampler = NULL;
+PFNGLBINDTEXTUREPROC glad_glBindTexture = NULL;
+PFNGLBINDVERTEXARRAYPROC glad_glBindVertexArray = NULL;
+PFNGLBITMAPPROC glad_glBitmap = NULL;
+PFNGLBLENDCOLORPROC glad_glBlendColor = NULL;
+PFNGLBLENDEQUATIONPROC glad_glBlendEquation = NULL;
+PFNGLBLENDEQUATIONSEPARATEPROC glad_glBlendEquationSeparate = NULL;
+PFNGLBLENDFUNCPROC glad_glBlendFunc = NULL;
+PFNGLBLENDFUNCSEPARATEPROC glad_glBlendFuncSeparate = NULL;
+PFNGLBLITFRAMEBUFFERPROC glad_glBlitFramebuffer = NULL;
+PFNGLBUFFERDATAPROC glad_glBufferData = NULL;
+PFNGLBUFFERSUBDATAPROC glad_glBufferSubData = NULL;
+PFNGLCALLLISTPROC glad_glCallList = NULL;
+PFNGLCALLLISTSPROC glad_glCallLists = NULL;
+PFNGLCHECKFRAMEBUFFERSTATUSPROC glad_glCheckFramebufferStatus = NULL;
+PFNGLCLAMPCOLORPROC glad_glClampColor = NULL;
+PFNGLCLEARPROC glad_glClear = NULL;
+PFNGLCLEARACCUMPROC glad_glClearAccum = NULL;
+PFNGLCLEARBUFFERFIPROC glad_glClearBufferfi = NULL;
+PFNGLCLEARBUFFERFVPROC glad_glClearBufferfv = NULL;
+PFNGLCLEARBUFFERIVPROC glad_glClearBufferiv = NULL;
+PFNGLCLEARBUFFERUIVPROC glad_glClearBufferuiv = NULL;
+PFNGLCLEARCOLORPROC glad_glClearColor = NULL;
+PFNGLCLEARDEPTHPROC glad_glClearDepth = NULL;
+PFNGLCLEARINDEXPROC glad_glClearIndex = NULL;
+PFNGLCLEARSTENCILPROC glad_glClearStencil = NULL;
+PFNGLCLIENTACTIVETEXTUREPROC glad_glClientActiveTexture = NULL;
+PFNGLCLIENTWAITSYNCPROC glad_glClientWaitSync = NULL;
+PFNGLCLIPPLANEPROC glad_glClipPlane = NULL;
+PFNGLCOLOR3BPROC glad_glColor3b = NULL;
+PFNGLCOLOR3BVPROC glad_glColor3bv = NULL;
+PFNGLCOLOR3DPROC glad_glColor3d = NULL;
+PFNGLCOLOR3DVPROC glad_glColor3dv = NULL;
+PFNGLCOLOR3FPROC glad_glColor3f = NULL;
+PFNGLCOLOR3FVPROC glad_glColor3fv = NULL;
+PFNGLCOLOR3IPROC glad_glColor3i = NULL;
+PFNGLCOLOR3IVPROC glad_glColor3iv = NULL;
+PFNGLCOLOR3SPROC glad_glColor3s = NULL;
+PFNGLCOLOR3SVPROC glad_glColor3sv = NULL;
+PFNGLCOLOR3UBPROC glad_glColor3ub = NULL;
+PFNGLCOLOR3UBVPROC glad_glColor3ubv = NULL;
+PFNGLCOLOR3UIPROC glad_glColor3ui = NULL;
+PFNGLCOLOR3UIVPROC glad_glColor3uiv = NULL;
+PFNGLCOLOR3USPROC glad_glColor3us = NULL;
+PFNGLCOLOR3USVPROC glad_glColor3usv = NULL;
+PFNGLCOLOR4BPROC glad_glColor4b = NULL;
+PFNGLCOLOR4BVPROC glad_glColor4bv = NULL;
+PFNGLCOLOR4DPROC glad_glColor4d = NULL;
+PFNGLCOLOR4DVPROC glad_glColor4dv = NULL;
+PFNGLCOLOR4FPROC glad_glColor4f = NULL;
+PFNGLCOLOR4FVPROC glad_glColor4fv = NULL;
+PFNGLCOLOR4IPROC glad_glColor4i = NULL;
+PFNGLCOLOR4IVPROC glad_glColor4iv = NULL;
+PFNGLCOLOR4SPROC glad_glColor4s = NULL;
+PFNGLCOLOR4SVPROC glad_glColor4sv = NULL;
+PFNGLCOLOR4UBPROC glad_glColor4ub = NULL;
+PFNGLCOLOR4UBVPROC glad_glColor4ubv = NULL;
+PFNGLCOLOR4UIPROC glad_glColor4ui = NULL;
+PFNGLCOLOR4UIVPROC glad_glColor4uiv = NULL;
+PFNGLCOLOR4USPROC glad_glColor4us = NULL;
+PFNGLCOLOR4USVPROC glad_glColor4usv = NULL;
+PFNGLCOLORMASKPROC glad_glColorMask = NULL;
+PFNGLCOLORMASKIPROC glad_glColorMaski = NULL;
+PFNGLCOLORMATERIALPROC glad_glColorMaterial = NULL;
+PFNGLCOLORP3UIPROC glad_glColorP3ui = NULL;
+PFNGLCOLORP3UIVPROC glad_glColorP3uiv = NULL;
+PFNGLCOLORP4UIPROC glad_glColorP4ui = NULL;
+PFNGLCOLORP4UIVPROC glad_glColorP4uiv = NULL;
+PFNGLCOLORPOINTERPROC glad_glColorPointer = NULL;
+PFNGLCOMPILESHADERPROC glad_glCompileShader = NULL;
+PFNGLCOMPRESSEDTEXIMAGE1DPROC glad_glCompressedTexImage1D = NULL;
+PFNGLCOMPRESSEDTEXIMAGE2DPROC glad_glCompressedTexImage2D = NULL;
+PFNGLCOMPRESSEDTEXIMAGE3DPROC glad_glCompressedTexImage3D = NULL;
+PFNGLCOMPRESSEDTEXSUBIMAGE1DPROC glad_glCompressedTexSubImage1D = NULL;
+PFNGLCOMPRESSEDTEXSUBIMAGE2DPROC glad_glCompressedTexSubImage2D = NULL;
+PFNGLCOMPRESSEDTEXSUBIMAGE3DPROC glad_glCompressedTexSubImage3D = NULL;
+PFNGLCOPYBUFFERSUBDATAPROC glad_glCopyBufferSubData = NULL;
+PFNGLCOPYPIXELSPROC glad_glCopyPixels = NULL;
+PFNGLCOPYTEXIMAGE1DPROC glad_glCopyTexImage1D = NULL;
+PFNGLCOPYTEXIMAGE2DPROC glad_glCopyTexImage2D = NULL;
+PFNGLCOPYTEXSUBIMAGE1DPROC glad_glCopyTexSubImage1D = NULL;
+PFNGLCOPYTEXSUBIMAGE2DPROC glad_glCopyTexSubImage2D = NULL;
+PFNGLCOPYTEXSUBIMAGE3DPROC glad_glCopyTexSubImage3D = NULL;
+PFNGLCREATEPROGRAMPROC glad_glCreateProgram = NULL;
+PFNGLCREATESHADERPROC glad_glCreateShader = NULL;
+PFNGLCULLFACEPROC glad_glCullFace = NULL;
+PFNGLDELETEBUFFERSPROC glad_glDeleteBuffers = NULL;
+PFNGLDELETEFRAMEBUFFERSPROC glad_glDeleteFramebuffers = NULL;
+PFNGLDELETELISTSPROC glad_glDeleteLists = NULL;
+PFNGLDELETEPROGRAMPROC glad_glDeleteProgram = NULL;
+PFNGLDELETEQUERIESPROC glad_glDeleteQueries = NULL;
+PFNGLDELETERENDERBUFFERSPROC glad_glDeleteRenderbuffers = NULL;
+PFNGLDELETESAMPLERSPROC glad_glDeleteSamplers = NULL;
+PFNGLDELETESHADERPROC glad_glDeleteShader = NULL;
+PFNGLDELETESYNCPROC glad_glDeleteSync = NULL;
+PFNGLDELETETEXTURESPROC glad_glDeleteTextures = NULL;
+PFNGLDELETEVERTEXARRAYSPROC glad_glDeleteVertexArrays = NULL;
+PFNGLDEPTHFUNCPROC glad_glDepthFunc = NULL;
+PFNGLDEPTHMASKPROC glad_glDepthMask = NULL;
+PFNGLDEPTHRANGEPROC glad_glDepthRange = NULL;
+PFNGLDETACHSHADERPROC glad_glDetachShader = NULL;
+PFNGLDISABLEPROC glad_glDisable = NULL;
+PFNGLDISABLECLIENTSTATEPROC glad_glDisableClientState = NULL;
+PFNGLDISABLEVERTEXATTRIBARRAYPROC glad_glDisableVertexAttribArray = NULL;
+PFNGLDISABLEIPROC glad_glDisablei = NULL;
+PFNGLDRAWARRAYSPROC glad_glDrawArrays = NULL;
+PFNGLDRAWARRAYSINSTANCEDPROC glad_glDrawArraysInstanced = NULL;
+PFNGLDRAWBUFFERPROC glad_glDrawBuffer = NULL;
+PFNGLDRAWBUFFERSPROC glad_glDrawBuffers = NULL;
+PFNGLDRAWELEMENTSPROC glad_glDrawElements = NULL;
+PFNGLDRAWELEMENTSBASEVERTEXPROC glad_glDrawElementsBaseVertex = NULL;
+PFNGLDRAWELEMENTSINSTANCEDPROC glad_glDrawElementsInstanced = NULL;
+PFNGLDRAWELEMENTSINSTANCEDBASEVERTEXPROC glad_glDrawElementsInstancedBaseVertex = NULL;
+PFNGLDRAWPIXELSPROC glad_glDrawPixels = NULL;
+PFNGLDRAWRANGEELEMENTSPROC glad_glDrawRangeElements = NULL;
+PFNGLDRAWRANGEELEMENTSBASEVERTEXPROC glad_glDrawRangeElementsBaseVertex = NULL;
+PFNGLEDGEFLAGPROC glad_glEdgeFlag = NULL;
+PFNGLEDGEFLAGPOINTERPROC glad_glEdgeFlagPointer = NULL;
+PFNGLEDGEFLAGVPROC glad_glEdgeFlagv = NULL;
+PFNGLENABLEPROC glad_glEnable = NULL;
+PFNGLENABLECLIENTSTATEPROC glad_glEnableClientState = NULL;
+PFNGLENABLEVERTEXATTRIBARRAYPROC glad_glEnableVertexAttribArray = NULL;
+PFNGLENABLEIPROC glad_glEnablei = NULL;
+PFNGLENDPROC glad_glEnd = NULL;
+PFNGLENDCONDITIONALRENDERPROC glad_glEndConditionalRender = NULL;
+PFNGLENDLISTPROC glad_glEndList = NULL;
+PFNGLENDQUERYPROC glad_glEndQuery = NULL;
+PFNGLENDTRANSFORMFEEDBACKPROC glad_glEndTransformFeedback = NULL;
+PFNGLEVALCOORD1DPROC glad_glEvalCoord1d = NULL;
+PFNGLEVALCOORD1DVPROC glad_glEvalCoord1dv = NULL;
+PFNGLEVALCOORD1FPROC glad_glEvalCoord1f = NULL;
+PFNGLEVALCOORD1FVPROC glad_glEvalCoord1fv = NULL;
+PFNGLEVALCOORD2DPROC glad_glEvalCoord2d = NULL;
+PFNGLEVALCOORD2DVPROC glad_glEvalCoord2dv = NULL;
+PFNGLEVALCOORD2FPROC glad_glEvalCoord2f = NULL;
+PFNGLEVALCOORD2FVPROC glad_glEvalCoord2fv = NULL;
+PFNGLEVALMESH1PROC glad_glEvalMesh1 = NULL;
+PFNGLEVALMESH2PROC glad_glEvalMesh2 = NULL;
+PFNGLEVALPOINT1PROC glad_glEvalPoint1 = NULL;
+PFNGLEVALPOINT2PROC glad_glEvalPoint2 = NULL;
+PFNGLFEEDBACKBUFFERPROC glad_glFeedbackBuffer = NULL;
+PFNGLFENCESYNCPROC glad_glFenceSync = NULL;
+PFNGLFINISHPROC glad_glFinish = NULL;
+PFNGLFLUSHPROC glad_glFlush = NULL;
+PFNGLFLUSHMAPPEDBUFFERRANGEPROC glad_glFlushMappedBufferRange = NULL;
+PFNGLFOGCOORDPOINTERPROC glad_glFogCoordPointer = NULL;
+PFNGLFOGCOORDDPROC glad_glFogCoordd = NULL;
+PFNGLFOGCOORDDVPROC glad_glFogCoorddv = NULL;
+PFNGLFOGCOORDFPROC glad_glFogCoordf = NULL;
+PFNGLFOGCOORDFVPROC glad_glFogCoordfv = NULL;
+PFNGLFOGFPROC glad_glFogf = NULL;
+PFNGLFOGFVPROC glad_glFogfv = NULL;
+PFNGLFOGIPROC glad_glFogi = NULL;
+PFNGLFOGIVPROC glad_glFogiv = NULL;
+PFNGLFRAMEBUFFERRENDERBUFFERPROC glad_glFramebufferRenderbuffer = NULL;
+PFNGLFRAMEBUFFERTEXTUREPROC glad_glFramebufferTexture = NULL;
+PFNGLFRAMEBUFFERTEXTURE1DPROC glad_glFramebufferTexture1D = NULL;
+PFNGLFRAMEBUFFERTEXTURE2DPROC glad_glFramebufferTexture2D = NULL;
+PFNGLFRAMEBUFFERTEXTURE3DPROC glad_glFramebufferTexture3D = NULL;
+PFNGLFRAMEBUFFERTEXTURELAYERPROC glad_glFramebufferTextureLayer = NULL;
+PFNGLFRONTFACEPROC glad_glFrontFace = NULL;
+PFNGLFRUSTUMPROC glad_glFrustum = NULL;
+PFNGLGENBUFFERSPROC glad_glGenBuffers = NULL;
+PFNGLGENFRAMEBUFFERSPROC glad_glGenFramebuffers = NULL;
+PFNGLGENLISTSPROC glad_glGenLists = NULL;
+PFNGLGENQUERIESPROC glad_glGenQueries = NULL;
+PFNGLGENRENDERBUFFERSPROC glad_glGenRenderbuffers = NULL;
+PFNGLGENSAMPLERSPROC glad_glGenSamplers = NULL;
+PFNGLGENTEXTURESPROC glad_glGenTextures = NULL;
+PFNGLGENVERTEXARRAYSPROC glad_glGenVertexArrays = NULL;
+PFNGLGENERATEMIPMAPPROC glad_glGenerateMipmap = NULL;
+PFNGLGETACTIVEATTRIBPROC glad_glGetActiveAttrib = NULL; /* First of the glad_glGet* pointer slots (through glad_glGetVertexAttribiv); all are defined NULL and load_GL_VERSION_1_0 assigns a subset of them. */
+PFNGLGETACTIVEUNIFORMPROC glad_glGetActiveUniform = NULL;
+PFNGLGETACTIVEUNIFORMBLOCKNAMEPROC glad_glGetActiveUniformBlockName = NULL;
+PFNGLGETACTIVEUNIFORMBLOCKIVPROC glad_glGetActiveUniformBlockiv = NULL;
+PFNGLGETACTIVEUNIFORMNAMEPROC glad_glGetActiveUniformName = NULL;
+PFNGLGETACTIVEUNIFORMSIVPROC glad_glGetActiveUniformsiv = NULL;
+PFNGLGETATTACHEDSHADERSPROC glad_glGetAttachedShaders = NULL;
+PFNGLGETATTRIBLOCATIONPROC glad_glGetAttribLocation = NULL;
+PFNGLGETBOOLEANI_VPROC glad_glGetBooleani_v = NULL;
+PFNGLGETBOOLEANVPROC glad_glGetBooleanv = NULL; /* set by load_GL_VERSION_1_0 from load("glGetBooleanv") */
+PFNGLGETBUFFERPARAMETERI64VPROC glad_glGetBufferParameteri64v = NULL;
+PFNGLGETBUFFERPARAMETERIVPROC glad_glGetBufferParameteriv = NULL;
+PFNGLGETBUFFERPOINTERVPROC glad_glGetBufferPointerv = NULL;
+PFNGLGETBUFFERSUBDATAPROC glad_glGetBufferSubData = NULL;
+PFNGLGETCLIPPLANEPROC glad_glGetClipPlane = NULL;
+PFNGLGETCOMPRESSEDTEXIMAGEPROC glad_glGetCompressedTexImage = NULL;
+PFNGLGETDOUBLEVPROC glad_glGetDoublev = NULL; /* set by load_GL_VERSION_1_0 from load("glGetDoublev") */
+PFNGLGETERRORPROC glad_glGetError = NULL; /* set by load_GL_VERSION_1_0 from load("glGetError") */
+PFNGLGETFLOATVPROC glad_glGetFloatv = NULL; /* set by load_GL_VERSION_1_0 from load("glGetFloatv") */
+PFNGLGETFRAGDATAINDEXPROC glad_glGetFragDataIndex = NULL;
+PFNGLGETFRAGDATALOCATIONPROC glad_glGetFragDataLocation = NULL;
+PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVPROC glad_glGetFramebufferAttachmentParameteriv = NULL;
+PFNGLGETINTEGER64I_VPROC glad_glGetInteger64i_v = NULL;
+PFNGLGETINTEGER64VPROC glad_glGetInteger64v = NULL;
+PFNGLGETINTEGERI_VPROC glad_glGetIntegeri_v = NULL;
+PFNGLGETINTEGERVPROC glad_glGetIntegerv = NULL; /* set by load_GL_VERSION_1_0 from load("glGetIntegerv") */
+PFNGLGETLIGHTFVPROC glad_glGetLightfv = NULL;
+PFNGLGETLIGHTIVPROC glad_glGetLightiv = NULL;
+PFNGLGETMAPDVPROC glad_glGetMapdv = NULL;
+PFNGLGETMAPFVPROC glad_glGetMapfv = NULL;
+PFNGLGETMAPIVPROC glad_glGetMapiv = NULL;
+PFNGLGETMATERIALFVPROC glad_glGetMaterialfv = NULL;
+PFNGLGETMATERIALIVPROC glad_glGetMaterialiv = NULL;
+PFNGLGETMULTISAMPLEFVPROC glad_glGetMultisamplefv = NULL;
+PFNGLGETPIXELMAPFVPROC glad_glGetPixelMapfv = NULL;
+PFNGLGETPIXELMAPUIVPROC glad_glGetPixelMapuiv = NULL;
+PFNGLGETPIXELMAPUSVPROC glad_glGetPixelMapusv = NULL;
+PFNGLGETPOINTERVPROC glad_glGetPointerv = NULL;
+PFNGLGETPOLYGONSTIPPLEPROC glad_glGetPolygonStipple = NULL;
+PFNGLGETPROGRAMINFOLOGPROC glad_glGetProgramInfoLog = NULL;
+PFNGLGETPROGRAMIVPROC glad_glGetProgramiv = NULL;
+PFNGLGETQUERYOBJECTI64VPROC glad_glGetQueryObjecti64v = NULL;
+PFNGLGETQUERYOBJECTIVPROC glad_glGetQueryObjectiv = NULL;
+PFNGLGETQUERYOBJECTUI64VPROC glad_glGetQueryObjectui64v = NULL;
+PFNGLGETQUERYOBJECTUIVPROC glad_glGetQueryObjectuiv = NULL;
+PFNGLGETQUERYIVPROC glad_glGetQueryiv = NULL;
+PFNGLGETRENDERBUFFERPARAMETERIVPROC glad_glGetRenderbufferParameteriv = NULL;
+PFNGLGETSAMPLERPARAMETERIIVPROC glad_glGetSamplerParameterIiv = NULL;
+PFNGLGETSAMPLERPARAMETERIUIVPROC glad_glGetSamplerParameterIuiv = NULL;
+PFNGLGETSAMPLERPARAMETERFVPROC glad_glGetSamplerParameterfv = NULL;
+PFNGLGETSAMPLERPARAMETERIVPROC glad_glGetSamplerParameteriv = NULL;
+PFNGLGETSHADERINFOLOGPROC glad_glGetShaderInfoLog = NULL;
+PFNGLGETSHADERSOURCEPROC glad_glGetShaderSource = NULL;
+PFNGLGETSHADERIVPROC glad_glGetShaderiv = NULL;
+PFNGLGETSTRINGPROC glad_glGetString = NULL; /* set by load_GL_VERSION_1_0 from load("glGetString") */
+PFNGLGETSTRINGIPROC glad_glGetStringi = NULL;
+PFNGLGETSYNCIVPROC glad_glGetSynciv = NULL;
+PFNGLGETTEXENVFVPROC glad_glGetTexEnvfv = NULL;
+PFNGLGETTEXENVIVPROC glad_glGetTexEnviv = NULL;
+PFNGLGETTEXGENDVPROC glad_glGetTexGendv = NULL;
+PFNGLGETTEXGENFVPROC glad_glGetTexGenfv = NULL;
+PFNGLGETTEXGENIVPROC glad_glGetTexGeniv = NULL;
+PFNGLGETTEXIMAGEPROC glad_glGetTexImage = NULL; /* set by load_GL_VERSION_1_0 from load("glGetTexImage") */
+PFNGLGETTEXLEVELPARAMETERFVPROC glad_glGetTexLevelParameterfv = NULL; /* set by load_GL_VERSION_1_0 from load("glGetTexLevelParameterfv") */
+PFNGLGETTEXLEVELPARAMETERIVPROC glad_glGetTexLevelParameteriv = NULL; /* set by load_GL_VERSION_1_0 from load("glGetTexLevelParameteriv") */
+PFNGLGETTEXPARAMETERIIVPROC glad_glGetTexParameterIiv = NULL;
+PFNGLGETTEXPARAMETERIUIVPROC glad_glGetTexParameterIuiv = NULL;
+PFNGLGETTEXPARAMETERFVPROC glad_glGetTexParameterfv = NULL; /* set by load_GL_VERSION_1_0 from load("glGetTexParameterfv") */
+PFNGLGETTEXPARAMETERIVPROC glad_glGetTexParameteriv = NULL; /* set by load_GL_VERSION_1_0 from load("glGetTexParameteriv") */
+PFNGLGETTRANSFORMFEEDBACKVARYINGPROC glad_glGetTransformFeedbackVarying = NULL;
+PFNGLGETUNIFORMBLOCKINDEXPROC glad_glGetUniformBlockIndex = NULL;
+PFNGLGETUNIFORMINDICESPROC glad_glGetUniformIndices = NULL;
+PFNGLGETUNIFORMLOCATIONPROC glad_glGetUniformLocation = NULL;
+PFNGLGETUNIFORMFVPROC glad_glGetUniformfv = NULL;
+PFNGLGETUNIFORMIVPROC glad_glGetUniformiv = NULL;
+PFNGLGETUNIFORMUIVPROC glad_glGetUniformuiv = NULL;
+PFNGLGETVERTEXATTRIBIIVPROC glad_glGetVertexAttribIiv = NULL;
+PFNGLGETVERTEXATTRIBIUIVPROC glad_glGetVertexAttribIuiv = NULL;
+PFNGLGETVERTEXATTRIBPOINTERVPROC glad_glGetVertexAttribPointerv = NULL;
+PFNGLGETVERTEXATTRIBDVPROC glad_glGetVertexAttribdv = NULL;
+PFNGLGETVERTEXATTRIBFVPROC glad_glGetVertexAttribfv = NULL;
+PFNGLGETVERTEXATTRIBIVPROC glad_glGetVertexAttribiv = NULL;
+PFNGLHINTPROC glad_glHint = NULL; /* defined NULL; set by load_GL_VERSION_1_0 from load("glHint") */
+PFNGLINDEXMASKPROC glad_glIndexMask = NULL;
+PFNGLINDEXPOINTERPROC glad_glIndexPointer = NULL;
+PFNGLINDEXDPROC glad_glIndexd = NULL; /* glad_glIndexd through glad_glIndexsv are set by load_GL_VERSION_1_0 */
+PFNGLINDEXDVPROC glad_glIndexdv = NULL;
+PFNGLINDEXFPROC glad_glIndexf = NULL;
+PFNGLINDEXFVPROC glad_glIndexfv = NULL;
+PFNGLINDEXIPROC glad_glIndexi = NULL;
+PFNGLINDEXIVPROC glad_glIndexiv = NULL;
+PFNGLINDEXSPROC glad_glIndexs = NULL;
+PFNGLINDEXSVPROC glad_glIndexsv = NULL;
+PFNGLINDEXUBPROC glad_glIndexub = NULL;
+PFNGLINDEXUBVPROC glad_glIndexubv = NULL;
+PFNGLINITNAMESPROC glad_glInitNames = NULL;
+PFNGLINTERLEAVEDARRAYSPROC glad_glInterleavedArrays = NULL;
+PFNGLISBUFFERPROC glad_glIsBuffer = NULL;
+PFNGLISENABLEDPROC glad_glIsEnabled = NULL; /* set by load_GL_VERSION_1_0 from load("glIsEnabled") */
+PFNGLISENABLEDIPROC glad_glIsEnabledi = NULL;
+PFNGLISFRAMEBUFFERPROC glad_glIsFramebuffer = NULL;
+PFNGLISLISTPROC glad_glIsList = NULL;
+PFNGLISPROGRAMPROC glad_glIsProgram = NULL;
+PFNGLISQUERYPROC glad_glIsQuery = NULL;
+PFNGLISRENDERBUFFERPROC glad_glIsRenderbuffer = NULL;
+PFNGLISSAMPLERPROC glad_glIsSampler = NULL;
+PFNGLISSHADERPROC glad_glIsShader = NULL;
+PFNGLISSYNCPROC glad_glIsSync = NULL;
+PFNGLISTEXTUREPROC glad_glIsTexture = NULL;
+PFNGLISVERTEXARRAYPROC glad_glIsVertexArray = NULL;
+PFNGLLIGHTMODELFPROC glad_glLightModelf = NULL; /* Pointer slots from glad_glLightModelf through glad_glMultiDrawElementsBaseVertex, each defined NULL; only the ones annotated below have a visible assignment in load_GL_VERSION_1_0. */
+PFNGLLIGHTMODELFVPROC glad_glLightModelfv = NULL;
+PFNGLLIGHTMODELIPROC glad_glLightModeli = NULL;
+PFNGLLIGHTMODELIVPROC glad_glLightModeliv = NULL;
+PFNGLLIGHTFPROC glad_glLightf = NULL;
+PFNGLLIGHTFVPROC glad_glLightfv = NULL;
+PFNGLLIGHTIPROC glad_glLighti = NULL;
+PFNGLLIGHTIVPROC glad_glLightiv = NULL;
+PFNGLLINESTIPPLEPROC glad_glLineStipple = NULL;
+PFNGLLINEWIDTHPROC glad_glLineWidth = NULL; /* set by load_GL_VERSION_1_0 from load("glLineWidth") */
+PFNGLLINKPROGRAMPROC glad_glLinkProgram = NULL;
+PFNGLLISTBASEPROC glad_glListBase = NULL; /* set by load_GL_VERSION_1_0 from load("glListBase") */
+PFNGLLOADIDENTITYPROC glad_glLoadIdentity = NULL;
+PFNGLLOADMATRIXDPROC glad_glLoadMatrixd = NULL;
+PFNGLLOADMATRIXFPROC glad_glLoadMatrixf = NULL;
+PFNGLLOADNAMEPROC glad_glLoadName = NULL;
+PFNGLLOADTRANSPOSEMATRIXDPROC glad_glLoadTransposeMatrixd = NULL;
+PFNGLLOADTRANSPOSEMATRIXFPROC glad_glLoadTransposeMatrixf = NULL;
+PFNGLLOGICOPPROC glad_glLogicOp = NULL; /* set by load_GL_VERSION_1_0 from load("glLogicOp") */
+PFNGLMAP1DPROC glad_glMap1d = NULL;
+PFNGLMAP1FPROC glad_glMap1f = NULL;
+PFNGLMAP2DPROC glad_glMap2d = NULL;
+PFNGLMAP2FPROC glad_glMap2f = NULL;
+PFNGLMAPBUFFERPROC glad_glMapBuffer = NULL;
+PFNGLMAPBUFFERRANGEPROC glad_glMapBufferRange = NULL;
+PFNGLMAPGRID1DPROC glad_glMapGrid1d = NULL;
+PFNGLMAPGRID1FPROC glad_glMapGrid1f = NULL;
+PFNGLMAPGRID2DPROC glad_glMapGrid2d = NULL;
+PFNGLMAPGRID2FPROC glad_glMapGrid2f = NULL;
+PFNGLMATERIALFPROC glad_glMaterialf = NULL;
+PFNGLMATERIALFVPROC glad_glMaterialfv = NULL;
+PFNGLMATERIALIPROC glad_glMateriali = NULL;
+PFNGLMATERIALIVPROC glad_glMaterialiv = NULL;
+PFNGLMATRIXMODEPROC glad_glMatrixMode = NULL;
+PFNGLMULTMATRIXDPROC glad_glMultMatrixd = NULL;
+PFNGLMULTMATRIXFPROC glad_glMultMatrixf = NULL;
+PFNGLMULTTRANSPOSEMATRIXDPROC glad_glMultTransposeMatrixd = NULL;
+PFNGLMULTTRANSPOSEMATRIXFPROC glad_glMultTransposeMatrixf = NULL;
+PFNGLMULTIDRAWARRAYSPROC glad_glMultiDrawArrays = NULL;
+PFNGLMULTIDRAWELEMENTSPROC glad_glMultiDrawElements = NULL;
+PFNGLMULTIDRAWELEMENTSBASEVERTEXPROC glad_glMultiDrawElementsBaseVertex = NULL;
+PFNGLMULTITEXCOORD1DPROC glad_glMultiTexCoord1d = NULL; /* glad_glMultiTexCoord* pointer slots (through glad_glMultiTexCoordP4uiv), each defined NULL. NOTE(review): no assignment to these is visible nearby; presumably another load_GL_* routine fills them in - confirm. */
+PFNGLMULTITEXCOORD1DVPROC glad_glMultiTexCoord1dv = NULL;
+PFNGLMULTITEXCOORD1FPROC glad_glMultiTexCoord1f = NULL;
+PFNGLMULTITEXCOORD1FVPROC glad_glMultiTexCoord1fv = NULL;
+PFNGLMULTITEXCOORD1IPROC glad_glMultiTexCoord1i = NULL;
+PFNGLMULTITEXCOORD1IVPROC glad_glMultiTexCoord1iv = NULL;
+PFNGLMULTITEXCOORD1SPROC glad_glMultiTexCoord1s = NULL;
+PFNGLMULTITEXCOORD1SVPROC glad_glMultiTexCoord1sv = NULL;
+PFNGLMULTITEXCOORD2DPROC glad_glMultiTexCoord2d = NULL;
+PFNGLMULTITEXCOORD2DVPROC glad_glMultiTexCoord2dv = NULL;
+PFNGLMULTITEXCOORD2FPROC glad_glMultiTexCoord2f = NULL;
+PFNGLMULTITEXCOORD2FVPROC glad_glMultiTexCoord2fv = NULL;
+PFNGLMULTITEXCOORD2IPROC glad_glMultiTexCoord2i = NULL;
+PFNGLMULTITEXCOORD2IVPROC glad_glMultiTexCoord2iv = NULL;
+PFNGLMULTITEXCOORD2SPROC glad_glMultiTexCoord2s = NULL;
+PFNGLMULTITEXCOORD2SVPROC glad_glMultiTexCoord2sv = NULL;
+PFNGLMULTITEXCOORD3DPROC glad_glMultiTexCoord3d = NULL;
+PFNGLMULTITEXCOORD3DVPROC glad_glMultiTexCoord3dv = NULL;
+PFNGLMULTITEXCOORD3FPROC glad_glMultiTexCoord3f = NULL;
+PFNGLMULTITEXCOORD3FVPROC glad_glMultiTexCoord3fv = NULL;
+PFNGLMULTITEXCOORD3IPROC glad_glMultiTexCoord3i = NULL;
+PFNGLMULTITEXCOORD3IVPROC glad_glMultiTexCoord3iv = NULL;
+PFNGLMULTITEXCOORD3SPROC glad_glMultiTexCoord3s = NULL;
+PFNGLMULTITEXCOORD3SVPROC glad_glMultiTexCoord3sv = NULL;
+PFNGLMULTITEXCOORD4DPROC glad_glMultiTexCoord4d = NULL;
+PFNGLMULTITEXCOORD4DVPROC glad_glMultiTexCoord4dv = NULL;
+PFNGLMULTITEXCOORD4FPROC glad_glMultiTexCoord4f = NULL;
+PFNGLMULTITEXCOORD4FVPROC glad_glMultiTexCoord4fv = NULL;
+PFNGLMULTITEXCOORD4IPROC glad_glMultiTexCoord4i = NULL;
+PFNGLMULTITEXCOORD4IVPROC glad_glMultiTexCoord4iv = NULL;
+PFNGLMULTITEXCOORD4SPROC glad_glMultiTexCoord4s = NULL;
+PFNGLMULTITEXCOORD4SVPROC glad_glMultiTexCoord4sv = NULL;
+PFNGLMULTITEXCOORDP1UIPROC glad_glMultiTexCoordP1ui = NULL;
+PFNGLMULTITEXCOORDP1UIVPROC glad_glMultiTexCoordP1uiv = NULL;
+PFNGLMULTITEXCOORDP2UIPROC glad_glMultiTexCoordP2ui = NULL;
+PFNGLMULTITEXCOORDP2UIVPROC glad_glMultiTexCoordP2uiv = NULL;
+PFNGLMULTITEXCOORDP3UIPROC glad_glMultiTexCoordP3ui = NULL;
+PFNGLMULTITEXCOORDP3UIVPROC glad_glMultiTexCoordP3uiv = NULL;
+PFNGLMULTITEXCOORDP4UIPROC glad_glMultiTexCoordP4ui = NULL;
+PFNGLMULTITEXCOORDP4UIVPROC glad_glMultiTexCoordP4uiv = NULL;
+PFNGLNEWLISTPROC glad_glNewList = NULL; /* defined NULL; set by load_GL_VERSION_1_0 from load("glNewList") */
+PFNGLNORMAL3BPROC glad_glNormal3b = NULL; /* glad_glNormal3b through glad_glNormal3sv are set by load_GL_VERSION_1_0 */
+PFNGLNORMAL3BVPROC glad_glNormal3bv = NULL;
+PFNGLNORMAL3DPROC glad_glNormal3d = NULL;
+PFNGLNORMAL3DVPROC glad_glNormal3dv = NULL;
+PFNGLNORMAL3FPROC glad_glNormal3f = NULL;
+PFNGLNORMAL3FVPROC glad_glNormal3fv = NULL;
+PFNGLNORMAL3IPROC glad_glNormal3i = NULL;
+PFNGLNORMAL3IVPROC glad_glNormal3iv = NULL;
+PFNGLNORMAL3SPROC glad_glNormal3s = NULL;
+PFNGLNORMAL3SVPROC glad_glNormal3sv = NULL;
+PFNGLNORMALP3UIPROC glad_glNormalP3ui = NULL;
+PFNGLNORMALP3UIVPROC glad_glNormalP3uiv = NULL;
+PFNGLNORMALPOINTERPROC glad_glNormalPointer = NULL;
+PFNGLORTHOPROC glad_glOrtho = NULL;
+PFNGLPASSTHROUGHPROC glad_glPassThrough = NULL;
+PFNGLPIXELMAPFVPROC glad_glPixelMapfv = NULL;
+PFNGLPIXELMAPUIVPROC glad_glPixelMapuiv = NULL;
+PFNGLPIXELMAPUSVPROC glad_glPixelMapusv = NULL;
+PFNGLPIXELSTOREFPROC glad_glPixelStoref = NULL; /* set by load_GL_VERSION_1_0 from load("glPixelStoref") */
+PFNGLPIXELSTOREIPROC glad_glPixelStorei = NULL; /* set by load_GL_VERSION_1_0 from load("glPixelStorei") */
+PFNGLPIXELTRANSFERFPROC glad_glPixelTransferf = NULL;
+PFNGLPIXELTRANSFERIPROC glad_glPixelTransferi = NULL;
+PFNGLPIXELZOOMPROC glad_glPixelZoom = NULL;
+PFNGLPOINTPARAMETERFPROC glad_glPointParameterf = NULL;
+PFNGLPOINTPARAMETERFVPROC glad_glPointParameterfv = NULL;
+PFNGLPOINTPARAMETERIPROC glad_glPointParameteri = NULL;
+PFNGLPOINTPARAMETERIVPROC glad_glPointParameteriv = NULL;
+PFNGLPOINTSIZEPROC glad_glPointSize = NULL; /* set by load_GL_VERSION_1_0 from load("glPointSize") */
+PFNGLPOLYGONMODEPROC glad_glPolygonMode = NULL; /* set by load_GL_VERSION_1_0 from load("glPolygonMode") */
+PFNGLPOLYGONOFFSETPROC glad_glPolygonOffset = NULL;
+PFNGLPOLYGONSTIPPLEPROC glad_glPolygonStipple = NULL;
+PFNGLPOPATTRIBPROC glad_glPopAttrib = NULL;
+PFNGLPOPCLIENTATTRIBPROC glad_glPopClientAttrib = NULL;
+PFNGLPOPMATRIXPROC glad_glPopMatrix = NULL;
+PFNGLPOPNAMEPROC glad_glPopName = NULL;
+PFNGLPRIMITIVERESTARTINDEXPROC glad_glPrimitiveRestartIndex = NULL;
+PFNGLPRIORITIZETEXTURESPROC glad_glPrioritizeTextures = NULL;
+PFNGLPROVOKINGVERTEXPROC glad_glProvokingVertex = NULL;
+PFNGLPUSHATTRIBPROC glad_glPushAttrib = NULL;
+PFNGLPUSHCLIENTATTRIBPROC glad_glPushClientAttrib = NULL;
+PFNGLPUSHMATRIXPROC glad_glPushMatrix = NULL;
+PFNGLPUSHNAMEPROC glad_glPushName = NULL;
+PFNGLQUERYCOUNTERPROC glad_glQueryCounter = NULL;
+PFNGLRASTERPOS2DPROC glad_glRasterPos2d = NULL; /* glad_glRasterPos2d through glad_glRasterPos2sv are defined NULL here and set by load_GL_VERSION_1_0 */
+PFNGLRASTERPOS2DVPROC glad_glRasterPos2dv = NULL;
+PFNGLRASTERPOS2FPROC glad_glRasterPos2f = NULL;
+PFNGLRASTERPOS2FVPROC glad_glRasterPos2fv = NULL;
+PFNGLRASTERPOS2IPROC glad_glRasterPos2i = NULL;
+PFNGLRASTERPOS2IVPROC glad_glRasterPos2iv = NULL;
+PFNGLRASTERPOS2SPROC glad_glRasterPos2s = NULL;
+PFNGLRASTERPOS2SVPROC glad_glRasterPos2sv = NULL;
+PFNGLRASTERPOS3DPROC glad_glRasterPos3d = NULL;
+PFNGLRASTERPOS3DVPROC glad_glRasterPos3dv = NULL;
+PFNGLRASTERPOS3FPROC glad_glRasterPos3f = NULL;
+PFNGLRASTERPOS3FVPROC glad_glRasterPos3fv = NULL;
+PFNGLRASTERPOS3IPROC glad_glRasterPos3i = NULL;
+PFNGLRASTERPOS3IVPROC glad_glRasterPos3iv = NULL;
+PFNGLRASTERPOS3SPROC glad_glRasterPos3s = NULL;
+PFNGLRASTERPOS3SVPROC glad_glRasterPos3sv = NULL;
+PFNGLRASTERPOS4DPROC glad_glRasterPos4d = NULL;
+PFNGLRASTERPOS4DVPROC glad_glRasterPos4dv = NULL;
+PFNGLRASTERPOS4FPROC glad_glRasterPos4f = NULL;
+PFNGLRASTERPOS4FVPROC glad_glRasterPos4fv = NULL;
+PFNGLRASTERPOS4IPROC glad_glRasterPos4i = NULL;
+PFNGLRASTERPOS4IVPROC glad_glRasterPos4iv = NULL;
+PFNGLRASTERPOS4SPROC glad_glRasterPos4s = NULL;
+PFNGLRASTERPOS4SVPROC glad_glRasterPos4sv = NULL;
+PFNGLREADBUFFERPROC glad_glReadBuffer = NULL; /* set by load_GL_VERSION_1_0 from load("glReadBuffer") */
+PFNGLREADPIXELSPROC glad_glReadPixels = NULL; /* set by load_GL_VERSION_1_0 from load("glReadPixels") */
+PFNGLRECTDPROC glad_glRectd = NULL;
+PFNGLRECTDVPROC glad_glRectdv = NULL;
+PFNGLRECTFPROC glad_glRectf = NULL;
+PFNGLRECTFVPROC glad_glRectfv = NULL;
+PFNGLRECTIPROC glad_glRecti = NULL;
+PFNGLRECTIVPROC glad_glRectiv = NULL;
+PFNGLRECTSPROC glad_glRects = NULL;
+PFNGLRECTSVPROC glad_glRectsv = NULL;
+PFNGLRENDERMODEPROC glad_glRenderMode = NULL;
+PFNGLRENDERBUFFERSTORAGEPROC glad_glRenderbufferStorage = NULL;
+PFNGLRENDERBUFFERSTORAGEMULTISAMPLEPROC glad_glRenderbufferStorageMultisample = NULL;
+PFNGLROTATEDPROC glad_glRotated = NULL;
+PFNGLROTATEFPROC glad_glRotatef = NULL;
+PFNGLSAMPLECOVERAGEPROC glad_glSampleCoverage = NULL;
+PFNGLSAMPLEMASKIPROC glad_glSampleMaski = NULL;
+PFNGLSAMPLERPARAMETERIIVPROC glad_glSamplerParameterIiv = NULL;
+PFNGLSAMPLERPARAMETERIUIVPROC glad_glSamplerParameterIuiv = NULL;
+PFNGLSAMPLERPARAMETERFPROC glad_glSamplerParameterf = NULL;
+PFNGLSAMPLERPARAMETERFVPROC glad_glSamplerParameterfv = NULL;
+PFNGLSAMPLERPARAMETERIPROC glad_glSamplerParameteri = NULL;
+PFNGLSAMPLERPARAMETERIVPROC glad_glSamplerParameteriv = NULL;
+PFNGLSCALEDPROC glad_glScaled = NULL;
+PFNGLSCALEFPROC glad_glScalef = NULL;
+PFNGLSCISSORPROC glad_glScissor = NULL; /* set by load_GL_VERSION_1_0 from load("glScissor") */
+PFNGLSECONDARYCOLOR3BPROC glad_glSecondaryColor3b = NULL; /* Pointer slots from glad_glSecondaryColor3b through glad_glTexBuffer, each defined NULL; only the ones annotated below have a visible assignment in load_GL_VERSION_1_0. */
+PFNGLSECONDARYCOLOR3BVPROC glad_glSecondaryColor3bv = NULL;
+PFNGLSECONDARYCOLOR3DPROC glad_glSecondaryColor3d = NULL;
+PFNGLSECONDARYCOLOR3DVPROC glad_glSecondaryColor3dv = NULL;
+PFNGLSECONDARYCOLOR3FPROC glad_glSecondaryColor3f = NULL;
+PFNGLSECONDARYCOLOR3FVPROC glad_glSecondaryColor3fv = NULL;
+PFNGLSECONDARYCOLOR3IPROC glad_glSecondaryColor3i = NULL;
+PFNGLSECONDARYCOLOR3IVPROC glad_glSecondaryColor3iv = NULL;
+PFNGLSECONDARYCOLOR3SPROC glad_glSecondaryColor3s = NULL;
+PFNGLSECONDARYCOLOR3SVPROC glad_glSecondaryColor3sv = NULL;
+PFNGLSECONDARYCOLOR3UBPROC glad_glSecondaryColor3ub = NULL;
+PFNGLSECONDARYCOLOR3UBVPROC glad_glSecondaryColor3ubv = NULL;
+PFNGLSECONDARYCOLOR3UIPROC glad_glSecondaryColor3ui = NULL;
+PFNGLSECONDARYCOLOR3UIVPROC glad_glSecondaryColor3uiv = NULL;
+PFNGLSECONDARYCOLOR3USPROC glad_glSecondaryColor3us = NULL;
+PFNGLSECONDARYCOLOR3USVPROC glad_glSecondaryColor3usv = NULL;
+PFNGLSECONDARYCOLORP3UIPROC glad_glSecondaryColorP3ui = NULL;
+PFNGLSECONDARYCOLORP3UIVPROC glad_glSecondaryColorP3uiv = NULL;
+PFNGLSECONDARYCOLORPOINTERPROC glad_glSecondaryColorPointer = NULL;
+PFNGLSELECTBUFFERPROC glad_glSelectBuffer = NULL;
+PFNGLSHADEMODELPROC glad_glShadeModel = NULL;
+PFNGLSHADERSOURCEPROC glad_glShaderSource = NULL;
+PFNGLSTENCILFUNCPROC glad_glStencilFunc = NULL; /* set by load_GL_VERSION_1_0 from load("glStencilFunc") */
+PFNGLSTENCILFUNCSEPARATEPROC glad_glStencilFuncSeparate = NULL;
+PFNGLSTENCILMASKPROC glad_glStencilMask = NULL; /* set by load_GL_VERSION_1_0 from load("glStencilMask") */
+PFNGLSTENCILMASKSEPARATEPROC glad_glStencilMaskSeparate = NULL;
+PFNGLSTENCILOPPROC glad_glStencilOp = NULL; /* set by load_GL_VERSION_1_0 from load("glStencilOp") */
+PFNGLSTENCILOPSEPARATEPROC glad_glStencilOpSeparate = NULL;
+PFNGLTEXBUFFERPROC glad_glTexBuffer = NULL;
+PFNGLTEXCOORD1DPROC glad_glTexCoord1d = NULL; /* Pointer slots from glad_glTexCoord1d through glad_glTranslatef, each defined NULL; only the ones annotated below have a visible assignment in load_GL_VERSION_1_0. */
+PFNGLTEXCOORD1DVPROC glad_glTexCoord1dv = NULL;
+PFNGLTEXCOORD1FPROC glad_glTexCoord1f = NULL;
+PFNGLTEXCOORD1FVPROC glad_glTexCoord1fv = NULL;
+PFNGLTEXCOORD1IPROC glad_glTexCoord1i = NULL;
+PFNGLTEXCOORD1IVPROC glad_glTexCoord1iv = NULL;
+PFNGLTEXCOORD1SPROC glad_glTexCoord1s = NULL;
+PFNGLTEXCOORD1SVPROC glad_glTexCoord1sv = NULL;
+PFNGLTEXCOORD2DPROC glad_glTexCoord2d = NULL;
+PFNGLTEXCOORD2DVPROC glad_glTexCoord2dv = NULL;
+PFNGLTEXCOORD2FPROC glad_glTexCoord2f = NULL;
+PFNGLTEXCOORD2FVPROC glad_glTexCoord2fv = NULL;
+PFNGLTEXCOORD2IPROC glad_glTexCoord2i = NULL;
+PFNGLTEXCOORD2IVPROC glad_glTexCoord2iv = NULL;
+PFNGLTEXCOORD2SPROC glad_glTexCoord2s = NULL;
+PFNGLTEXCOORD2SVPROC glad_glTexCoord2sv = NULL;
+PFNGLTEXCOORD3DPROC glad_glTexCoord3d = NULL;
+PFNGLTEXCOORD3DVPROC glad_glTexCoord3dv = NULL;
+PFNGLTEXCOORD3FPROC glad_glTexCoord3f = NULL;
+PFNGLTEXCOORD3FVPROC glad_glTexCoord3fv = NULL;
+PFNGLTEXCOORD3IPROC glad_glTexCoord3i = NULL;
+PFNGLTEXCOORD3IVPROC glad_glTexCoord3iv = NULL;
+PFNGLTEXCOORD3SPROC glad_glTexCoord3s = NULL;
+PFNGLTEXCOORD3SVPROC glad_glTexCoord3sv = NULL;
+PFNGLTEXCOORD4DPROC glad_glTexCoord4d = NULL;
+PFNGLTEXCOORD4DVPROC glad_glTexCoord4dv = NULL;
+PFNGLTEXCOORD4FPROC glad_glTexCoord4f = NULL;
+PFNGLTEXCOORD4FVPROC glad_glTexCoord4fv = NULL;
+PFNGLTEXCOORD4IPROC glad_glTexCoord4i = NULL;
+PFNGLTEXCOORD4IVPROC glad_glTexCoord4iv = NULL;
+PFNGLTEXCOORD4SPROC glad_glTexCoord4s = NULL;
+PFNGLTEXCOORD4SVPROC glad_glTexCoord4sv = NULL;
+PFNGLTEXCOORDP1UIPROC glad_glTexCoordP1ui = NULL;
+PFNGLTEXCOORDP1UIVPROC glad_glTexCoordP1uiv = NULL;
+PFNGLTEXCOORDP2UIPROC glad_glTexCoordP2ui = NULL;
+PFNGLTEXCOORDP2UIVPROC glad_glTexCoordP2uiv = NULL;
+PFNGLTEXCOORDP3UIPROC glad_glTexCoordP3ui = NULL;
+PFNGLTEXCOORDP3UIVPROC glad_glTexCoordP3uiv = NULL;
+PFNGLTEXCOORDP4UIPROC glad_glTexCoordP4ui = NULL;
+PFNGLTEXCOORDP4UIVPROC glad_glTexCoordP4uiv = NULL;
+PFNGLTEXCOORDPOINTERPROC glad_glTexCoordPointer = NULL;
+PFNGLTEXENVFPROC glad_glTexEnvf = NULL;
+PFNGLTEXENVFVPROC glad_glTexEnvfv = NULL;
+PFNGLTEXENVIPROC glad_glTexEnvi = NULL;
+PFNGLTEXENVIVPROC glad_glTexEnviv = NULL;
+PFNGLTEXGENDPROC glad_glTexGend = NULL;
+PFNGLTEXGENDVPROC glad_glTexGendv = NULL;
+PFNGLTEXGENFPROC glad_glTexGenf = NULL;
+PFNGLTEXGENFVPROC glad_glTexGenfv = NULL;
+PFNGLTEXGENIPROC glad_glTexGeni = NULL;
+PFNGLTEXGENIVPROC glad_glTexGeniv = NULL;
+PFNGLTEXIMAGE1DPROC glad_glTexImage1D = NULL; /* set by load_GL_VERSION_1_0 from load("glTexImage1D") */
+PFNGLTEXIMAGE2DPROC glad_glTexImage2D = NULL; /* set by load_GL_VERSION_1_0 from load("glTexImage2D") */
+PFNGLTEXIMAGE2DMULTISAMPLEPROC glad_glTexImage2DMultisample = NULL;
+PFNGLTEXIMAGE3DPROC glad_glTexImage3D = NULL;
+PFNGLTEXIMAGE3DMULTISAMPLEPROC glad_glTexImage3DMultisample = NULL;
+PFNGLTEXPARAMETERIIVPROC glad_glTexParameterIiv = NULL;
+PFNGLTEXPARAMETERIUIVPROC glad_glTexParameterIuiv = NULL;
+PFNGLTEXPARAMETERFPROC glad_glTexParameterf = NULL; /* set by load_GL_VERSION_1_0 from load("glTexParameterf") */
+PFNGLTEXPARAMETERFVPROC glad_glTexParameterfv = NULL; /* set by load_GL_VERSION_1_0 from load("glTexParameterfv") */
+PFNGLTEXPARAMETERIPROC glad_glTexParameteri = NULL; /* set by load_GL_VERSION_1_0 from load("glTexParameteri") */
+PFNGLTEXPARAMETERIVPROC glad_glTexParameteriv = NULL; /* set by load_GL_VERSION_1_0 from load("glTexParameteriv") */
+PFNGLTEXSUBIMAGE1DPROC glad_glTexSubImage1D = NULL;
+PFNGLTEXSUBIMAGE2DPROC glad_glTexSubImage2D = NULL;
+PFNGLTEXSUBIMAGE3DPROC glad_glTexSubImage3D = NULL;
+PFNGLTRANSFORMFEEDBACKVARYINGSPROC glad_glTransformFeedbackVaryings = NULL;
+PFNGLTRANSLATEDPROC glad_glTranslated = NULL;
+PFNGLTRANSLATEFPROC glad_glTranslatef = NULL;
+PFNGLUNIFORM1FPROC glad_glUniform1f = NULL; /* Pointer slots from glad_glUniform1f through glad_glValidateProgram, each defined NULL. NOTE(review): no assignment to these is visible nearby; presumably another load_GL_* routine fills them in - confirm. */
+PFNGLUNIFORM1FVPROC glad_glUniform1fv = NULL;
+PFNGLUNIFORM1IPROC glad_glUniform1i = NULL;
+PFNGLUNIFORM1IVPROC glad_glUniform1iv = NULL;
+PFNGLUNIFORM1UIPROC glad_glUniform1ui = NULL;
+PFNGLUNIFORM1UIVPROC glad_glUniform1uiv = NULL;
+PFNGLUNIFORM2FPROC glad_glUniform2f = NULL;
+PFNGLUNIFORM2FVPROC glad_glUniform2fv = NULL;
+PFNGLUNIFORM2IPROC glad_glUniform2i = NULL;
+PFNGLUNIFORM2IVPROC glad_glUniform2iv = NULL;
+PFNGLUNIFORM2UIPROC glad_glUniform2ui = NULL;
+PFNGLUNIFORM2UIVPROC glad_glUniform2uiv = NULL;
+PFNGLUNIFORM3FPROC glad_glUniform3f = NULL;
+PFNGLUNIFORM3FVPROC glad_glUniform3fv = NULL;
+PFNGLUNIFORM3IPROC glad_glUniform3i = NULL;
+PFNGLUNIFORM3IVPROC glad_glUniform3iv = NULL;
+PFNGLUNIFORM3UIPROC glad_glUniform3ui = NULL;
+PFNGLUNIFORM3UIVPROC glad_glUniform3uiv = NULL;
+PFNGLUNIFORM4FPROC glad_glUniform4f = NULL;
+PFNGLUNIFORM4FVPROC glad_glUniform4fv = NULL;
+PFNGLUNIFORM4IPROC glad_glUniform4i = NULL;
+PFNGLUNIFORM4IVPROC glad_glUniform4iv = NULL;
+PFNGLUNIFORM4UIPROC glad_glUniform4ui = NULL;
+PFNGLUNIFORM4UIVPROC glad_glUniform4uiv = NULL;
+PFNGLUNIFORMBLOCKBINDINGPROC glad_glUniformBlockBinding = NULL;
+PFNGLUNIFORMMATRIX2FVPROC glad_glUniformMatrix2fv = NULL;
+PFNGLUNIFORMMATRIX2X3FVPROC glad_glUniformMatrix2x3fv = NULL;
+PFNGLUNIFORMMATRIX2X4FVPROC glad_glUniformMatrix2x4fv = NULL;
+PFNGLUNIFORMMATRIX3FVPROC glad_glUniformMatrix3fv = NULL;
+PFNGLUNIFORMMATRIX3X2FVPROC glad_glUniformMatrix3x2fv = NULL;
+PFNGLUNIFORMMATRIX3X4FVPROC glad_glUniformMatrix3x4fv = NULL;
+PFNGLUNIFORMMATRIX4FVPROC glad_glUniformMatrix4fv = NULL;
+PFNGLUNIFORMMATRIX4X2FVPROC glad_glUniformMatrix4x2fv = NULL;
+PFNGLUNIFORMMATRIX4X3FVPROC glad_glUniformMatrix4x3fv = NULL;
+PFNGLUNMAPBUFFERPROC glad_glUnmapBuffer = NULL;
+PFNGLUSEPROGRAMPROC glad_glUseProgram = NULL;
+PFNGLVALIDATEPROGRAMPROC glad_glValidateProgram = NULL;
+PFNGLVERTEX2DPROC glad_glVertex2d = NULL; /* Pointer slots from glad_glVertex2d through glad_glWaitSync, each defined NULL; only glad_glViewport has a visible assignment in load_GL_VERSION_1_0. */
+PFNGLVERTEX2DVPROC glad_glVertex2dv = NULL;
+PFNGLVERTEX2FPROC glad_glVertex2f = NULL;
+PFNGLVERTEX2FVPROC glad_glVertex2fv = NULL;
+PFNGLVERTEX2IPROC glad_glVertex2i = NULL;
+PFNGLVERTEX2IVPROC glad_glVertex2iv = NULL;
+PFNGLVERTEX2SPROC glad_glVertex2s = NULL;
+PFNGLVERTEX2SVPROC glad_glVertex2sv = NULL;
+PFNGLVERTEX3DPROC glad_glVertex3d = NULL;
+PFNGLVERTEX3DVPROC glad_glVertex3dv = NULL;
+PFNGLVERTEX3FPROC glad_glVertex3f = NULL;
+PFNGLVERTEX3FVPROC glad_glVertex3fv = NULL;
+PFNGLVERTEX3IPROC glad_glVertex3i = NULL;
+PFNGLVERTEX3IVPROC glad_glVertex3iv = NULL;
+PFNGLVERTEX3SPROC glad_glVertex3s = NULL;
+PFNGLVERTEX3SVPROC glad_glVertex3sv = NULL;
+PFNGLVERTEX4DPROC glad_glVertex4d = NULL;
+PFNGLVERTEX4DVPROC glad_glVertex4dv = NULL;
+PFNGLVERTEX4FPROC glad_glVertex4f = NULL;
+PFNGLVERTEX4FVPROC glad_glVertex4fv = NULL;
+PFNGLVERTEX4IPROC glad_glVertex4i = NULL;
+PFNGLVERTEX4IVPROC glad_glVertex4iv = NULL;
+PFNGLVERTEX4SPROC glad_glVertex4s = NULL;
+PFNGLVERTEX4SVPROC glad_glVertex4sv = NULL;
+PFNGLVERTEXATTRIB1DPROC glad_glVertexAttrib1d = NULL;
+PFNGLVERTEXATTRIB1DVPROC glad_glVertexAttrib1dv = NULL;
+PFNGLVERTEXATTRIB1FPROC glad_glVertexAttrib1f = NULL;
+PFNGLVERTEXATTRIB1FVPROC glad_glVertexAttrib1fv = NULL;
+PFNGLVERTEXATTRIB1SPROC glad_glVertexAttrib1s = NULL;
+PFNGLVERTEXATTRIB1SVPROC glad_glVertexAttrib1sv = NULL;
+PFNGLVERTEXATTRIB2DPROC glad_glVertexAttrib2d = NULL;
+PFNGLVERTEXATTRIB2DVPROC glad_glVertexAttrib2dv = NULL;
+PFNGLVERTEXATTRIB2FPROC glad_glVertexAttrib2f = NULL;
+PFNGLVERTEXATTRIB2FVPROC glad_glVertexAttrib2fv = NULL;
+PFNGLVERTEXATTRIB2SPROC glad_glVertexAttrib2s = NULL;
+PFNGLVERTEXATTRIB2SVPROC glad_glVertexAttrib2sv = NULL;
+PFNGLVERTEXATTRIB3DPROC glad_glVertexAttrib3d = NULL;
+PFNGLVERTEXATTRIB3DVPROC glad_glVertexAttrib3dv = NULL;
+PFNGLVERTEXATTRIB3FPROC glad_glVertexAttrib3f = NULL;
+PFNGLVERTEXATTRIB3FVPROC glad_glVertexAttrib3fv = NULL;
+PFNGLVERTEXATTRIB3SPROC glad_glVertexAttrib3s = NULL;
+PFNGLVERTEXATTRIB3SVPROC glad_glVertexAttrib3sv = NULL;
+PFNGLVERTEXATTRIB4NBVPROC glad_glVertexAttrib4Nbv = NULL;
+PFNGLVERTEXATTRIB4NIVPROC glad_glVertexAttrib4Niv = NULL;
+PFNGLVERTEXATTRIB4NSVPROC glad_glVertexAttrib4Nsv = NULL;
+PFNGLVERTEXATTRIB4NUBPROC glad_glVertexAttrib4Nub = NULL;
+PFNGLVERTEXATTRIB4NUBVPROC glad_glVertexAttrib4Nubv = NULL;
+PFNGLVERTEXATTRIB4NUIVPROC glad_glVertexAttrib4Nuiv = NULL;
+PFNGLVERTEXATTRIB4NUSVPROC glad_glVertexAttrib4Nusv = NULL;
+PFNGLVERTEXATTRIB4BVPROC glad_glVertexAttrib4bv = NULL;
+PFNGLVERTEXATTRIB4DPROC glad_glVertexAttrib4d = NULL;
+PFNGLVERTEXATTRIB4DVPROC glad_glVertexAttrib4dv = NULL;
+PFNGLVERTEXATTRIB4FPROC glad_glVertexAttrib4f = NULL;
+PFNGLVERTEXATTRIB4FVPROC glad_glVertexAttrib4fv = NULL;
+PFNGLVERTEXATTRIB4IVPROC glad_glVertexAttrib4iv = NULL;
+PFNGLVERTEXATTRIB4SPROC glad_glVertexAttrib4s = NULL;
+PFNGLVERTEXATTRIB4SVPROC glad_glVertexAttrib4sv = NULL;
+PFNGLVERTEXATTRIB4UBVPROC glad_glVertexAttrib4ubv = NULL;
+PFNGLVERTEXATTRIB4UIVPROC glad_glVertexAttrib4uiv = NULL;
+PFNGLVERTEXATTRIB4USVPROC glad_glVertexAttrib4usv = NULL;
+PFNGLVERTEXATTRIBDIVISORPROC glad_glVertexAttribDivisor = NULL;
+PFNGLVERTEXATTRIBI1IPROC glad_glVertexAttribI1i = NULL;
+PFNGLVERTEXATTRIBI1IVPROC glad_glVertexAttribI1iv = NULL;
+PFNGLVERTEXATTRIBI1UIPROC glad_glVertexAttribI1ui = NULL;
+PFNGLVERTEXATTRIBI1UIVPROC glad_glVertexAttribI1uiv = NULL;
+PFNGLVERTEXATTRIBI2IPROC glad_glVertexAttribI2i = NULL;
+PFNGLVERTEXATTRIBI2IVPROC glad_glVertexAttribI2iv = NULL;
+PFNGLVERTEXATTRIBI2UIPROC glad_glVertexAttribI2ui = NULL;
+PFNGLVERTEXATTRIBI2UIVPROC glad_glVertexAttribI2uiv = NULL;
+PFNGLVERTEXATTRIBI3IPROC glad_glVertexAttribI3i = NULL;
+PFNGLVERTEXATTRIBI3IVPROC glad_glVertexAttribI3iv = NULL;
+PFNGLVERTEXATTRIBI3UIPROC glad_glVertexAttribI3ui = NULL;
+PFNGLVERTEXATTRIBI3UIVPROC glad_glVertexAttribI3uiv = NULL;
+PFNGLVERTEXATTRIBI4BVPROC glad_glVertexAttribI4bv = NULL;
+PFNGLVERTEXATTRIBI4IPROC glad_glVertexAttribI4i = NULL;
+PFNGLVERTEXATTRIBI4IVPROC glad_glVertexAttribI4iv = NULL;
+PFNGLVERTEXATTRIBI4SVPROC glad_glVertexAttribI4sv = NULL;
+PFNGLVERTEXATTRIBI4UBVPROC glad_glVertexAttribI4ubv = NULL;
+PFNGLVERTEXATTRIBI4UIPROC glad_glVertexAttribI4ui = NULL;
+PFNGLVERTEXATTRIBI4UIVPROC glad_glVertexAttribI4uiv = NULL;
+PFNGLVERTEXATTRIBI4USVPROC glad_glVertexAttribI4usv = NULL;
+PFNGLVERTEXATTRIBIPOINTERPROC glad_glVertexAttribIPointer = NULL;
+PFNGLVERTEXATTRIBP1UIPROC glad_glVertexAttribP1ui = NULL;
+PFNGLVERTEXATTRIBP1UIVPROC glad_glVertexAttribP1uiv = NULL;
+PFNGLVERTEXATTRIBP2UIPROC glad_glVertexAttribP2ui = NULL;
+PFNGLVERTEXATTRIBP2UIVPROC glad_glVertexAttribP2uiv = NULL;
+PFNGLVERTEXATTRIBP3UIPROC glad_glVertexAttribP3ui = NULL;
+PFNGLVERTEXATTRIBP3UIVPROC glad_glVertexAttribP3uiv = NULL;
+PFNGLVERTEXATTRIBP4UIPROC glad_glVertexAttribP4ui = NULL;
+PFNGLVERTEXATTRIBP4UIVPROC glad_glVertexAttribP4uiv = NULL;
+PFNGLVERTEXATTRIBPOINTERPROC glad_glVertexAttribPointer = NULL;
+PFNGLVERTEXP2UIPROC glad_glVertexP2ui = NULL;
+PFNGLVERTEXP2UIVPROC glad_glVertexP2uiv = NULL;
+PFNGLVERTEXP3UIPROC glad_glVertexP3ui = NULL;
+PFNGLVERTEXP3UIVPROC glad_glVertexP3uiv = NULL;
+PFNGLVERTEXP4UIPROC glad_glVertexP4ui = NULL;
+PFNGLVERTEXP4UIVPROC glad_glVertexP4uiv = NULL;
+PFNGLVERTEXPOINTERPROC glad_glVertexPointer = NULL;
+PFNGLVIEWPORTPROC glad_glViewport = NULL; /* set by load_GL_VERSION_1_0 from load("glViewport") */
+PFNGLWAITSYNCPROC glad_glWaitSync = NULL;
+PFNGLWINDOWPOS2DPROC glad_glWindowPos2d = NULL; /* glad_glWindowPos* pointer slots (through glad_glWindowPos3sv), each defined NULL. NOTE(review): no assignment to these is visible nearby; presumably another load_GL_* routine fills them in - confirm. */
+PFNGLWINDOWPOS2DVPROC glad_glWindowPos2dv = NULL;
+PFNGLWINDOWPOS2FPROC glad_glWindowPos2f = NULL;
+PFNGLWINDOWPOS2FVPROC glad_glWindowPos2fv = NULL;
+PFNGLWINDOWPOS2IPROC glad_glWindowPos2i = NULL;
+PFNGLWINDOWPOS2IVPROC glad_glWindowPos2iv = NULL;
+PFNGLWINDOWPOS2SPROC glad_glWindowPos2s = NULL;
+PFNGLWINDOWPOS2SVPROC glad_glWindowPos2sv = NULL;
+PFNGLWINDOWPOS3DPROC glad_glWindowPos3d = NULL;
+PFNGLWINDOWPOS3DVPROC glad_glWindowPos3dv = NULL;
+PFNGLWINDOWPOS3FPROC glad_glWindowPos3f = NULL;
+PFNGLWINDOWPOS3FVPROC glad_glWindowPos3fv = NULL;
+PFNGLWINDOWPOS3IPROC glad_glWindowPos3i = NULL;
+PFNGLWINDOWPOS3IVPROC glad_glWindowPos3iv = NULL;
+PFNGLWINDOWPOS3SPROC glad_glWindowPos3s = NULL;
+PFNGLWINDOWPOS3SVPROC glad_glWindowPos3sv = NULL;
+static void load_GL_VERSION_1_0(GLADloadproc load) {
+	if (!GLAD_GL_VERSION_1_0) return;
+	glad_glCullFace = (PFNGLCULLFACEPROC)load("glCullFace");
+	glad_glFrontFace = (PFNGLFRONTFACEPROC)load("glFrontFace");
+	glad_glHint = (PFNGLHINTPROC)load("glHint");
+	glad_glLineWidth = (PFNGLLINEWIDTHPROC)load("glLineWidth");
+	glad_glPointSize = (PFNGLPOINTSIZEPROC)load("glPointSize");
+	glad_glPolygonMode = (PFNGLPOLYGONMODEPROC)load("glPolygonMode");
+	glad_glScissor = (PFNGLSCISSORPROC)load("glScissor");
+	glad_glTexParameterf = (PFNGLTEXPARAMETERFPROC)load("glTexParameterf");
+	glad_glTexParameterfv = (PFNGLTEXPARAMETERFVPROC)load("glTexParameterfv");
+	glad_glTexParameteri = (PFNGLTEXPARAMETERIPROC)load("glTexParameteri");
+	glad_glTexParameteriv = (PFNGLTEXPARAMETERIVPROC)load("glTexParameteriv");
+	glad_glTexImage1D = (PFNGLTEXIMAGE1DPROC)load("glTexImage1D");
+	glad_glTexImage2D = (PFNGLTEXIMAGE2DPROC)load("glTexImage2D");
+	glad_glDrawBuffer = (PFNGLDRAWBUFFERPROC)load("glDrawBuffer");
+	glad_glClear = (PFNGLCLEARPROC)load("glClear");
+	glad_glClearColor = (PFNGLCLEARCOLORPROC)load("glClearColor");
+	glad_glClearStencil = (PFNGLCLEARSTENCILPROC)load("glClearStencil");
+	glad_glClearDepth = (PFNGLCLEARDEPTHPROC)load("glClearDepth");
+	glad_glStencilMask = (PFNGLSTENCILMASKPROC)load("glStencilMask");
+	glad_glColorMask = (PFNGLCOLORMASKPROC)load("glColorMask");
+	glad_glDepthMask = (PFNGLDEPTHMASKPROC)load("glDepthMask");
+	glad_glDisable = (PFNGLDISABLEPROC)load("glDisable");
+	glad_glEnable = (PFNGLENABLEPROC)load("glEnable");
+	glad_glFinish = (PFNGLFINISHPROC)load("glFinish");
+	glad_glFlush = (PFNGLFLUSHPROC)load("glFlush");
+	glad_glBlendFunc = (PFNGLBLENDFUNCPROC)load("glBlendFunc");
+	glad_glLogicOp = (PFNGLLOGICOPPROC)load("glLogicOp");
+	glad_glStencilFunc = (PFNGLSTENCILFUNCPROC)load("glStencilFunc");
+	glad_glStencilOp = (PFNGLSTENCILOPPROC)load("glStencilOp");
+	glad_glDepthFunc = (PFNGLDEPTHFUNCPROC)load("glDepthFunc");
+	glad_glPixelStoref = (PFNGLPIXELSTOREFPROC)load("glPixelStoref");
+	glad_glPixelStorei = (PFNGLPIXELSTOREIPROC)load("glPixelStorei");
+	glad_glReadBuffer = (PFNGLREADBUFFERPROC)load("glReadBuffer");
+	glad_glReadPixels = (PFNGLREADPIXELSPROC)load("glReadPixels");
+	glad_glGetBooleanv = (PFNGLGETBOOLEANVPROC)load("glGetBooleanv");
+	glad_glGetDoublev = (PFNGLGETDOUBLEVPROC)load("glGetDoublev");
+	glad_glGetError = (PFNGLGETERRORPROC)load("glGetError");
+	glad_glGetFloatv = (PFNGLGETFLOATVPROC)load("glGetFloatv");
+	glad_glGetIntegerv = (PFNGLGETINTEGERVPROC)load("glGetIntegerv");
+	glad_glGetString = (PFNGLGETSTRINGPROC)load("glGetString");
+	glad_glGetTexImage = (PFNGLGETTEXIMAGEPROC)load("glGetTexImage");
+	glad_glGetTexParameterfv = (PFNGLGETTEXPARAMETERFVPROC)load("glGetTexParameterfv");
+	glad_glGetTexParameteriv = (PFNGLGETTEXPARAMETERIVPROC)load("glGetTexParameteriv");
+	glad_glGetTexLevelParameterfv = (PFNGLGETTEXLEVELPARAMETERFVPROC)load("glGetTexLevelParameterfv");
+	glad_glGetTexLevelParameteriv = (PFNGLGETTEXLEVELPARAMETERIVPROC)load("glGetTexLevelParameteriv");
+	glad_glIsEnabled = (PFNGLISENABLEDPROC)load("glIsEnabled");
+	glad_glDepthRange = (PFNGLDEPTHRANGEPROC)load("glDepthRange");
+	glad_glViewport = (PFNGLVIEWPORTPROC)load("glViewport");
+	glad_glNewList = (PFNGLNEWLISTPROC)load("glNewList");
+	glad_glEndList = (PFNGLENDLISTPROC)load("glEndList");
+	glad_glCallList = (PFNGLCALLLISTPROC)load("glCallList");
+	glad_glCallLists = (PFNGLCALLLISTSPROC)load("glCallLists");
+	glad_glDeleteLists = (PFNGLDELETELISTSPROC)load("glDeleteLists");
+	glad_glGenLists = (PFNGLGENLISTSPROC)load("glGenLists");
+	glad_glListBase = (PFNGLLISTBASEPROC)load("glListBase");
+	glad_glBegin = (PFNGLBEGINPROC)load("glBegin");
+	glad_glBitmap = (PFNGLBITMAPPROC)load("glBitmap");
+	glad_glColor3b = (PFNGLCOLOR3BPROC)load("glColor3b");
+	glad_glColor3bv = (PFNGLCOLOR3BVPROC)load("glColor3bv");
+	glad_glColor3d = (PFNGLCOLOR3DPROC)load("glColor3d");
+	glad_glColor3dv = (PFNGLCOLOR3DVPROC)load("glColor3dv");
+	glad_glColor3f = (PFNGLCOLOR3FPROC)load("glColor3f");
+	glad_glColor3fv = (PFNGLCOLOR3FVPROC)load("glColor3fv");
+	glad_glColor3i = (PFNGLCOLOR3IPROC)load("glColor3i");
+	glad_glColor3iv = (PFNGLCOLOR3IVPROC)load("glColor3iv");
+	glad_glColor3s = (PFNGLCOLOR3SPROC)load("glColor3s");
+	glad_glColor3sv = (PFNGLCOLOR3SVPROC)load("glColor3sv");
+	glad_glColor3ub = (PFNGLCOLOR3UBPROC)load("glColor3ub");
+	glad_glColor3ubv = (PFNGLCOLOR3UBVPROC)load("glColor3ubv");
+	glad_glColor3ui = (PFNGLCOLOR3UIPROC)load("glColor3ui");
+	glad_glColor3uiv = (PFNGLCOLOR3UIVPROC)load("glColor3uiv");
+	glad_glColor3us = (PFNGLCOLOR3USPROC)load("glColor3us");
+	glad_glColor3usv = (PFNGLCOLOR3USVPROC)load("glColor3usv");
+	glad_glColor4b = (PFNGLCOLOR4BPROC)load("glColor4b");
+	glad_glColor4bv = (PFNGLCOLOR4BVPROC)load("glColor4bv");
+	glad_glColor4d = (PFNGLCOLOR4DPROC)load("glColor4d");
+	glad_glColor4dv = (PFNGLCOLOR4DVPROC)load("glColor4dv");
+	glad_glColor4f = (PFNGLCOLOR4FPROC)load("glColor4f");
+	glad_glColor4fv = (PFNGLCOLOR4FVPROC)load("glColor4fv");
+	glad_glColor4i = (PFNGLCOLOR4IPROC)load("glColor4i");
+	glad_glColor4iv = (PFNGLCOLOR4IVPROC)load("glColor4iv");
+	glad_glColor4s = (PFNGLCOLOR4SPROC)load("glColor4s");
+	glad_glColor4sv = (PFNGLCOLOR4SVPROC)load("glColor4sv");
+	glad_glColor4ub = (PFNGLCOLOR4UBPROC)load("glColor4ub");
+	glad_glColor4ubv = (PFNGLCOLOR4UBVPROC)load("glColor4ubv");
+	glad_glColor4ui = (PFNGLCOLOR4UIPROC)load("glColor4ui");
+	glad_glColor4uiv = (PFNGLCOLOR4UIVPROC)load("glColor4uiv");
+	glad_glColor4us = (PFNGLCOLOR4USPROC)load("glColor4us");
+	glad_glColor4usv = (PFNGLCOLOR4USVPROC)load("glColor4usv");
+	glad_glEdgeFlag = (PFNGLEDGEFLAGPROC)load("glEdgeFlag");
+	glad_glEdgeFlagv = (PFNGLEDGEFLAGVPROC)load("glEdgeFlagv");
+	glad_glEnd = (PFNGLENDPROC)load("glEnd");
+	glad_glIndexd = (PFNGLINDEXDPROC)load("glIndexd");
+	glad_glIndexdv = (PFNGLINDEXDVPROC)load("glIndexdv");
+	glad_glIndexf = (PFNGLINDEXFPROC)load("glIndexf");
+	glad_glIndexfv = (PFNGLINDEXFVPROC)load("glIndexfv");
+	glad_glIndexi = (PFNGLINDEXIPROC)load("glIndexi");
+	glad_glIndexiv = (PFNGLINDEXIVPROC)load("glIndexiv");
+	glad_glIndexs = (PFNGLINDEXSPROC)load("glIndexs");
+	glad_glIndexsv = (PFNGLINDEXSVPROC)load("glIndexsv");
+	glad_glNormal3b = (PFNGLNORMAL3BPROC)load("glNormal3b");
+	glad_glNormal3bv = (PFNGLNORMAL3BVPROC)load("glNormal3bv");
+	glad_glNormal3d = (PFNGLNORMAL3DPROC)load("glNormal3d");
+	glad_glNormal3dv = (PFNGLNORMAL3DVPROC)load("glNormal3dv");
+	glad_glNormal3f = (PFNGLNORMAL3FPROC)load("glNormal3f");
+	glad_glNormal3fv = (PFNGLNORMAL3FVPROC)load("glNormal3fv");
+	glad_glNormal3i = (PFNGLNORMAL3IPROC)load("glNormal3i");
+	glad_glNormal3iv = (PFNGLNORMAL3IVPROC)load("glNormal3iv");
+	glad_glNormal3s = (PFNGLNORMAL3SPROC)load("glNormal3s");
+	glad_glNormal3sv = (PFNGLNORMAL3SVPROC)load("glNormal3sv");
+	glad_glRasterPos2d = (PFNGLRASTERPOS2DPROC)load("glRasterPos2d");
+	glad_glRasterPos2dv = (PFNGLRASTERPOS2DVPROC)load("glRasterPos2dv");
+	glad_glRasterPos2f = (PFNGLRASTERPOS2FPROC)load("glRasterPos2f");
+	glad_glRasterPos2fv = (PFNGLRASTERPOS2FVPROC)load("glRasterPos2fv");
+	glad_glRasterPos2i = (PFNGLRASTERPOS2IPROC)load("glRasterPos2i");
+	glad_glRasterPos2iv = (PFNGLRASTERPOS2IVPROC)load("glRasterPos2iv");
+	glad_glRasterPos2s = (PFNGLRASTERPOS2SPROC)load("glRasterPos2s");
+	glad_glRasterPos2sv = (PFNGLRASTERPOS2SVPROC)load("glRasterPos2sv");
+	glad_glRasterPos3d = (PFNGLRASTERPOS3DPROC)load("glRasterPos3d");
+	glad_glRasterPos3dv = (PFNGLRASTERPOS3DVPROC)load("glRasterPos3dv");
+	glad_glRasterPos3f = (PFNGLRASTERPOS3FPROC)load("glRasterPos3f");
+	glad_glRasterPos3fv = (PFNGLRASTERPOS3FVPROC)load("glRasterPos3fv");
+	glad_glRasterPos3i = (PFNGLRASTERPOS3IPROC)load("glRasterPos3i");
+	glad_glRasterPos3iv = (PFNGLRASTERPOS3IVPROC)load("glRasterPos3iv");
+	glad_glRasterPos3s = (PFNGLRASTERPOS3SPROC)load("glRasterPos3s");
+	glad_glRasterPos3sv = (PFNGLRASTERPOS3SVPROC)load("glRasterPos3sv");
+	glad_glRasterPos4d = (PFNGLRASTERPOS4DPROC)load("glRasterPos4d");
+	glad_glRasterPos4dv = (PFNGLRASTERPOS4DVPROC)load("glRasterPos4dv");
+	glad_glRasterPos4f = (PFNGLRASTERPOS4FPROC)load("glRasterPos4f");
+	glad_glRasterPos4fv = (PFNGLRASTERPOS4FVPROC)load("glRasterPos4fv");
+	glad_glRasterPos4i = (PFNGLRASTERPOS4IPROC)load("glRasterPos4i");
+	glad_glRasterPos4iv = (PFNGLRASTERPOS4IVPROC)load("glRasterPos4iv");
+	glad_glRasterPos4s = (PFNGLRASTERPOS4SPROC)load("glRasterPos4s");
+	glad_glRasterPos4sv = (PFNGLRASTERPOS4SVPROC)load("glRasterPos4sv");
+	glad_glRectd = (PFNGLRECTDPROC)load("glRectd");
+	glad_glRectdv = (PFNGLRECTDVPROC)load("glRectdv");
+	glad_glRectf = (PFNGLRECTFPROC)load("glRectf");
+	glad_glRectfv = (PFNGLRECTFVPROC)load("glRectfv");
+	glad_glRecti = (PFNGLRECTIPROC)load("glRecti");
+	glad_glRectiv = (PFNGLRECTIVPROC)load("glRectiv");
+	glad_glRects = (PFNGLRECTSPROC)load("glRects");
+	glad_glRectsv = (PFNGLRECTSVPROC)load("glRectsv");
+	glad_glTexCoord1d = (PFNGLTEXCOORD1DPROC)load("glTexCoord1d");
+	glad_glTexCoord1dv = (PFNGLTEXCOORD1DVPROC)load("glTexCoord1dv");
+	glad_glTexCoord1f = (PFNGLTEXCOORD1FPROC)load("glTexCoord1f");
+	glad_glTexCoord1fv = (PFNGLTEXCOORD1FVPROC)load("glTexCoord1fv");
+	glad_glTexCoord1i = (PFNGLTEXCOORD1IPROC)load("glTexCoord1i");
+	glad_glTexCoord1iv = (PFNGLTEXCOORD1IVPROC)load("glTexCoord1iv");
+	glad_glTexCoord1s = (PFNGLTEXCOORD1SPROC)load("glTexCoord1s");
+	glad_glTexCoord1sv = (PFNGLTEXCOORD1SVPROC)load("glTexCoord1sv");
+	glad_glTexCoord2d = (PFNGLTEXCOORD2DPROC)load("glTexCoord2d");
+	glad_glTexCoord2dv = (PFNGLTEXCOORD2DVPROC)load("glTexCoord2dv");
+	glad_glTexCoord2f = (PFNGLTEXCOORD2FPROC)load("glTexCoord2f");
+	glad_glTexCoord2fv = (PFNGLTEXCOORD2FVPROC)load("glTexCoord2fv");
+	glad_glTexCoord2i = (PFNGLTEXCOORD2IPROC)load("glTexCoord2i");
+	glad_glTexCoord2iv = (PFNGLTEXCOORD2IVPROC)load("glTexCoord2iv");
+	glad_glTexCoord2s = (PFNGLTEXCOORD2SPROC)load("glTexCoord2s");
+	glad_glTexCoord2sv = (PFNGLTEXCOORD2SVPROC)load("glTexCoord2sv");
+	glad_glTexCoord3d = (PFNGLTEXCOORD3DPROC)load("glTexCoord3d");
+	glad_glTexCoord3dv = (PFNGLTEXCOORD3DVPROC)load("glTexCoord3dv");
+	glad_glTexCoord3f = (PFNGLTEXCOORD3FPROC)load("glTexCoord3f");
+	glad_glTexCoord3fv = (PFNGLTEXCOORD3FVPROC)load("glTexCoord3fv");
+	glad_glTexCoord3i = (PFNGLTEXCOORD3IPROC)load("glTexCoord3i");
+	glad_glTexCoord3iv = (PFNGLTEXCOORD3IVPROC)load("glTexCoord3iv");
+	glad_glTexCoord3s = (PFNGLTEXCOORD3SPROC)load("glTexCoord3s");
+	glad_glTexCoord3sv = (PFNGLTEXCOORD3SVPROC)load("glTexCoord3sv");
+	glad_glTexCoord4d = (PFNGLTEXCOORD4DPROC)load("glTexCoord4d");
+	glad_glTexCoord4dv = (PFNGLTEXCOORD4DVPROC)load("glTexCoord4dv");
+	glad_glTexCoord4f = (PFNGLTEXCOORD4FPROC)load("glTexCoord4f");
+	glad_glTexCoord4fv = (PFNGLTEXCOORD4FVPROC)load("glTexCoord4fv");
+	glad_glTexCoord4i = (PFNGLTEXCOORD4IPROC)load("glTexCoord4i");
+	glad_glTexCoord4iv = (PFNGLTEXCOORD4IVPROC)load("glTexCoord4iv");
+	glad_glTexCoord4s = (PFNGLTEXCOORD4SPROC)load("glTexCoord4s");
+	glad_glTexCoord4sv = (PFNGLTEXCOORD4SVPROC)load("glTexCoord4sv");
+	glad_glVertex2d = (PFNGLVERTEX2DPROC)load("glVertex2d");
+	glad_glVertex2dv = (PFNGLVERTEX2DVPROC)load("glVertex2dv");
+	glad_glVertex2f = (PFNGLVERTEX2FPROC)load("glVertex2f");
+	glad_glVertex2fv = (PFNGLVERTEX2FVPROC)load("glVertex2fv");
+	glad_glVertex2i = (PFNGLVERTEX2IPROC)load("glVertex2i");
+	glad_glVertex2iv = (PFNGLVERTEX2IVPROC)load("glVertex2iv");
+	glad_glVertex2s = (PFNGLVERTEX2SPROC)load("glVertex2s");
+	glad_glVertex2sv = (PFNGLVERTEX2SVPROC)load("glVertex2sv");
+	glad_glVertex3d = (PFNGLVERTEX3DPROC)load("glVertex3d");
+	glad_glVertex3dv = (PFNGLVERTEX3DVPROC)load("glVertex3dv");
+	glad_glVertex3f = (PFNGLVERTEX3FPROC)load("glVertex3f");
+	glad_glVertex3fv = (PFNGLVERTEX3FVPROC)load("glVertex3fv");
+	glad_glVertex3i = (PFNGLVERTEX3IPROC)load("glVertex3i");
+	glad_glVertex3iv = (PFNGLVERTEX3IVPROC)load("glVertex3iv");
+	glad_glVertex3s = (PFNGLVERTEX3SPROC)load("glVertex3s");
+	glad_glVertex3sv = (PFNGLVERTEX3SVPROC)load("glVertex3sv");
+	glad_glVertex4d = (PFNGLVERTEX4DPROC)load("glVertex4d");
+	glad_glVertex4dv = (PFNGLVERTEX4DVPROC)load("glVertex4dv");
+	glad_glVertex4f = (PFNGLVERTEX4FPROC)load("glVertex4f");
+	glad_glVertex4fv = (PFNGLVERTEX4FVPROC)load("glVertex4fv");
+	glad_glVertex4i = (PFNGLVERTEX4IPROC)load("glVertex4i");
+	glad_glVertex4iv = (PFNGLVERTEX4IVPROC)load("glVertex4iv");
+	glad_glVertex4s = (PFNGLVERTEX4SPROC)load("glVertex4s");
+	glad_glVertex4sv = (PFNGLVERTEX4SVPROC)load("glVertex4sv");
+	glad_glClipPlane = (PFNGLCLIPPLANEPROC)load("glClipPlane");
+	glad_glColorMaterial = (PFNGLCOLORMATERIALPROC)load("glColorMaterial");
+	glad_glFogf = (PFNGLFOGFPROC)load("glFogf");
+	glad_glFogfv = (PFNGLFOGFVPROC)load("glFogfv");
+	glad_glFogi = (PFNGLFOGIPROC)load("glFogi");
+	glad_glFogiv = (PFNGLFOGIVPROC)load("glFogiv");
+	glad_glLightf = (PFNGLLIGHTFPROC)load("glLightf");
+	glad_glLightfv = (PFNGLLIGHTFVPROC)load("glLightfv");
+	glad_glLighti = (PFNGLLIGHTIPROC)load("glLighti");
+	glad_glLightiv = (PFNGLLIGHTIVPROC)load("glLightiv");
+	glad_glLightModelf = (PFNGLLIGHTMODELFPROC)load("glLightModelf");
+	glad_glLightModelfv = (PFNGLLIGHTMODELFVPROC)load("glLightModelfv");
+	glad_glLightModeli = (PFNGLLIGHTMODELIPROC)load("glLightModeli");
+	glad_glLightModeliv = (PFNGLLIGHTMODELIVPROC)load("glLightModeliv");
+	glad_glLineStipple = (PFNGLLINESTIPPLEPROC)load("glLineStipple");
+	glad_glMaterialf = (PFNGLMATERIALFPROC)load("glMaterialf");
+	glad_glMaterialfv = (PFNGLMATERIALFVPROC)load("glMaterialfv");
+	glad_glMateriali = (PFNGLMATERIALIPROC)load("glMateriali");
+	glad_glMaterialiv = (PFNGLMATERIALIVPROC)load("glMaterialiv");
+	glad_glPolygonStipple = (PFNGLPOLYGONSTIPPLEPROC)load("glPolygonStipple");
+	glad_glShadeModel = (PFNGLSHADEMODELPROC)load("glShadeModel");
+	glad_glTexEnvf = (PFNGLTEXENVFPROC)load("glTexEnvf");
+	glad_glTexEnvfv = (PFNGLTEXENVFVPROC)load("glTexEnvfv");
+	glad_glTexEnvi = (PFNGLTEXENVIPROC)load("glTexEnvi");
+	glad_glTexEnviv = (PFNGLTEXENVIVPROC)load("glTexEnviv");
+	glad_glTexGend = (PFNGLTEXGENDPROC)load("glTexGend");
+	glad_glTexGendv = (PFNGLTEXGENDVPROC)load("glTexGendv");
+	glad_glTexGenf = (PFNGLTEXGENFPROC)load("glTexGenf");
+	glad_glTexGenfv = (PFNGLTEXGENFVPROC)load("glTexGenfv");
+	glad_glTexGeni = (PFNGLTEXGENIPROC)load("glTexGeni");
+	glad_glTexGeniv = (PFNGLTEXGENIVPROC)load("glTexGeniv");
+	glad_glFeedbackBuffer = (PFNGLFEEDBACKBUFFERPROC)load("glFeedbackBuffer");
+	glad_glSelectBuffer = (PFNGLSELECTBUFFERPROC)load("glSelectBuffer");
+	glad_glRenderMode = (PFNGLRENDERMODEPROC)load("glRenderMode");
+	glad_glInitNames = (PFNGLINITNAMESPROC)load("glInitNames");
+	glad_glLoadName = (PFNGLLOADNAMEPROC)load("glLoadName");
+	glad_glPassThrough = (PFNGLPASSTHROUGHPROC)load("glPassThrough");
+	glad_glPopName = (PFNGLPOPNAMEPROC)load("glPopName");
+	glad_glPushName = (PFNGLPUSHNAMEPROC)load("glPushName");
+	glad_glClearAccum = (PFNGLCLEARACCUMPROC)load("glClearAccum");
+	glad_glClearIndex = (PFNGLCLEARINDEXPROC)load("glClearIndex");
+	glad_glIndexMask = (PFNGLINDEXMASKPROC)load("glIndexMask");
+	glad_glAccum = (PFNGLACCUMPROC)load("glAccum");
+	glad_glPopAttrib = (PFNGLPOPATTRIBPROC)load("glPopAttrib");
+	glad_glPushAttrib = (PFNGLPUSHATTRIBPROC)load("glPushAttrib");
+	glad_glMap1d = (PFNGLMAP1DPROC)load("glMap1d");
+	glad_glMap1f = (PFNGLMAP1FPROC)load("glMap1f");
+	glad_glMap2d = (PFNGLMAP2DPROC)load("glMap2d");
+	glad_glMap2f = (PFNGLMAP2FPROC)load("glMap2f");
+	glad_glMapGrid1d = (PFNGLMAPGRID1DPROC)load("glMapGrid1d");
+	glad_glMapGrid1f = (PFNGLMAPGRID1FPROC)load("glMapGrid1f");
+	glad_glMapGrid2d = (PFNGLMAPGRID2DPROC)load("glMapGrid2d");
+	glad_glMapGrid2f = (PFNGLMAPGRID2FPROC)load("glMapGrid2f");
+	glad_glEvalCoord1d = (PFNGLEVALCOORD1DPROC)load("glEvalCoord1d");
+	glad_glEvalCoord1dv = (PFNGLEVALCOORD1DVPROC)load("glEvalCoord1dv");
+	glad_glEvalCoord1f = (PFNGLEVALCOORD1FPROC)load("glEvalCoord1f");
+	glad_glEvalCoord1fv = (PFNGLEVALCOORD1FVPROC)load("glEvalCoord1fv");
+	glad_glEvalCoord2d = (PFNGLEVALCOORD2DPROC)load("glEvalCoord2d");
+	glad_glEvalCoord2dv = (PFNGLEVALCOORD2DVPROC)load("glEvalCoord2dv");
+	glad_glEvalCoord2f = (PFNGLEVALCOORD2FPROC)load("glEvalCoord2f");
+	glad_glEvalCoord2fv = (PFNGLEVALCOORD2FVPROC)load("glEvalCoord2fv");
+	glad_glEvalMesh1 = (PFNGLEVALMESH1PROC)load("glEvalMesh1");
+	glad_glEvalPoint1 = (PFNGLEVALPOINT1PROC)load("glEvalPoint1");
+	glad_glEvalMesh2 = (PFNGLEVALMESH2PROC)load("glEvalMesh2");
+	glad_glEvalPoint2 = (PFNGLEVALPOINT2PROC)load("glEvalPoint2");
+	glad_glAlphaFunc = (PFNGLALPHAFUNCPROC)load("glAlphaFunc");
+	glad_glPixelZoom = (PFNGLPIXELZOOMPROC)load("glPixelZoom");
+	glad_glPixelTransferf = (PFNGLPIXELTRANSFERFPROC)load("glPixelTransferf");
+	glad_glPixelTransferi = (PFNGLPIXELTRANSFERIPROC)load("glPixelTransferi");
+	glad_glPixelMapfv = (PFNGLPIXELMAPFVPROC)load("glPixelMapfv");
+	glad_glPixelMapuiv = (PFNGLPIXELMAPUIVPROC)load("glPixelMapuiv");
+	glad_glPixelMapusv = (PFNGLPIXELMAPUSVPROC)load("glPixelMapusv");
+	glad_glCopyPixels = (PFNGLCOPYPIXELSPROC)load("glCopyPixels");
+	glad_glDrawPixels = (PFNGLDRAWPIXELSPROC)load("glDrawPixels");
+	glad_glGetClipPlane = (PFNGLGETCLIPPLANEPROC)load("glGetClipPlane");
+	glad_glGetLightfv = (PFNGLGETLIGHTFVPROC)load("glGetLightfv");
+	glad_glGetLightiv = (PFNGLGETLIGHTIVPROC)load("glGetLightiv");
+	glad_glGetMapdv = (PFNGLGETMAPDVPROC)load("glGetMapdv");
+	glad_glGetMapfv = (PFNGLGETMAPFVPROC)load("glGetMapfv");
+	glad_glGetMapiv = (PFNGLGETMAPIVPROC)load("glGetMapiv");
+	glad_glGetMaterialfv = (PFNGLGETMATERIALFVPROC)load("glGetMaterialfv");
+	glad_glGetMaterialiv = (PFNGLGETMATERIALIVPROC)load("glGetMaterialiv");
+	glad_glGetPixelMapfv = (PFNGLGETPIXELMAPFVPROC)load("glGetPixelMapfv");
+	glad_glGetPixelMapuiv = (PFNGLGETPIXELMAPUIVPROC)load("glGetPixelMapuiv");
+	glad_glGetPixelMapusv = (PFNGLGETPIXELMAPUSVPROC)load("glGetPixelMapusv");
+	glad_glGetPolygonStipple = (PFNGLGETPOLYGONSTIPPLEPROC)load("glGetPolygonStipple");
+	glad_glGetTexEnvfv = (PFNGLGETTEXENVFVPROC)load("glGetTexEnvfv");
+	glad_glGetTexEnviv = (PFNGLGETTEXENVIVPROC)load("glGetTexEnviv");
+	glad_glGetTexGendv = (PFNGLGETTEXGENDVPROC)load("glGetTexGendv");
+	glad_glGetTexGenfv = (PFNGLGETTEXGENFVPROC)load("glGetTexGenfv");
+	glad_glGetTexGeniv = (PFNGLGETTEXGENIVPROC)load("glGetTexGeniv");
+	glad_glIsList = (PFNGLISLISTPROC)load("glIsList");
+	glad_glFrustum = (PFNGLFRUSTUMPROC)load("glFrustum");
+	glad_glLoadIdentity = (PFNGLLOADIDENTITYPROC)load("glLoadIdentity");
+	glad_glLoadMatrixf = (PFNGLLOADMATRIXFPROC)load("glLoadMatrixf");
+	glad_glLoadMatrixd = (PFNGLLOADMATRIXDPROC)load("glLoadMatrixd");
+	glad_glMatrixMode = (PFNGLMATRIXMODEPROC)load("glMatrixMode");
+	glad_glMultMatrixf = (PFNGLMULTMATRIXFPROC)load("glMultMatrixf");
+	glad_glMultMatrixd = (PFNGLMULTMATRIXDPROC)load("glMultMatrixd");
+	glad_glOrtho = (PFNGLORTHOPROC)load("glOrtho");
+	glad_glPopMatrix = (PFNGLPOPMATRIXPROC)load("glPopMatrix");
+	glad_glPushMatrix = (PFNGLPUSHMATRIXPROC)load("glPushMatrix");
+	glad_glRotated = (PFNGLROTATEDPROC)load("glRotated");
+	glad_glRotatef = (PFNGLROTATEFPROC)load("glRotatef");
+	glad_glScaled = (PFNGLSCALEDPROC)load("glScaled");
+	glad_glScalef = (PFNGLSCALEFPROC)load("glScalef");
+	glad_glTranslated = (PFNGLTRANSLATEDPROC)load("glTranslated");
+	glad_glTranslatef = (PFNGLTRANSLATEFPROC)load("glTranslatef");
+}
+static void load_GL_VERSION_1_1(GLADloadproc load) { /* Assigns each glad_gl* pointer listed below from load("<function name>"), cast to its PFN...PROC type; the results are stored without a NULL check. */
+	if (!GLAD_GL_VERSION_1_1) return; /* GLAD_GL_VERSION_1_1 is zero: leave every pointer in this group untouched */
+	glad_glDrawArrays = (PFNGLDRAWARRAYSPROC)load("glDrawArrays");
+	glad_glDrawElements = (PFNGLDRAWELEMENTSPROC)load("glDrawElements");
+	glad_glGetPointerv = (PFNGLGETPOINTERVPROC)load("glGetPointerv");
+	glad_glPolygonOffset = (PFNGLPOLYGONOFFSETPROC)load("glPolygonOffset");
+	glad_glCopyTexImage1D = (PFNGLCOPYTEXIMAGE1DPROC)load("glCopyTexImage1D");
+	glad_glCopyTexImage2D = (PFNGLCOPYTEXIMAGE2DPROC)load("glCopyTexImage2D");
+	glad_glCopyTexSubImage1D = (PFNGLCOPYTEXSUBIMAGE1DPROC)load("glCopyTexSubImage1D");
+	glad_glCopyTexSubImage2D = (PFNGLCOPYTEXSUBIMAGE2DPROC)load("glCopyTexSubImage2D");
+	glad_glTexSubImage1D = (PFNGLTEXSUBIMAGE1DPROC)load("glTexSubImage1D");
+	glad_glTexSubImage2D = (PFNGLTEXSUBIMAGE2DPROC)load("glTexSubImage2D");
+	glad_glBindTexture = (PFNGLBINDTEXTUREPROC)load("glBindTexture");
+	glad_glDeleteTextures = (PFNGLDELETETEXTURESPROC)load("glDeleteTextures");
+	glad_glGenTextures = (PFNGLGENTEXTURESPROC)load("glGenTextures");
+	glad_glIsTexture = (PFNGLISTEXTUREPROC)load("glIsTexture");
+	glad_glArrayElement = (PFNGLARRAYELEMENTPROC)load("glArrayElement");
+	glad_glColorPointer = (PFNGLCOLORPOINTERPROC)load("glColorPointer");
+	glad_glDisableClientState = (PFNGLDISABLECLIENTSTATEPROC)load("glDisableClientState");
+	glad_glEdgeFlagPointer = (PFNGLEDGEFLAGPOINTERPROC)load("glEdgeFlagPointer");
+	glad_glEnableClientState = (PFNGLENABLECLIENTSTATEPROC)load("glEnableClientState");
+	glad_glIndexPointer = (PFNGLINDEXPOINTERPROC)load("glIndexPointer");
+	glad_glInterleavedArrays = (PFNGLINTERLEAVEDARRAYSPROC)load("glInterleavedArrays");
+	glad_glNormalPointer = (PFNGLNORMALPOINTERPROC)load("glNormalPointer");
+	glad_glTexCoordPointer = (PFNGLTEXCOORDPOINTERPROC)load("glTexCoordPointer");
+	glad_glVertexPointer = (PFNGLVERTEXPOINTERPROC)load("glVertexPointer");
+	glad_glAreTexturesResident = (PFNGLARETEXTURESRESIDENTPROC)load("glAreTexturesResident");
+	glad_glPrioritizeTextures = (PFNGLPRIORITIZETEXTURESPROC)load("glPrioritizeTextures");
+	glad_glIndexub = (PFNGLINDEXUBPROC)load("glIndexub");
+	glad_glIndexubv = (PFNGLINDEXUBVPROC)load("glIndexubv");
+	glad_glPopClientAttrib = (PFNGLPOPCLIENTATTRIBPROC)load("glPopClientAttrib");
+	glad_glPushClientAttrib = (PFNGLPUSHCLIENTATTRIBPROC)load("glPushClientAttrib");
+}
+static void load_GL_VERSION_1_2(GLADloadproc load) { /* Assigns each glad_gl* pointer listed below from load("<function name>"), cast to its PFN...PROC type; the results are stored without a NULL check. */
+	if (!GLAD_GL_VERSION_1_2) return; /* GLAD_GL_VERSION_1_2 is zero: leave every pointer in this group untouched */
+	glad_glDrawRangeElements = (PFNGLDRAWRANGEELEMENTSPROC)load("glDrawRangeElements");
+	glad_glTexImage3D = (PFNGLTEXIMAGE3DPROC)load("glTexImage3D");
+	glad_glTexSubImage3D = (PFNGLTEXSUBIMAGE3DPROC)load("glTexSubImage3D");
+	glad_glCopyTexSubImage3D = (PFNGLCOPYTEXSUBIMAGE3DPROC)load("glCopyTexSubImage3D");
+}
+static void load_GL_VERSION_1_3(GLADloadproc load) { /* Assigns each glad_gl* pointer listed below from load("<function name>"), cast to its PFN...PROC type; the results are stored without a NULL check. */
+	if (!GLAD_GL_VERSION_1_3) return; /* GLAD_GL_VERSION_1_3 is zero: leave every pointer in this group untouched */
+	glad_glActiveTexture = (PFNGLACTIVETEXTUREPROC)load("glActiveTexture");
+	glad_glSampleCoverage = (PFNGLSAMPLECOVERAGEPROC)load("glSampleCoverage");
+	glad_glCompressedTexImage3D = (PFNGLCOMPRESSEDTEXIMAGE3DPROC)load("glCompressedTexImage3D");
+	glad_glCompressedTexImage2D = (PFNGLCOMPRESSEDTEXIMAGE2DPROC)load("glCompressedTexImage2D");
+	glad_glCompressedTexImage1D = (PFNGLCOMPRESSEDTEXIMAGE1DPROC)load("glCompressedTexImage1D");
+	glad_glCompressedTexSubImage3D = (PFNGLCOMPRESSEDTEXSUBIMAGE3DPROC)load("glCompressedTexSubImage3D");
+	glad_glCompressedTexSubImage2D = (PFNGLCOMPRESSEDTEXSUBIMAGE2DPROC)load("glCompressedTexSubImage2D");
+	glad_glCompressedTexSubImage1D = (PFNGLCOMPRESSEDTEXSUBIMAGE1DPROC)load("glCompressedTexSubImage1D");
+	glad_glGetCompressedTexImage = (PFNGLGETCOMPRESSEDTEXIMAGEPROC)load("glGetCompressedTexImage");
+	glad_glClientActiveTexture = (PFNGLCLIENTACTIVETEXTUREPROC)load("glClientActiveTexture");
+	glad_glMultiTexCoord1d = (PFNGLMULTITEXCOORD1DPROC)load("glMultiTexCoord1d");
+	glad_glMultiTexCoord1dv = (PFNGLMULTITEXCOORD1DVPROC)load("glMultiTexCoord1dv");
+	glad_glMultiTexCoord1f = (PFNGLMULTITEXCOORD1FPROC)load("glMultiTexCoord1f");
+	glad_glMultiTexCoord1fv = (PFNGLMULTITEXCOORD1FVPROC)load("glMultiTexCoord1fv");
+	glad_glMultiTexCoord1i = (PFNGLMULTITEXCOORD1IPROC)load("glMultiTexCoord1i");
+	glad_glMultiTexCoord1iv = (PFNGLMULTITEXCOORD1IVPROC)load("glMultiTexCoord1iv");
+	glad_glMultiTexCoord1s = (PFNGLMULTITEXCOORD1SPROC)load("glMultiTexCoord1s");
+	glad_glMultiTexCoord1sv = (PFNGLMULTITEXCOORD1SVPROC)load("glMultiTexCoord1sv");
+	glad_glMultiTexCoord2d = (PFNGLMULTITEXCOORD2DPROC)load("glMultiTexCoord2d");
+	glad_glMultiTexCoord2dv = (PFNGLMULTITEXCOORD2DVPROC)load("glMultiTexCoord2dv");
+	glad_glMultiTexCoord2f = (PFNGLMULTITEXCOORD2FPROC)load("glMultiTexCoord2f");
+	glad_glMultiTexCoord2fv = (PFNGLMULTITEXCOORD2FVPROC)load("glMultiTexCoord2fv");
+	glad_glMultiTexCoord2i = (PFNGLMULTITEXCOORD2IPROC)load("glMultiTexCoord2i");
+	glad_glMultiTexCoord2iv = (PFNGLMULTITEXCOORD2IVPROC)load("glMultiTexCoord2iv");
+	glad_glMultiTexCoord2s = (PFNGLMULTITEXCOORD2SPROC)load("glMultiTexCoord2s");
+	glad_glMultiTexCoord2sv = (PFNGLMULTITEXCOORD2SVPROC)load("glMultiTexCoord2sv");
+	glad_glMultiTexCoord3d = (PFNGLMULTITEXCOORD3DPROC)load("glMultiTexCoord3d");
+	glad_glMultiTexCoord3dv = (PFNGLMULTITEXCOORD3DVPROC)load("glMultiTexCoord3dv");
+	glad_glMultiTexCoord3f = (PFNGLMULTITEXCOORD3FPROC)load("glMultiTexCoord3f");
+	glad_glMultiTexCoord3fv = (PFNGLMULTITEXCOORD3FVPROC)load("glMultiTexCoord3fv");
+	glad_glMultiTexCoord3i = (PFNGLMULTITEXCOORD3IPROC)load("glMultiTexCoord3i");
+	glad_glMultiTexCoord3iv = (PFNGLMULTITEXCOORD3IVPROC)load("glMultiTexCoord3iv");
+	glad_glMultiTexCoord3s = (PFNGLMULTITEXCOORD3SPROC)load("glMultiTexCoord3s");
+	glad_glMultiTexCoord3sv = (PFNGLMULTITEXCOORD3SVPROC)load("glMultiTexCoord3sv");
+	glad_glMultiTexCoord4d = (PFNGLMULTITEXCOORD4DPROC)load("glMultiTexCoord4d");
+	glad_glMultiTexCoord4dv = (PFNGLMULTITEXCOORD4DVPROC)load("glMultiTexCoord4dv");
+	glad_glMultiTexCoord4f = (PFNGLMULTITEXCOORD4FPROC)load("glMultiTexCoord4f");
+	glad_glMultiTexCoord4fv = (PFNGLMULTITEXCOORD4FVPROC)load("glMultiTexCoord4fv");
+	glad_glMultiTexCoord4i = (PFNGLMULTITEXCOORD4IPROC)load("glMultiTexCoord4i");
+	glad_glMultiTexCoord4iv = (PFNGLMULTITEXCOORD4IVPROC)load("glMultiTexCoord4iv");
+	glad_glMultiTexCoord4s = (PFNGLMULTITEXCOORD4SPROC)load("glMultiTexCoord4s");
+	glad_glMultiTexCoord4sv = (PFNGLMULTITEXCOORD4SVPROC)load("glMultiTexCoord4sv");
+	glad_glLoadTransposeMatrixf = (PFNGLLOADTRANSPOSEMATRIXFPROC)load("glLoadTransposeMatrixf");
+	glad_glLoadTransposeMatrixd = (PFNGLLOADTRANSPOSEMATRIXDPROC)load("glLoadTransposeMatrixd");
+	glad_glMultTransposeMatrixf = (PFNGLMULTTRANSPOSEMATRIXFPROC)load("glMultTransposeMatrixf");
+	glad_glMultTransposeMatrixd = (PFNGLMULTTRANSPOSEMATRIXDPROC)load("glMultTransposeMatrixd");
+}
+static void load_GL_VERSION_1_4(GLADloadproc load) { /* Assigns each glad_gl* pointer listed below from load("<function name>"), cast to its PFN...PROC type; the results are stored without a NULL check. */
+	if (!GLAD_GL_VERSION_1_4) return; /* GLAD_GL_VERSION_1_4 is zero: leave every pointer in this group untouched */
+	glad_glBlendFuncSeparate = (PFNGLBLENDFUNCSEPARATEPROC)load("glBlendFuncSeparate");
+	glad_glMultiDrawArrays = (PFNGLMULTIDRAWARRAYSPROC)load("glMultiDrawArrays");
+	glad_glMultiDrawElements = (PFNGLMULTIDRAWELEMENTSPROC)load("glMultiDrawElements");
+	glad_glPointParameterf = (PFNGLPOINTPARAMETERFPROC)load("glPointParameterf");
+	glad_glPointParameterfv = (PFNGLPOINTPARAMETERFVPROC)load("glPointParameterfv");
+	glad_glPointParameteri = (PFNGLPOINTPARAMETERIPROC)load("glPointParameteri");
+	glad_glPointParameteriv = (PFNGLPOINTPARAMETERIVPROC)load("glPointParameteriv");
+	glad_glFogCoordf = (PFNGLFOGCOORDFPROC)load("glFogCoordf");
+	glad_glFogCoordfv = (PFNGLFOGCOORDFVPROC)load("glFogCoordfv");
+	glad_glFogCoordd = (PFNGLFOGCOORDDPROC)load("glFogCoordd");
+	glad_glFogCoorddv = (PFNGLFOGCOORDDVPROC)load("glFogCoorddv");
+	glad_glFogCoordPointer = (PFNGLFOGCOORDPOINTERPROC)load("glFogCoordPointer");
+	glad_glSecondaryColor3b = (PFNGLSECONDARYCOLOR3BPROC)load("glSecondaryColor3b");
+	glad_glSecondaryColor3bv = (PFNGLSECONDARYCOLOR3BVPROC)load("glSecondaryColor3bv");
+	glad_glSecondaryColor3d = (PFNGLSECONDARYCOLOR3DPROC)load("glSecondaryColor3d");
+	glad_glSecondaryColor3dv = (PFNGLSECONDARYCOLOR3DVPROC)load("glSecondaryColor3dv");
+	glad_glSecondaryColor3f = (PFNGLSECONDARYCOLOR3FPROC)load("glSecondaryColor3f");
+	glad_glSecondaryColor3fv = (PFNGLSECONDARYCOLOR3FVPROC)load("glSecondaryColor3fv");
+	glad_glSecondaryColor3i = (PFNGLSECONDARYCOLOR3IPROC)load("glSecondaryColor3i");
+	glad_glSecondaryColor3iv = (PFNGLSECONDARYCOLOR3IVPROC)load("glSecondaryColor3iv");
+	glad_glSecondaryColor3s = (PFNGLSECONDARYCOLOR3SPROC)load("glSecondaryColor3s");
+	glad_glSecondaryColor3sv = (PFNGLSECONDARYCOLOR3SVPROC)load("glSecondaryColor3sv");
+	glad_glSecondaryColor3ub = (PFNGLSECONDARYCOLOR3UBPROC)load("glSecondaryColor3ub");
+	glad_glSecondaryColor3ubv = (PFNGLSECONDARYCOLOR3UBVPROC)load("glSecondaryColor3ubv");
+	glad_glSecondaryColor3ui = (PFNGLSECONDARYCOLOR3UIPROC)load("glSecondaryColor3ui");
+	glad_glSecondaryColor3uiv = (PFNGLSECONDARYCOLOR3UIVPROC)load("glSecondaryColor3uiv");
+	glad_glSecondaryColor3us = (PFNGLSECONDARYCOLOR3USPROC)load("glSecondaryColor3us");
+	glad_glSecondaryColor3usv = (PFNGLSECONDARYCOLOR3USVPROC)load("glSecondaryColor3usv");
+	glad_glSecondaryColorPointer = (PFNGLSECONDARYCOLORPOINTERPROC)load("glSecondaryColorPointer");
+	glad_glWindowPos2d = (PFNGLWINDOWPOS2DPROC)load("glWindowPos2d");
+	glad_glWindowPos2dv = (PFNGLWINDOWPOS2DVPROC)load("glWindowPos2dv");
+	glad_glWindowPos2f = (PFNGLWINDOWPOS2FPROC)load("glWindowPos2f");
+	glad_glWindowPos2fv = (PFNGLWINDOWPOS2FVPROC)load("glWindowPos2fv");
+	glad_glWindowPos2i = (PFNGLWINDOWPOS2IPROC)load("glWindowPos2i");
+	glad_glWindowPos2iv = (PFNGLWINDOWPOS2IVPROC)load("glWindowPos2iv");
+	glad_glWindowPos2s = (PFNGLWINDOWPOS2SPROC)load("glWindowPos2s");
+	glad_glWindowPos2sv = (PFNGLWINDOWPOS2SVPROC)load("glWindowPos2sv");
+	glad_glWindowPos3d = (PFNGLWINDOWPOS3DPROC)load("glWindowPos3d");
+	glad_glWindowPos3dv = (PFNGLWINDOWPOS3DVPROC)load("glWindowPos3dv");
+	glad_glWindowPos3f = (PFNGLWINDOWPOS3FPROC)load("glWindowPos3f");
+	glad_glWindowPos3fv = (PFNGLWINDOWPOS3FVPROC)load("glWindowPos3fv");
+	glad_glWindowPos3i = (PFNGLWINDOWPOS3IPROC)load("glWindowPos3i");
+	glad_glWindowPos3iv = (PFNGLWINDOWPOS3IVPROC)load("glWindowPos3iv");
+	glad_glWindowPos3s = (PFNGLWINDOWPOS3SPROC)load("glWindowPos3s");
+	glad_glWindowPos3sv = (PFNGLWINDOWPOS3SVPROC)load("glWindowPos3sv");
+	glad_glBlendColor = (PFNGLBLENDCOLORPROC)load("glBlendColor");
+	glad_glBlendEquation = (PFNGLBLENDEQUATIONPROC)load("glBlendEquation");
+}
+static void load_GL_VERSION_1_5(GLADloadproc load) { /* Assigns each glad_gl* pointer listed below from load("<function name>"), cast to its PFN...PROC type; the results are stored without a NULL check. */
+	if (!GLAD_GL_VERSION_1_5) return; /* GLAD_GL_VERSION_1_5 is zero: leave every pointer in this group untouched */
+	glad_glGenQueries = (PFNGLGENQUERIESPROC)load("glGenQueries");
+	glad_glDeleteQueries = (PFNGLDELETEQUERIESPROC)load("glDeleteQueries");
+	glad_glIsQuery = (PFNGLISQUERYPROC)load("glIsQuery");
+	glad_glBeginQuery = (PFNGLBEGINQUERYPROC)load("glBeginQuery");
+	glad_glEndQuery = (PFNGLENDQUERYPROC)load("glEndQuery");
+	glad_glGetQueryiv = (PFNGLGETQUERYIVPROC)load("glGetQueryiv");
+	glad_glGetQueryObjectiv = (PFNGLGETQUERYOBJECTIVPROC)load("glGetQueryObjectiv");
+	glad_glGetQueryObjectuiv = (PFNGLGETQUERYOBJECTUIVPROC)load("glGetQueryObjectuiv");
+	glad_glBindBuffer = (PFNGLBINDBUFFERPROC)load("glBindBuffer");
+	glad_glDeleteBuffers = (PFNGLDELETEBUFFERSPROC)load("glDeleteBuffers");
+	glad_glGenBuffers = (PFNGLGENBUFFERSPROC)load("glGenBuffers");
+	glad_glIsBuffer = (PFNGLISBUFFERPROC)load("glIsBuffer");
+	glad_glBufferData = (PFNGLBUFFERDATAPROC)load("glBufferData");
+	glad_glBufferSubData = (PFNGLBUFFERSUBDATAPROC)load("glBufferSubData");
+	glad_glGetBufferSubData = (PFNGLGETBUFFERSUBDATAPROC)load("glGetBufferSubData");
+	glad_glMapBuffer = (PFNGLMAPBUFFERPROC)load("glMapBuffer");
+	glad_glUnmapBuffer = (PFNGLUNMAPBUFFERPROC)load("glUnmapBuffer");
+	glad_glGetBufferParameteriv = (PFNGLGETBUFFERPARAMETERIVPROC)load("glGetBufferParameteriv");
+	glad_glGetBufferPointerv = (PFNGLGETBUFFERPOINTERVPROC)load("glGetBufferPointerv");
+}
+static void load_GL_VERSION_2_0(GLADloadproc load) { /* Assigns each glad_gl* pointer listed below from load("<function name>"), cast to its PFN...PROC type; the results are stored without a NULL check. */
+	if (!GLAD_GL_VERSION_2_0) return; /* GLAD_GL_VERSION_2_0 is zero: leave every pointer in this group untouched */
+	glad_glBlendEquationSeparate = (PFNGLBLENDEQUATIONSEPARATEPROC)load("glBlendEquationSeparate");
+	glad_glDrawBuffers = (PFNGLDRAWBUFFERSPROC)load("glDrawBuffers");
+	glad_glStencilOpSeparate = (PFNGLSTENCILOPSEPARATEPROC)load("glStencilOpSeparate");
+	glad_glStencilFuncSeparate = (PFNGLSTENCILFUNCSEPARATEPROC)load("glStencilFuncSeparate");
+	glad_glStencilMaskSeparate = (PFNGLSTENCILMASKSEPARATEPROC)load("glStencilMaskSeparate");
+	glad_glAttachShader = (PFNGLATTACHSHADERPROC)load("glAttachShader");
+	glad_glBindAttribLocation = (PFNGLBINDATTRIBLOCATIONPROC)load("glBindAttribLocation");
+	glad_glCompileShader = (PFNGLCOMPILESHADERPROC)load("glCompileShader");
+	glad_glCreateProgram = (PFNGLCREATEPROGRAMPROC)load("glCreateProgram");
+	glad_glCreateShader = (PFNGLCREATESHADERPROC)load("glCreateShader");
+	glad_glDeleteProgram = (PFNGLDELETEPROGRAMPROC)load("glDeleteProgram");
+	glad_glDeleteShader = (PFNGLDELETESHADERPROC)load("glDeleteShader");
+	glad_glDetachShader = (PFNGLDETACHSHADERPROC)load("glDetachShader");
+	glad_glDisableVertexAttribArray = (PFNGLDISABLEVERTEXATTRIBARRAYPROC)load("glDisableVertexAttribArray");
+	glad_glEnableVertexAttribArray = (PFNGLENABLEVERTEXATTRIBARRAYPROC)load("glEnableVertexAttribArray");
+	glad_glGetActiveAttrib = (PFNGLGETACTIVEATTRIBPROC)load("glGetActiveAttrib");
+	glad_glGetActiveUniform = (PFNGLGETACTIVEUNIFORMPROC)load("glGetActiveUniform");
+	glad_glGetAttachedShaders = (PFNGLGETATTACHEDSHADERSPROC)load("glGetAttachedShaders");
+	glad_glGetAttribLocation = (PFNGLGETATTRIBLOCATIONPROC)load("glGetAttribLocation");
+	glad_glGetProgramiv = (PFNGLGETPROGRAMIVPROC)load("glGetProgramiv");
+	glad_glGetProgramInfoLog = (PFNGLGETPROGRAMINFOLOGPROC)load("glGetProgramInfoLog");
+	glad_glGetShaderiv = (PFNGLGETSHADERIVPROC)load("glGetShaderiv");
+	glad_glGetShaderInfoLog = (PFNGLGETSHADERINFOLOGPROC)load("glGetShaderInfoLog");
+	glad_glGetShaderSource = (PFNGLGETSHADERSOURCEPROC)load("glGetShaderSource");
+	glad_glGetUniformLocation = (PFNGLGETUNIFORMLOCATIONPROC)load("glGetUniformLocation");
+	glad_glGetUniformfv = (PFNGLGETUNIFORMFVPROC)load("glGetUniformfv");
+	glad_glGetUniformiv = (PFNGLGETUNIFORMIVPROC)load("glGetUniformiv");
+	glad_glGetVertexAttribdv = (PFNGLGETVERTEXATTRIBDVPROC)load("glGetVertexAttribdv");
+	glad_glGetVertexAttribfv = (PFNGLGETVERTEXATTRIBFVPROC)load("glGetVertexAttribfv");
+	glad_glGetVertexAttribiv = (PFNGLGETVERTEXATTRIBIVPROC)load("glGetVertexAttribiv");
+	glad_glGetVertexAttribPointerv = (PFNGLGETVERTEXATTRIBPOINTERVPROC)load("glGetVertexAttribPointerv");
+	glad_glIsProgram = (PFNGLISPROGRAMPROC)load("glIsProgram");
+	glad_glIsShader = (PFNGLISSHADERPROC)load("glIsShader");
+	glad_glLinkProgram = (PFNGLLINKPROGRAMPROC)load("glLinkProgram");
+	glad_glShaderSource = (PFNGLSHADERSOURCEPROC)load("glShaderSource");
+	glad_glUseProgram = (PFNGLUSEPROGRAMPROC)load("glUseProgram");
+	glad_glUniform1f = (PFNGLUNIFORM1FPROC)load("glUniform1f");
+	glad_glUniform2f = (PFNGLUNIFORM2FPROC)load("glUniform2f");
+	glad_glUniform3f = (PFNGLUNIFORM3FPROC)load("glUniform3f");
+	glad_glUniform4f = (PFNGLUNIFORM4FPROC)load("glUniform4f");
+	glad_glUniform1i = (PFNGLUNIFORM1IPROC)load("glUniform1i");
+	glad_glUniform2i = (PFNGLUNIFORM2IPROC)load("glUniform2i");
+	glad_glUniform3i = (PFNGLUNIFORM3IPROC)load("glUniform3i");
+	glad_glUniform4i = (PFNGLUNIFORM4IPROC)load("glUniform4i");
+	glad_glUniform1fv = (PFNGLUNIFORM1FVPROC)load("glUniform1fv");
+	glad_glUniform2fv = (PFNGLUNIFORM2FVPROC)load("glUniform2fv");
+	glad_glUniform3fv = (PFNGLUNIFORM3FVPROC)load("glUniform3fv");
+	glad_glUniform4fv = (PFNGLUNIFORM4FVPROC)load("glUniform4fv");
+	glad_glUniform1iv = (PFNGLUNIFORM1IVPROC)load("glUniform1iv");
+	glad_glUniform2iv = (PFNGLUNIFORM2IVPROC)load("glUniform2iv");
+	glad_glUniform3iv = (PFNGLUNIFORM3IVPROC)load("glUniform3iv");
+	glad_glUniform4iv = (PFNGLUNIFORM4IVPROC)load("glUniform4iv");
+	glad_glUniformMatrix2fv = (PFNGLUNIFORMMATRIX2FVPROC)load("glUniformMatrix2fv");
+	glad_glUniformMatrix3fv = (PFNGLUNIFORMMATRIX3FVPROC)load("glUniformMatrix3fv");
+	glad_glUniformMatrix4fv = (PFNGLUNIFORMMATRIX4FVPROC)load("glUniformMatrix4fv");
+	glad_glValidateProgram = (PFNGLVALIDATEPROGRAMPROC)load("glValidateProgram");
+	glad_glVertexAttrib1d = (PFNGLVERTEXATTRIB1DPROC)load("glVertexAttrib1d");
+	glad_glVertexAttrib1dv = (PFNGLVERTEXATTRIB1DVPROC)load("glVertexAttrib1dv");
+	glad_glVertexAttrib1f = (PFNGLVERTEXATTRIB1FPROC)load("glVertexAttrib1f");
+	glad_glVertexAttrib1fv = (PFNGLVERTEXATTRIB1FVPROC)load("glVertexAttrib1fv");
+	glad_glVertexAttrib1s = (PFNGLVERTEXATTRIB1SPROC)load("glVertexAttrib1s");
+	glad_glVertexAttrib1sv = (PFNGLVERTEXATTRIB1SVPROC)load("glVertexAttrib1sv");
+	glad_glVertexAttrib2d = (PFNGLVERTEXATTRIB2DPROC)load("glVertexAttrib2d");
+	glad_glVertexAttrib2dv = (PFNGLVERTEXATTRIB2DVPROC)load("glVertexAttrib2dv");
+	glad_glVertexAttrib2f = (PFNGLVERTEXATTRIB2FPROC)load("glVertexAttrib2f");
+	glad_glVertexAttrib2fv = (PFNGLVERTEXATTRIB2FVPROC)load("glVertexAttrib2fv");
+	glad_glVertexAttrib2s = (PFNGLVERTEXATTRIB2SPROC)load("glVertexAttrib2s");
+	glad_glVertexAttrib2sv = (PFNGLVERTEXATTRIB2SVPROC)load("glVertexAttrib2sv");
+	glad_glVertexAttrib3d = (PFNGLVERTEXATTRIB3DPROC)load("glVertexAttrib3d");
+	glad_glVertexAttrib3dv = (PFNGLVERTEXATTRIB3DVPROC)load("glVertexAttrib3dv");
+	glad_glVertexAttrib3f = (PFNGLVERTEXATTRIB3FPROC)load("glVertexAttrib3f");
+	glad_glVertexAttrib3fv = (PFNGLVERTEXATTRIB3FVPROC)load("glVertexAttrib3fv");
+	glad_glVertexAttrib3s = (PFNGLVERTEXATTRIB3SPROC)load("glVertexAttrib3s");
+	glad_glVertexAttrib3sv = (PFNGLVERTEXATTRIB3SVPROC)load("glVertexAttrib3sv");
+	glad_glVertexAttrib4Nbv = (PFNGLVERTEXATTRIB4NBVPROC)load("glVertexAttrib4Nbv");
+	glad_glVertexAttrib4Niv = (PFNGLVERTEXATTRIB4NIVPROC)load("glVertexAttrib4Niv");
+	glad_glVertexAttrib4Nsv = (PFNGLVERTEXATTRIB4NSVPROC)load("glVertexAttrib4Nsv");
+	glad_glVertexAttrib4Nub = (PFNGLVERTEXATTRIB4NUBPROC)load("glVertexAttrib4Nub");
+	glad_glVertexAttrib4Nubv = (PFNGLVERTEXATTRIB4NUBVPROC)load("glVertexAttrib4Nubv");
+	glad_glVertexAttrib4Nuiv = (PFNGLVERTEXATTRIB4NUIVPROC)load("glVertexAttrib4Nuiv");
+	glad_glVertexAttrib4Nusv = (PFNGLVERTEXATTRIB4NUSVPROC)load("glVertexAttrib4Nusv");
+	glad_glVertexAttrib4bv = (PFNGLVERTEXATTRIB4BVPROC)load("glVertexAttrib4bv");
+	glad_glVertexAttrib4d = (PFNGLVERTEXATTRIB4DPROC)load("glVertexAttrib4d");
+	glad_glVertexAttrib4dv = (PFNGLVERTEXATTRIB4DVPROC)load("glVertexAttrib4dv");
+	glad_glVertexAttrib4f = (PFNGLVERTEXATTRIB4FPROC)load("glVertexAttrib4f");
+	glad_glVertexAttrib4fv = (PFNGLVERTEXATTRIB4FVPROC)load("glVertexAttrib4fv");
+	glad_glVertexAttrib4iv = (PFNGLVERTEXATTRIB4IVPROC)load("glVertexAttrib4iv");
+	glad_glVertexAttrib4s = (PFNGLVERTEXATTRIB4SPROC)load("glVertexAttrib4s");
+	glad_glVertexAttrib4sv = (PFNGLVERTEXATTRIB4SVPROC)load("glVertexAttrib4sv");
+	glad_glVertexAttrib4ubv = (PFNGLVERTEXATTRIB4UBVPROC)load("glVertexAttrib4ubv");
+	glad_glVertexAttrib4uiv = (PFNGLVERTEXATTRIB4UIVPROC)load("glVertexAttrib4uiv");
+	glad_glVertexAttrib4usv = (PFNGLVERTEXATTRIB4USVPROC)load("glVertexAttrib4usv");
+	glad_glVertexAttribPointer = (PFNGLVERTEXATTRIBPOINTERPROC)load("glVertexAttribPointer");
+}
+static void load_GL_VERSION_2_1(GLADloadproc load) { /* Resolves the six non-square glUniformMatrix*fv entry points through `load`. */
+	if (!GLAD_GL_VERSION_2_1) return; /* flag is computed by find_coreGL(); skip when the context reports a version below 2.1 */
+	glad_glUniformMatrix2x3fv = (PFNGLUNIFORMMATRIX2X3FVPROC)load("glUniformMatrix2x3fv"); /* results are stored as returned; no NULL check is made */
+	glad_glUniformMatrix3x2fv = (PFNGLUNIFORMMATRIX3X2FVPROC)load("glUniformMatrix3x2fv");
+	glad_glUniformMatrix2x4fv = (PFNGLUNIFORMMATRIX2X4FVPROC)load("glUniformMatrix2x4fv");
+	glad_glUniformMatrix4x2fv = (PFNGLUNIFORMMATRIX4X2FVPROC)load("glUniformMatrix4x2fv");
+	glad_glUniformMatrix3x4fv = (PFNGLUNIFORMMATRIX3X4FVPROC)load("glUniformMatrix3x4fv");
+	glad_glUniformMatrix4x3fv = (PFNGLUNIFORMMATRIX4X3FVPROC)load("glUniformMatrix4x3fv");
+}
+static void load_GL_VERSION_3_0(GLADloadproc load) { /* Resolves the OpenGL 3.0 entry points listed below through the caller-supplied `load`. */
+	if (!GLAD_GL_VERSION_3_0) return; /* flag is computed by find_coreGL(); skip when the context reports a version below 3.0 */
+	glad_glColorMaski = (PFNGLCOLORMASKIPROC)load("glColorMaski"); /* results are stored as returned; no NULL check is made */
+	glad_glGetBooleani_v = (PFNGLGETBOOLEANI_VPROC)load("glGetBooleani_v");
+	glad_glGetIntegeri_v = (PFNGLGETINTEGERI_VPROC)load("glGetIntegeri_v");
+	glad_glEnablei = (PFNGLENABLEIPROC)load("glEnablei");
+	glad_glDisablei = (PFNGLDISABLEIPROC)load("glDisablei");
+	glad_glIsEnabledi = (PFNGLISENABLEDIPROC)load("glIsEnabledi");
+	glad_glBeginTransformFeedback = (PFNGLBEGINTRANSFORMFEEDBACKPROC)load("glBeginTransformFeedback");
+	glad_glEndTransformFeedback = (PFNGLENDTRANSFORMFEEDBACKPROC)load("glEndTransformFeedback");
+	glad_glBindBufferRange = (PFNGLBINDBUFFERRANGEPROC)load("glBindBufferRange");
+	glad_glBindBufferBase = (PFNGLBINDBUFFERBASEPROC)load("glBindBufferBase");
+	glad_glTransformFeedbackVaryings = (PFNGLTRANSFORMFEEDBACKVARYINGSPROC)load("glTransformFeedbackVaryings");
+	glad_glGetTransformFeedbackVarying = (PFNGLGETTRANSFORMFEEDBACKVARYINGPROC)load("glGetTransformFeedbackVarying");
+	glad_glClampColor = (PFNGLCLAMPCOLORPROC)load("glClampColor");
+	glad_glBeginConditionalRender = (PFNGLBEGINCONDITIONALRENDERPROC)load("glBeginConditionalRender");
+	glad_glEndConditionalRender = (PFNGLENDCONDITIONALRENDERPROC)load("glEndConditionalRender");
+	glad_glVertexAttribIPointer = (PFNGLVERTEXATTRIBIPOINTERPROC)load("glVertexAttribIPointer");
+	glad_glGetVertexAttribIiv = (PFNGLGETVERTEXATTRIBIIVPROC)load("glGetVertexAttribIiv");
+	glad_glGetVertexAttribIuiv = (PFNGLGETVERTEXATTRIBIUIVPROC)load("glGetVertexAttribIuiv");
+	glad_glVertexAttribI1i = (PFNGLVERTEXATTRIBI1IPROC)load("glVertexAttribI1i");
+	glad_glVertexAttribI2i = (PFNGLVERTEXATTRIBI2IPROC)load("glVertexAttribI2i");
+	glad_glVertexAttribI3i = (PFNGLVERTEXATTRIBI3IPROC)load("glVertexAttribI3i");
+	glad_glVertexAttribI4i = (PFNGLVERTEXATTRIBI4IPROC)load("glVertexAttribI4i");
+	glad_glVertexAttribI1ui = (PFNGLVERTEXATTRIBI1UIPROC)load("glVertexAttribI1ui");
+	glad_glVertexAttribI2ui = (PFNGLVERTEXATTRIBI2UIPROC)load("glVertexAttribI2ui");
+	glad_glVertexAttribI3ui = (PFNGLVERTEXATTRIBI3UIPROC)load("glVertexAttribI3ui");
+	glad_glVertexAttribI4ui = (PFNGLVERTEXATTRIBI4UIPROC)load("glVertexAttribI4ui");
+	glad_glVertexAttribI1iv = (PFNGLVERTEXATTRIBI1IVPROC)load("glVertexAttribI1iv");
+	glad_glVertexAttribI2iv = (PFNGLVERTEXATTRIBI2IVPROC)load("glVertexAttribI2iv");
+	glad_glVertexAttribI3iv = (PFNGLVERTEXATTRIBI3IVPROC)load("glVertexAttribI3iv");
+	glad_glVertexAttribI4iv = (PFNGLVERTEXATTRIBI4IVPROC)load("glVertexAttribI4iv");
+	glad_glVertexAttribI1uiv = (PFNGLVERTEXATTRIBI1UIVPROC)load("glVertexAttribI1uiv");
+	glad_glVertexAttribI2uiv = (PFNGLVERTEXATTRIBI2UIVPROC)load("glVertexAttribI2uiv");
+	glad_glVertexAttribI3uiv = (PFNGLVERTEXATTRIBI3UIVPROC)load("glVertexAttribI3uiv");
+	glad_glVertexAttribI4uiv = (PFNGLVERTEXATTRIBI4UIVPROC)load("glVertexAttribI4uiv");
+	glad_glVertexAttribI4bv = (PFNGLVERTEXATTRIBI4BVPROC)load("glVertexAttribI4bv");
+	glad_glVertexAttribI4sv = (PFNGLVERTEXATTRIBI4SVPROC)load("glVertexAttribI4sv");
+	glad_glVertexAttribI4ubv = (PFNGLVERTEXATTRIBI4UBVPROC)load("glVertexAttribI4ubv");
+	glad_glVertexAttribI4usv = (PFNGLVERTEXATTRIBI4USVPROC)load("glVertexAttribI4usv");
+	glad_glGetUniformuiv = (PFNGLGETUNIFORMUIVPROC)load("glGetUniformuiv");
+	glad_glBindFragDataLocation = (PFNGLBINDFRAGDATALOCATIONPROC)load("glBindFragDataLocation");
+	glad_glGetFragDataLocation = (PFNGLGETFRAGDATALOCATIONPROC)load("glGetFragDataLocation");
+	glad_glUniform1ui = (PFNGLUNIFORM1UIPROC)load("glUniform1ui");
+	glad_glUniform2ui = (PFNGLUNIFORM2UIPROC)load("glUniform2ui");
+	glad_glUniform3ui = (PFNGLUNIFORM3UIPROC)load("glUniform3ui");
+	glad_glUniform4ui = (PFNGLUNIFORM4UIPROC)load("glUniform4ui");
+	glad_glUniform1uiv = (PFNGLUNIFORM1UIVPROC)load("glUniform1uiv");
+	glad_glUniform2uiv = (PFNGLUNIFORM2UIVPROC)load("glUniform2uiv");
+	glad_glUniform3uiv = (PFNGLUNIFORM3UIVPROC)load("glUniform3uiv");
+	glad_glUniform4uiv = (PFNGLUNIFORM4UIVPROC)load("glUniform4uiv");
+	glad_glTexParameterIiv = (PFNGLTEXPARAMETERIIVPROC)load("glTexParameterIiv");
+	glad_glTexParameterIuiv = (PFNGLTEXPARAMETERIUIVPROC)load("glTexParameterIuiv");
+	glad_glGetTexParameterIiv = (PFNGLGETTEXPARAMETERIIVPROC)load("glGetTexParameterIiv");
+	glad_glGetTexParameterIuiv = (PFNGLGETTEXPARAMETERIUIVPROC)load("glGetTexParameterIuiv");
+	glad_glClearBufferiv = (PFNGLCLEARBUFFERIVPROC)load("glClearBufferiv");
+	glad_glClearBufferuiv = (PFNGLCLEARBUFFERUIVPROC)load("glClearBufferuiv");
+	glad_glClearBufferfv = (PFNGLCLEARBUFFERFVPROC)load("glClearBufferfv");
+	glad_glClearBufferfi = (PFNGLCLEARBUFFERFIPROC)load("glClearBufferfi");
+	glad_glGetStringi = (PFNGLGETSTRINGIPROC)load("glGetStringi");
+	glad_glIsRenderbuffer = (PFNGLISRENDERBUFFERPROC)load("glIsRenderbuffer"); /* renderbuffer and framebuffer functions start here */
+	glad_glBindRenderbuffer = (PFNGLBINDRENDERBUFFERPROC)load("glBindRenderbuffer");
+	glad_glDeleteRenderbuffers = (PFNGLDELETERENDERBUFFERSPROC)load("glDeleteRenderbuffers");
+	glad_glGenRenderbuffers = (PFNGLGENRENDERBUFFERSPROC)load("glGenRenderbuffers");
+	glad_glRenderbufferStorage = (PFNGLRENDERBUFFERSTORAGEPROC)load("glRenderbufferStorage");
+	glad_glGetRenderbufferParameteriv = (PFNGLGETRENDERBUFFERPARAMETERIVPROC)load("glGetRenderbufferParameteriv");
+	glad_glIsFramebuffer = (PFNGLISFRAMEBUFFERPROC)load("glIsFramebuffer");
+	glad_glBindFramebuffer = (PFNGLBINDFRAMEBUFFERPROC)load("glBindFramebuffer");
+	glad_glDeleteFramebuffers = (PFNGLDELETEFRAMEBUFFERSPROC)load("glDeleteFramebuffers");
+	glad_glGenFramebuffers = (PFNGLGENFRAMEBUFFERSPROC)load("glGenFramebuffers");
+	glad_glCheckFramebufferStatus = (PFNGLCHECKFRAMEBUFFERSTATUSPROC)load("glCheckFramebufferStatus");
+	glad_glFramebufferTexture1D = (PFNGLFRAMEBUFFERTEXTURE1DPROC)load("glFramebufferTexture1D");
+	glad_glFramebufferTexture2D = (PFNGLFRAMEBUFFERTEXTURE2DPROC)load("glFramebufferTexture2D");
+	glad_glFramebufferTexture3D = (PFNGLFRAMEBUFFERTEXTURE3DPROC)load("glFramebufferTexture3D");
+	glad_glFramebufferRenderbuffer = (PFNGLFRAMEBUFFERRENDERBUFFERPROC)load("glFramebufferRenderbuffer");
+	glad_glGetFramebufferAttachmentParameteriv = (PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVPROC)load("glGetFramebufferAttachmentParameteriv");
+	glad_glGenerateMipmap = (PFNGLGENERATEMIPMAPPROC)load("glGenerateMipmap");
+	glad_glBlitFramebuffer = (PFNGLBLITFRAMEBUFFERPROC)load("glBlitFramebuffer");
+	glad_glRenderbufferStorageMultisample = (PFNGLRENDERBUFFERSTORAGEMULTISAMPLEPROC)load("glRenderbufferStorageMultisample");
+	glad_glFramebufferTextureLayer = (PFNGLFRAMEBUFFERTEXTURELAYERPROC)load("glFramebufferTextureLayer");
+	glad_glMapBufferRange = (PFNGLMAPBUFFERRANGEPROC)load("glMapBufferRange");
+	glad_glFlushMappedBufferRange = (PFNGLFLUSHMAPPEDBUFFERRANGEPROC)load("glFlushMappedBufferRange");
+	glad_glBindVertexArray = (PFNGLBINDVERTEXARRAYPROC)load("glBindVertexArray"); /* vertex array object functions */
+	glad_glDeleteVertexArrays = (PFNGLDELETEVERTEXARRAYSPROC)load("glDeleteVertexArrays");
+	glad_glGenVertexArrays = (PFNGLGENVERTEXARRAYSPROC)load("glGenVertexArrays");
+	glad_glIsVertexArray = (PFNGLISVERTEXARRAYPROC)load("glIsVertexArray");
+}
+static void load_GL_VERSION_3_1(GLADloadproc load) { /* Resolves the OpenGL 3.1 entry points listed below through the caller-supplied `load`. */
+	if (!GLAD_GL_VERSION_3_1) return; /* flag is computed by find_coreGL(); skip when the context reports a version below 3.1 */
+	glad_glDrawArraysInstanced = (PFNGLDRAWARRAYSINSTANCEDPROC)load("glDrawArraysInstanced"); /* results are stored as returned; no NULL check is made */
+	glad_glDrawElementsInstanced = (PFNGLDRAWELEMENTSINSTANCEDPROC)load("glDrawElementsInstanced");
+	glad_glTexBuffer = (PFNGLTEXBUFFERPROC)load("glTexBuffer");
+	glad_glPrimitiveRestartIndex = (PFNGLPRIMITIVERESTARTINDEXPROC)load("glPrimitiveRestartIndex");
+	glad_glCopyBufferSubData = (PFNGLCOPYBUFFERSUBDATAPROC)load("glCopyBufferSubData");
+	glad_glGetUniformIndices = (PFNGLGETUNIFORMINDICESPROC)load("glGetUniformIndices");
+	glad_glGetActiveUniformsiv = (PFNGLGETACTIVEUNIFORMSIVPROC)load("glGetActiveUniformsiv");
+	glad_glGetActiveUniformName = (PFNGLGETACTIVEUNIFORMNAMEPROC)load("glGetActiveUniformName");
+	glad_glGetUniformBlockIndex = (PFNGLGETUNIFORMBLOCKINDEXPROC)load("glGetUniformBlockIndex");
+	glad_glGetActiveUniformBlockiv = (PFNGLGETACTIVEUNIFORMBLOCKIVPROC)load("glGetActiveUniformBlockiv");
+	glad_glGetActiveUniformBlockName = (PFNGLGETACTIVEUNIFORMBLOCKNAMEPROC)load("glGetActiveUniformBlockName");
+	glad_glUniformBlockBinding = (PFNGLUNIFORMBLOCKBINDINGPROC)load("glUniformBlockBinding");
+	glad_glBindBufferRange = (PFNGLBINDBUFFERRANGEPROC)load("glBindBufferRange"); /* also assigned by load_GL_VERSION_3_0(); looked up again here */
+	glad_glBindBufferBase = (PFNGLBINDBUFFERBASEPROC)load("glBindBufferBase"); /* also assigned by load_GL_VERSION_3_0() */
+	glad_glGetIntegeri_v = (PFNGLGETINTEGERI_VPROC)load("glGetIntegeri_v"); /* also assigned by load_GL_VERSION_3_0() */
+}
+static void load_GL_VERSION_3_2(GLADloadproc load) { /* Resolves the OpenGL 3.2 entry points listed below through the caller-supplied `load`. */
+	if (!GLAD_GL_VERSION_3_2) return; /* flag is computed by find_coreGL(); skip when the context reports a version below 3.2 */
+	glad_glDrawElementsBaseVertex = (PFNGLDRAWELEMENTSBASEVERTEXPROC)load("glDrawElementsBaseVertex"); /* results are stored as returned; no NULL check is made */
+	glad_glDrawRangeElementsBaseVertex = (PFNGLDRAWRANGEELEMENTSBASEVERTEXPROC)load("glDrawRangeElementsBaseVertex");
+	glad_glDrawElementsInstancedBaseVertex = (PFNGLDRAWELEMENTSINSTANCEDBASEVERTEXPROC)load("glDrawElementsInstancedBaseVertex");
+	glad_glMultiDrawElementsBaseVertex = (PFNGLMULTIDRAWELEMENTSBASEVERTEXPROC)load("glMultiDrawElementsBaseVertex");
+	glad_glProvokingVertex = (PFNGLPROVOKINGVERTEXPROC)load("glProvokingVertex");
+	glad_glFenceSync = (PFNGLFENCESYNCPROC)load("glFenceSync"); /* sync object functions */
+	glad_glIsSync = (PFNGLISSYNCPROC)load("glIsSync");
+	glad_glDeleteSync = (PFNGLDELETESYNCPROC)load("glDeleteSync");
+	glad_glClientWaitSync = (PFNGLCLIENTWAITSYNCPROC)load("glClientWaitSync");
+	glad_glWaitSync = (PFNGLWAITSYNCPROC)load("glWaitSync");
+	glad_glGetInteger64v = (PFNGLGETINTEGER64VPROC)load("glGetInteger64v");
+	glad_glGetSynciv = (PFNGLGETSYNCIVPROC)load("glGetSynciv");
+	glad_glGetInteger64i_v = (PFNGLGETINTEGER64I_VPROC)load("glGetInteger64i_v");
+	glad_glGetBufferParameteri64v = (PFNGLGETBUFFERPARAMETERI64VPROC)load("glGetBufferParameteri64v");
+	glad_glFramebufferTexture = (PFNGLFRAMEBUFFERTEXTUREPROC)load("glFramebufferTexture");
+	glad_glTexImage2DMultisample = (PFNGLTEXIMAGE2DMULTISAMPLEPROC)load("glTexImage2DMultisample"); /* multisample texture functions */
+	glad_glTexImage3DMultisample = (PFNGLTEXIMAGE3DMULTISAMPLEPROC)load("glTexImage3DMultisample");
+	glad_glGetMultisamplefv = (PFNGLGETMULTISAMPLEFVPROC)load("glGetMultisamplefv");
+	glad_glSampleMaski = (PFNGLSAMPLEMASKIPROC)load("glSampleMaski");
+}
+static void load_GL_VERSION_3_3(GLADloadproc load) { /* Resolves the OpenGL 3.3 entry points listed below through the caller-supplied `load`. */
+	if (!GLAD_GL_VERSION_3_3) return; /* flag is computed by find_coreGL(); skip when the context reports a version below 3.3 */
+	glad_glBindFragDataLocationIndexed = (PFNGLBINDFRAGDATALOCATIONINDEXEDPROC)load("glBindFragDataLocationIndexed"); /* results are stored as returned; no NULL check is made */
+	glad_glGetFragDataIndex = (PFNGLGETFRAGDATAINDEXPROC)load("glGetFragDataIndex");
+	glad_glGenSamplers = (PFNGLGENSAMPLERSPROC)load("glGenSamplers"); /* sampler object functions */
+	glad_glDeleteSamplers = (PFNGLDELETESAMPLERSPROC)load("glDeleteSamplers");
+	glad_glIsSampler = (PFNGLISSAMPLERPROC)load("glIsSampler");
+	glad_glBindSampler = (PFNGLBINDSAMPLERPROC)load("glBindSampler");
+	glad_glSamplerParameteri = (PFNGLSAMPLERPARAMETERIPROC)load("glSamplerParameteri");
+	glad_glSamplerParameteriv = (PFNGLSAMPLERPARAMETERIVPROC)load("glSamplerParameteriv");
+	glad_glSamplerParameterf = (PFNGLSAMPLERPARAMETERFPROC)load("glSamplerParameterf");
+	glad_glSamplerParameterfv = (PFNGLSAMPLERPARAMETERFVPROC)load("glSamplerParameterfv");
+	glad_glSamplerParameterIiv = (PFNGLSAMPLERPARAMETERIIVPROC)load("glSamplerParameterIiv");
+	glad_glSamplerParameterIuiv = (PFNGLSAMPLERPARAMETERIUIVPROC)load("glSamplerParameterIuiv");
+	glad_glGetSamplerParameteriv = (PFNGLGETSAMPLERPARAMETERIVPROC)load("glGetSamplerParameteriv");
+	glad_glGetSamplerParameterIiv = (PFNGLGETSAMPLERPARAMETERIIVPROC)load("glGetSamplerParameterIiv");
+	glad_glGetSamplerParameterfv = (PFNGLGETSAMPLERPARAMETERFVPROC)load("glGetSamplerParameterfv");
+	glad_glGetSamplerParameterIuiv = (PFNGLGETSAMPLERPARAMETERIUIVPROC)load("glGetSamplerParameterIuiv");
+	glad_glQueryCounter = (PFNGLQUERYCOUNTERPROC)load("glQueryCounter");
+	glad_glGetQueryObjecti64v = (PFNGLGETQUERYOBJECTI64VPROC)load("glGetQueryObjecti64v");
+	glad_glGetQueryObjectui64v = (PFNGLGETQUERYOBJECTUI64VPROC)load("glGetQueryObjectui64v");
+	glad_glVertexAttribDivisor = (PFNGLVERTEXATTRIBDIVISORPROC)load("glVertexAttribDivisor");
+	glad_glVertexAttribP1ui = (PFNGLVERTEXATTRIBP1UIPROC)load("glVertexAttribP1ui"); /* the *P<n>ui / *P<n>uiv family runs from here to the end of the function */
+	glad_glVertexAttribP1uiv = (PFNGLVERTEXATTRIBP1UIVPROC)load("glVertexAttribP1uiv");
+	glad_glVertexAttribP2ui = (PFNGLVERTEXATTRIBP2UIPROC)load("glVertexAttribP2ui");
+	glad_glVertexAttribP2uiv = (PFNGLVERTEXATTRIBP2UIVPROC)load("glVertexAttribP2uiv");
+	glad_glVertexAttribP3ui = (PFNGLVERTEXATTRIBP3UIPROC)load("glVertexAttribP3ui");
+	glad_glVertexAttribP3uiv = (PFNGLVERTEXATTRIBP3UIVPROC)load("glVertexAttribP3uiv");
+	glad_glVertexAttribP4ui = (PFNGLVERTEXATTRIBP4UIPROC)load("glVertexAttribP4ui");
+	glad_glVertexAttribP4uiv = (PFNGLVERTEXATTRIBP4UIVPROC)load("glVertexAttribP4uiv");
+	glad_glVertexP2ui = (PFNGLVERTEXP2UIPROC)load("glVertexP2ui");
+	glad_glVertexP2uiv = (PFNGLVERTEXP2UIVPROC)load("glVertexP2uiv");
+	glad_glVertexP3ui = (PFNGLVERTEXP3UIPROC)load("glVertexP3ui");
+	glad_glVertexP3uiv = (PFNGLVERTEXP3UIVPROC)load("glVertexP3uiv");
+	glad_glVertexP4ui = (PFNGLVERTEXP4UIPROC)load("glVertexP4ui");
+	glad_glVertexP4uiv = (PFNGLVERTEXP4UIVPROC)load("glVertexP4uiv");
+	glad_glTexCoordP1ui = (PFNGLTEXCOORDP1UIPROC)load("glTexCoordP1ui");
+	glad_glTexCoordP1uiv = (PFNGLTEXCOORDP1UIVPROC)load("glTexCoordP1uiv");
+	glad_glTexCoordP2ui = (PFNGLTEXCOORDP2UIPROC)load("glTexCoordP2ui");
+	glad_glTexCoordP2uiv = (PFNGLTEXCOORDP2UIVPROC)load("glTexCoordP2uiv");
+	glad_glTexCoordP3ui = (PFNGLTEXCOORDP3UIPROC)load("glTexCoordP3ui");
+	glad_glTexCoordP3uiv = (PFNGLTEXCOORDP3UIVPROC)load("glTexCoordP3uiv");
+	glad_glTexCoordP4ui = (PFNGLTEXCOORDP4UIPROC)load("glTexCoordP4ui");
+	glad_glTexCoordP4uiv = (PFNGLTEXCOORDP4UIVPROC)load("glTexCoordP4uiv");
+	glad_glMultiTexCoordP1ui = (PFNGLMULTITEXCOORDP1UIPROC)load("glMultiTexCoordP1ui");
+	glad_glMultiTexCoordP1uiv = (PFNGLMULTITEXCOORDP1UIVPROC)load("glMultiTexCoordP1uiv");
+	glad_glMultiTexCoordP2ui = (PFNGLMULTITEXCOORDP2UIPROC)load("glMultiTexCoordP2ui");
+	glad_glMultiTexCoordP2uiv = (PFNGLMULTITEXCOORDP2UIVPROC)load("glMultiTexCoordP2uiv");
+	glad_glMultiTexCoordP3ui = (PFNGLMULTITEXCOORDP3UIPROC)load("glMultiTexCoordP3ui");
+	glad_glMultiTexCoordP3uiv = (PFNGLMULTITEXCOORDP3UIVPROC)load("glMultiTexCoordP3uiv");
+	glad_glMultiTexCoordP4ui = (PFNGLMULTITEXCOORDP4UIPROC)load("glMultiTexCoordP4ui");
+	glad_glMultiTexCoordP4uiv = (PFNGLMULTITEXCOORDP4UIVPROC)load("glMultiTexCoordP4uiv");
+	glad_glNormalP3ui = (PFNGLNORMALP3UIPROC)load("glNormalP3ui");
+	glad_glNormalP3uiv = (PFNGLNORMALP3UIVPROC)load("glNormalP3uiv");
+	glad_glColorP3ui = (PFNGLCOLORP3UIPROC)load("glColorP3ui");
+	glad_glColorP3uiv = (PFNGLCOLORP3UIVPROC)load("glColorP3uiv");
+	glad_glColorP4ui = (PFNGLCOLORP4UIPROC)load("glColorP4ui");
+	glad_glColorP4uiv = (PFNGLCOLORP4UIVPROC)load("glColorP4uiv");
+	glad_glSecondaryColorP3ui = (PFNGLSECONDARYCOLORP3UIPROC)load("glSecondaryColorP3ui");
+	glad_glSecondaryColorP3uiv = (PFNGLSECONDARYCOLORP3UIVPROC)load("glSecondaryColorP3uiv");
+}
+static int find_extensionsGL(void) { /* Calls get_exts() and, when it succeeds, free_exts(); returns 1 on success and 0 otherwise. */
+	const int exts_ok = get_exts() ? 1 : 0;
+	(void)&has_ext; /* no extension is queried here; this only references has_ext (presumably to silence an unused-function warning) */
+	if (exts_ok) free_exts();
+	return exts_ok;
+}
+
+static void find_coreGL(void) {
+	/* Parses glGetString(GL_VERSION) into GLVersion, max_loaded_* and the GLAD_GL_VERSION_* flags. */
+	/* Thank you @elmindreda
+	 * https://github.com/elmindreda/greg/blob/master/templates/greg.c.in#L176
+	 * https://github.com/glfw/glfw/blob/master/src/context.c#L36
+	 */
+	int i, major = 0, minor = 0; /* zeroed so an unparsable version string yields 0.0 instead of indeterminate values */
+
+	const char* version;
+	const char* prefixes[] = { /* NULL-terminated list of OpenGL ES prefixes stripped before the number is parsed */
+		"OpenGL ES-CM ",
+		"OpenGL ES-CL ",
+		"OpenGL ES ",
+		NULL
+	};
+
+	version = (const char*)glGetString(GL_VERSION);
+	if (!version) return; /* no version string: leave every flag untouched */
+
+	for (i = 0; prefixes[i]; i++) {
+		const size_t length = strlen(prefixes[i]);
+		if (strncmp(version, prefixes[i], length) == 0) {
+			version += length; /* skip the first matching prefix only */
+			break;
+		}
+	}
+
+	/* PR #18: use sscanf_s when building with MSVC */
+#ifdef _MSC_VER
+	sscanf_s(version, "%d.%d", &major, &minor);
+#else
+	sscanf(version, "%d.%d", &major, &minor);
+#endif
+
+	GLVersion.major = major; GLVersion.minor = minor;
+	max_loaded_major = major; max_loaded_minor = minor;
+	GLAD_GL_VERSION_1_0 = (major == 1 && minor >= 0) || major > 1;
+	GLAD_GL_VERSION_1_1 = (major == 1 && minor >= 1) || major > 1;
+	GLAD_GL_VERSION_1_2 = (major == 1 && minor >= 2) || major > 1;
+	GLAD_GL_VERSION_1_3 = (major == 1 && minor >= 3) || major > 1;
+	GLAD_GL_VERSION_1_4 = (major == 1 && minor >= 4) || major > 1;
+	GLAD_GL_VERSION_1_5 = (major == 1 && minor >= 5) || major > 1;
+	GLAD_GL_VERSION_2_0 = (major == 2 && minor >= 0) || major > 2;
+	GLAD_GL_VERSION_2_1 = (major == 2 && minor >= 1) || major > 2;
+	GLAD_GL_VERSION_3_0 = (major == 3 && minor >= 0) || major > 3;
+	GLAD_GL_VERSION_3_1 = (major == 3 && minor >= 1) || major > 3;
+	GLAD_GL_VERSION_3_2 = (major == 3 && minor >= 2) || major > 3;
+	GLAD_GL_VERSION_3_3 = (major == 3 && minor >= 3) || major > 3;
+	if (GLVersion.major > 3 || (GLVersion.major >= 3 && GLVersion.minor >= 3)) { /* cap max_loaded_* at 3.3 for contexts reporting 3.3 or newer */
+		max_loaded_major = 3;
+		max_loaded_minor = 3;
+	}
+}
+
+int gladLoadGLLoader(GLADloadproc load) {
+	/* Resolves glGetString via `load`, detects the context version, then runs the GL 1.0-3.3 loaders; returns 1 on success, 0 on failure. */
+	static void (* const version_loaders[])(GLADloadproc) = {
+		load_GL_VERSION_1_0, load_GL_VERSION_1_1, load_GL_VERSION_1_2,
+		load_GL_VERSION_1_3, load_GL_VERSION_1_4, load_GL_VERSION_1_5,
+		load_GL_VERSION_2_0, load_GL_VERSION_2_1, load_GL_VERSION_3_0,
+		load_GL_VERSION_3_1, load_GL_VERSION_3_2, load_GL_VERSION_3_3
+	};
+	size_t idx;
+
+	GLVersion.major = 0;
+	GLVersion.minor = 0;
+	glGetString = (PFNGLGETSTRINGPROC)load("glGetString");
+	/* The version probe needs a usable glGetString and a non-NULL GL_VERSION string. */
+	if (glGetString == NULL || glGetString(GL_VERSION) == NULL) return 0;
+	find_coreGL();
+	/* Run the per-version loaders in ascending order, 1.0 through 3.3. */
+	for (idx = 0; idx < sizeof(version_loaders) / sizeof(version_loaders[0]); idx++) {
+		version_loaders[idx](load);
+	}
+	return find_extensionsGL() && (GLVersion.major != 0 || GLVersion.minor != 0);
+}
+
diff --git a/src/ThirdParty/stb_image.hpp b/src/ThirdParty/stb_image.hpp
new file mode 100644
index 0000000..764761a
--- /dev/null
+++ b/src/ThirdParty/stb_image.hpp
@@ -0,0 +1,7568 @@
+/* stb_image - v2.19 - public domain image loader - http://nothings.org/stb
+								  no warranty implied; use at your own risk
+
+   Do this:
+	  #define STB_IMAGE_IMPLEMENTATION
+   before you include this file in *one* C or C++ file to create the implementation.
+
+   // i.e. it should look like this:
+   #include ...
+   #include ...
+   #include ...
+   #define STB_IMAGE_IMPLEMENTATION
+   #include "stb_image.hpp"
+
+   You can #define STBI_ASSERT(x) before the #include to avoid using assert.h.
+   And #define STBI_MALLOC, STBI_REALLOC, and STBI_FREE to avoid using malloc,realloc,free
+
+
+   QUICK NOTES:
+	  Primarily of interest to game developers and other people who can
+		  avoid problematic images and only need the trivial interface
+
+	  JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib)
+	  PNG 1/2/4/8/16-bit-per-channel
+
+	  TGA (not sure what subset, if a subset)
+	  BMP non-1bpp, non-RLE
+	  PSD (composited view only, no extra channels, 8/16 bit-per-channel)
+
+	  GIF (*comp always reports as 4-channel)
+	  HDR (radiance rgbE format)
+	  PIC (Softimage PIC)
+	  PNM (PPM and PGM binary only)
+
+	  Animated GIF still needs a proper API, but here's one way to do it:
+		  http://gist.github.com/urraka/685d9a6340b26b830d49
+
+	  - decode from memory or through FILE (define STBI_NO_STDIO to remove code)
+	  - decode from arbitrary I/O callbacks
+	  - SIMD acceleration on x86/x64 (SSE2) and ARM (NEON)
+
+   Full documentation under "DOCUMENTATION" below.
+
+
+LICENSE
+
+  See end of file for license information.
+
+RECENT REVISION HISTORY:
+
+	  2.19  (2018-02-11) fix warning
+	  2.18  (2018-01-30) fix warnings
+	  2.17  (2018-01-29) bugfix, 1-bit BMP, 16-bitness query, fix warnings
+	  2.16  (2017-07-23) all functions have 16-bit variants; optimizations; bugfixes
+	  2.15  (2017-03-18) fix png-1,2,4; all Imagenet JPGs; no runtime SSE detection on GCC
+	  2.14  (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs
+	  2.13  (2016-12-04) experimental 16-bit API, only for PNG so far; fixes
+	  2.12  (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
+	  2.11  (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64
+						 RGB-format JPEG; remove white matting in PSD;
+						 allocate large structures on the stack;
+						 correct channel count for PNG & BMP
+	  2.10  (2016-01-22) avoid warning introduced in 2.09
+	  2.09  (2016-01-16) 16-bit TGA; comments in PNM files; STBI_REALLOC_SIZED
+
+   See end of file for full revision history.
+
+
+ ============================    Contributors    =========================
+
+ Image formats                          Extensions, features
+	Sean Barrett (jpeg, png, bmp)          Jetro Lauha (stbi_info)
+	Nicolas Schulz (hdr, psd)              Martin "SpartanJ" Golini (stbi_info)
+	Jonathan Dummer (tga)                  James "moose2000" Brown (iPhone PNG)
+	Jean-Marc Lienher (gif)                Ben "Disch" Wenger (io callbacks)
+	Tom Seddon (pic)                       Omar Cornut (1/2/4-bit PNG)
+	Thatcher Ulrich (psd)                  Nicolas Guillemot (vertical flip)
+	Ken Miller (pgm, ppm)                  Richard Mitton (16-bit PSD)
+	github:urraka (animated gif)           Junggon Kim (PNM comments)
+	Christopher Forseth (animated gif)     Daniel Gibson (16-bit TGA)
+										   socks-the-fox (16-bit PNG)
+										   Jeremy Sawicki (handle all ImageNet JPGs)
+ Optimizations & bugfixes                  Mikhail Morozov (1-bit BMP)
+	Fabian "ryg" Giesen                    Anael Seghezzi (is-16-bit query)
+	Arseny Kapoulkine
+	John-Mark Allen
+
+ Bug & warning fixes
+	Marc LeBlanc            David Woo          Guillaume George   Martins Mozeiko
+	Christpher Lloyd        Jerry Jansson      Joseph Thomson     Phil Jordan
+	Dave Moore              Roy Eltham         Hayaki Saito       Nathan Reed
+	Won Chun                Luke Graham        Johan Duparc       Nick Verigakis
+	the Horde3D community   Thomas Ruf         Ronny Chevalier    github:rlyeh
+	Janez Zemva             John Bartholomew   Michal Cichon      github:romigrou
+	Jonathan Blow           Ken Hamada         Tero Hanninen      github:svdijk
+	Laurent Gomila          Cort Stratton      Sergio Gonzalez    github:snagar
+	Aruelien Pocheville     Thibault Reuille   Cass Everitt       github:Zelex
+	Ryamond Barbiero        Paul Du Bois       Engin Manap        github:grim210
+	Aldo Culquicondor       Philipp Wiesemann  Dale Weiler        github:sammyhw
+	Oriol Ferrer Mesia      Josh Tobin         Matthew Gregan     github:phprus
+	Julian Raschke          Gregory Mullen     Baldur Karlsson    github:poppolopoppo
+	Christian Floisand      Kevin Schmidt                         github:darealshinji
+	Blazej Dariusz Roszkowski                                     github:Michaelangel007
+*/
+
+#ifndef STBI_INCLUDE_STB_IMAGE_H
+#define STBI_INCLUDE_STB_IMAGE_H
+
+// DOCUMENTATION
+//
+// Limitations:
+//    - no 12-bit-per-channel JPEG
+//    - no JPEGs with arithmetic coding
+//    - GIF always returns *comp=4
+//
+// Basic usage (see HDR discussion below for HDR usage):
+//    int x,y,n;
+//    unsigned char *data = stbi_load(filename, &x, &y, &n, 0);
+//    // ... process data if not NULL ...
+//    // ... x = width, y = height, n = # 8-bit components per pixel ...
+//    // ... replace '0' with '1'..'4' to force that many components per pixel
+//    // ... but 'n' will always be the number that it would have been if you said 0
+//    stbi_image_free(data)
+//
+// Standard parameters:
+//    int *x                 -- outputs image width in pixels
+//    int *y                 -- outputs image height in pixels
+//    int *channels_in_file  -- outputs # of image components in image file
+//    int desired_channels   -- if non-zero, # of image components requested in result
+//
+// The return value from an image loader is an 'unsigned char *' which points
+// to the pixel data, or NULL on an allocation failure or if the image is
+// corrupt or invalid. The pixel data consists of *y scanlines of *x pixels,
+// with each pixel consisting of N interleaved 8-bit components; the first
+// pixel pointed to is top-left-most in the image. There is no padding between
+// image scanlines or between pixels, regardless of format. The number of
+// components N is 'desired_channels' if desired_channels is non-zero, or
+// *channels_in_file otherwise. If desired_channels is non-zero,
+// *channels_in_file has the number of components that _would_ have been
+// output otherwise. E.g. if you set desired_channels to 4, you will always
+// get RGBA output, but you can check *channels_in_file to see if it's trivially
+// opaque because e.g. there were only 3 channels in the source image.
+//
+// An output image with N components has the following components interleaved
+// in this order in each pixel:
+//
+//     N=#comp     components
+//       1           grey
+//       2           grey, alpha
+//       3           red, green, blue
+//       4           red, green, blue, alpha
+//
+// If image loading fails for any reason, the return value will be NULL,
+// and *x, *y, *channels_in_file will be unchanged. The function
+// stbi_failure_reason() can be queried for an extremely brief, end-user
+// unfriendly explanation of why the load failed. Define STBI_NO_FAILURE_STRINGS
+// to avoid compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly
+// more user-friendly ones.
+//
+// Paletted PNG, BMP, GIF, and PIC images are automatically depalettized.
+//
+// ===========================================================================
+//
+// Philosophy
+//
+// stb libraries are designed with the following priorities:
+//
+//    1. easy to use
+//    2. easy to maintain
+//    3. good performance
+//
+// Sometimes I let "good performance" creep up in priority over "easy to maintain",
+// and for best performance I may provide less-easy-to-use APIs that give higher
+// performance, in addition to the easy to use ones. Nevertheless, it's important
+// to keep in mind that from the standpoint of you, a client of this library,
+// all you care about is #1 and #3, and stb libraries DO NOT emphasize #3 above all.
+//
+// Some secondary priorities arise directly from the first two, some of which
+// make more explicit reasons why performance can't be emphasized.
+//
+//    - Portable ("ease of use")
+//    - Small source code footprint ("easy to maintain")
+//    - No dependencies ("ease of use")
+//
+// ===========================================================================
+//
+// I/O callbacks
+//
+// I/O callbacks allow you to read from arbitrary sources, like packaged
+// files or some other source. Data read from callbacks are processed
+// through a small internal buffer (currently 128 bytes) to try to reduce
+// overhead.
+//
+// The three functions you must define are "read" (reads some bytes of data),
+// "skip" (skips some bytes of data), "eof" (reports if the stream is at the end).
+//
+// ===========================================================================
+//
+// SIMD support
+//
+// The JPEG decoder will try to automatically use SIMD kernels on x86 when
+// supported by the compiler. For ARM Neon support, you must explicitly
+// request it.
+//
+// (The old do-it-yourself SIMD API is no longer supported in the current
+// code.)
+//
+// On x86, SSE2 will automatically be used when available based on a run-time
+// test; if not, the generic C versions are used as a fall-back. On ARM targets,
+// the typical path is to have separate builds for NEON and non-NEON devices
+// (at least this is true for iOS and Android). Therefore, the NEON support is
+// toggled by a build flag: define STBI_NEON to get NEON loops.
+//
+// If for some reason you do not want to use any of SIMD code, or if
+// you have issues compiling it, you can disable it entirely by
+// defining STBI_NO_SIMD.
+//
+// ===========================================================================
+//
+// HDR image support   (disable by defining STBI_NO_HDR)
+//
+// stb_image now supports loading HDR images in general, and currently
+// the Radiance .HDR file format, although the support is provided
+// generically. You can still load any file through the existing interface;
+// if you attempt to load an HDR file, it will be automatically remapped to
+// LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1;
+// both of these constants can be reconfigured through this interface:
+//
+//     stbi_hdr_to_ldr_gamma(2.2f);
+//     stbi_hdr_to_ldr_scale(1.0f);
+//
+// (note, do not use _inverse_ constants; stb_image will invert them
+// appropriately).
+//
+// Additionally, there is a new, parallel interface for loading files as
+// (linear) floats to preserve the full dynamic range:
+//
+//    float *data = stbi_loadf(filename, &x, &y, &n, 0);
+//
+// If you load LDR images through this interface, those images will
+// be promoted to floating point values, run through the inverse of
+// constants corresponding to the above:
+//
+//     stbi_ldr_to_hdr_scale(1.0f);
+//     stbi_ldr_to_hdr_gamma(2.2f);
+//
+// Finally, given a filename (or an open file or memory block--see header
+// file for details) containing image data, you can query for the "most
+// appropriate" interface to use (that is, whether the image is HDR or
+// not), using:
+//
+//     stbi_is_hdr(char *filename);
+//
+// ===========================================================================
+//
+// iPhone PNG support:
+//
+// By default we convert iphone-formatted PNGs back to RGB, even though
+// they are internally encoded differently. You can disable this conversion
+// by calling stbi_convert_iphone_png_to_rgb(0), in which case
+// you will always just get the native iphone "format" through (which
+// is BGR stored in RGB).
+//
+// Call stbi_set_unpremultiply_on_load(1) as well to force a divide per
+// pixel to remove any premultiplied alpha *only* if the image file explicitly
+// says there's premultiplied data (currently only happens in iPhone images,
+// and only if iPhone convert-to-rgb processing is on).
+//
+// ===========================================================================
+//
+// ADDITIONAL CONFIGURATION
+//
+//  - You can suppress implementation of any of the decoders to reduce
+//    your code footprint by #defining one or more of the following
+//    symbols before creating the implementation.
+//
+//        STBI_NO_JPEG
+//        STBI_NO_PNG
+//        STBI_NO_BMP
+//        STBI_NO_PSD
+//        STBI_NO_TGA
+//        STBI_NO_GIF
+//        STBI_NO_HDR
+//        STBI_NO_PIC
+//        STBI_NO_PNM   (.ppm and .pgm)
+//
+//  - You can request *only* certain decoders and suppress all other ones
+//    (this will be more forward-compatible, as addition of new decoders
+//    doesn't require you to disable them explicitly):
+//
+//        STBI_ONLY_JPEG
+//        STBI_ONLY_PNG
+//        STBI_ONLY_BMP
+//        STBI_ONLY_PSD
+//        STBI_ONLY_TGA
+//        STBI_ONLY_GIF
+//        STBI_ONLY_HDR
+//        STBI_ONLY_PIC
+//        STBI_ONLY_PNM   (.ppm and .pgm)
+//
+//   - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still
+//     want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB
+//
+
+
+#ifndef STBI_NO_STDIO
+#include <stdio.h>
+#endif // STBI_NO_STDIO
+
+#define STBI_VERSION 1
+
+// Channel-count selectors for the desired_channels / req_comp argument of the
+// load functions. Each named value is the number of channels per pixel it
+// stands for; STBI_default (0) is the "no preference" sentinel.
+enum
+{
+	STBI_default = 0, // only used for desired_channels
+
+	STBI_grey = 1,
+	STBI_grey_alpha = 2,
+	STBI_rgb = 3,
+	STBI_rgb_alpha = 4
+};
+
+typedef unsigned char stbi_uc;
+typedef unsigned short stbi_us;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef STB_IMAGE_STATIC
+#define STBIDEF static
+#else
+#define STBIDEF extern
+#endif
+
+	//////////////////////////////////////////////////////////////////////////////
+	//
+	// PRIMARY API - works on images of any type
+	//
+
+	//
+	// load image by filename, open file, or memory buffer
+	//
+
+	// Caller-supplied I/O hooks used by the *_from_callbacks entry points.
+	// Every hook receives the opaque 'user' pointer that was passed alongside
+	// this table to the loader.
+	typedef struct
+	{
+		int      (*read)  (void* user, char* data, int size);   // fill 'data' with 'size' bytes.  return number of bytes actually read
+		void     (*skip)  (void* user, int n);                 // skip the next 'n' bytes, or 'unget' the last -n bytes if negative
+		int      (*eof)   (void* user);                       // returns nonzero if we are at end of file/data
+	} stbi_io_callbacks;
+
+	////////////////////////////////////
+	//
+	// 8-bits-per-channel interface
+	//
+
+	STBIDEF stbi_uc* stbi_load_from_memory(stbi_uc           const* buffer, int len, int* x, int* y, int* channels_in_file, int desired_channels);
+	STBIDEF stbi_uc* stbi_load_from_callbacks(stbi_io_callbacks const* clbk, void* user, int* x, int* y, int* channels_in_file, int desired_channels);
+#ifndef STBI_NO_GIF
+	STBIDEF stbi_uc* stbi_load_gif_from_memory(stbi_uc const* buffer, int len, int** delays, int* x, int* y, int* z, int* comp, int req_comp);
+#endif
+
+
+#ifndef STBI_NO_STDIO
+	STBIDEF stbi_uc* stbi_load(char const* filename, int* x, int* y, int* channels_in_file, int desired_channels);
+	STBIDEF stbi_uc* stbi_load_from_file(FILE* f, int* x, int* y, int* channels_in_file, int desired_channels);
+	// for stbi_load_from_file, file pointer is left pointing immediately after image
+#endif
+
+////////////////////////////////////
+//
+// 16-bits-per-channel interface
+//
+
+	STBIDEF stbi_us* stbi_load_16_from_memory(stbi_uc const* buffer, int len, int* x, int* y, int* channels_in_file, int desired_channels);
+	STBIDEF stbi_us* stbi_load_16_from_callbacks(stbi_io_callbacks const* clbk, void* user, int* x, int* y, int* channels_in_file, int desired_channels);
+
+#ifndef STBI_NO_STDIO
+	STBIDEF stbi_us* stbi_load_16(char const* filename, int* x, int* y, int* channels_in_file, int desired_channels);
+	STBIDEF stbi_us* stbi_load_from_file_16(FILE* f, int* x, int* y, int* channels_in_file, int desired_channels);
+#endif
+
+	////////////////////////////////////
+	//
+	// float-per-channel interface
+	//
+#ifndef STBI_NO_LINEAR
+	STBIDEF float* stbi_loadf_from_memory(stbi_uc const* buffer, int len, int* x, int* y, int* channels_in_file, int desired_channels);
+	STBIDEF float* stbi_loadf_from_callbacks(stbi_io_callbacks const* clbk, void* user, int* x, int* y, int* channels_in_file, int desired_channels);
+
+#ifndef STBI_NO_STDIO
+	STBIDEF float* stbi_loadf(char const* filename, int* x, int* y, int* channels_in_file, int desired_channels);
+	STBIDEF float* stbi_loadf_from_file(FILE* f, int* x, int* y, int* channels_in_file, int desired_channels);
+#endif
+#endif
+
+#ifndef STBI_NO_HDR
+	STBIDEF void   stbi_hdr_to_ldr_gamma(float gamma);
+	STBIDEF void   stbi_hdr_to_ldr_scale(float scale);
+#endif // STBI_NO_HDR
+
+#ifndef STBI_NO_LINEAR
+	STBIDEF void   stbi_ldr_to_hdr_gamma(float gamma);
+	STBIDEF void   stbi_ldr_to_hdr_scale(float scale);
+#endif // STBI_NO_LINEAR
+
+	// stbi_is_hdr is always defined, but always returns false if STBI_NO_HDR
+	STBIDEF int    stbi_is_hdr_from_callbacks(stbi_io_callbacks const* clbk, void* user);
+	STBIDEF int    stbi_is_hdr_from_memory(stbi_uc const* buffer, int len);
+#ifndef STBI_NO_STDIO
+	STBIDEF int      stbi_is_hdr(char const* filename);
+	STBIDEF int      stbi_is_hdr_from_file(FILE* f);
+#endif // STBI_NO_STDIO
+
+
+	// get a VERY brief reason for failure
+	// NOT THREADSAFE
+	STBIDEF const char* stbi_failure_reason(void);
+
+	// free the loaded image -- this is just free()
+	STBIDEF void     stbi_image_free(void* retval_from_stbi_load);
+
+	// get image dimensions & components without fully decoding
+	STBIDEF int      stbi_info_from_memory(stbi_uc const* buffer, int len, int* x, int* y, int* comp);
+	STBIDEF int      stbi_info_from_callbacks(stbi_io_callbacks const* clbk, void* user, int* x, int* y, int* comp);
+	STBIDEF int      stbi_is_16_bit_from_memory(stbi_uc const* buffer, int len);
+	STBIDEF int      stbi_is_16_bit_from_callbacks(stbi_io_callbacks const* clbk, void* user);
+
+#ifndef STBI_NO_STDIO
+	STBIDEF int      stbi_info(char const* filename, int* x, int* y, int* comp);
+	STBIDEF int      stbi_info_from_file(FILE* f, int* x, int* y, int* comp);
+	STBIDEF int      stbi_is_16_bit(char const* filename);
+	STBIDEF int      stbi_is_16_bit_from_file(FILE* f);
+#endif
+
+
+
+	// for image formats that explicitly notate that they have premultiplied alpha,
+	// we just return the colors as stored in the file. set this flag to force
+	// unpremultiplication. results are undefined if the unpremultiply overflows.
+	STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply);
+
+	// indicate whether we should process iphone images back to canonical format,
+	// or just pass them through "as-is"
+	STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert);
+
+	// flip the image vertically, so the first pixel in the output array is the bottom left
+	STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip);
+
+	// ZLIB client - used by PNG, available for other purposes
+
+	STBIDEF char* stbi_zlib_decode_malloc_guesssize(const char* buffer, int len, int initial_size, int* outlen);
+	STBIDEF char* stbi_zlib_decode_malloc_guesssize_headerflag(const char* buffer, int len, int initial_size, int* outlen, int parse_header);
+	STBIDEF char* stbi_zlib_decode_malloc(const char* buffer, int len, int* outlen);
+	STBIDEF int   stbi_zlib_decode_buffer(char* obuffer, int olen, const char* ibuffer, int ilen);
+
+	STBIDEF char* stbi_zlib_decode_noheader_malloc(const char* buffer, int len, int* outlen);
+	STBIDEF int   stbi_zlib_decode_noheader_buffer(char* obuffer, int olen, const char* ibuffer, int ilen);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+//
+//
+////   end header file   /////////////////////////////////////////////////////
+#endif // STBI_INCLUDE_STB_IMAGE_H
+
+#ifdef STB_IMAGE_IMPLEMENTATION
+
+#if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || defined(STBI_ONLY_BMP) \
+  || defined(STBI_ONLY_TGA) || defined(STBI_ONLY_GIF) || defined(STBI_ONLY_PSD) \
+  || defined(STBI_ONLY_HDR) || defined(STBI_ONLY_PIC) || defined(STBI_ONLY_PNM) \
+  || defined(STBI_ONLY_ZLIB)
+#ifndef STBI_ONLY_JPEG
+#define STBI_NO_JPEG
+#endif
+#ifndef STBI_ONLY_PNG
+#define STBI_NO_PNG
+#endif
+#ifndef STBI_ONLY_BMP
+#define STBI_NO_BMP
+#endif
+#ifndef STBI_ONLY_PSD
+#define STBI_NO_PSD
+#endif
+#ifndef STBI_ONLY_TGA
+#define STBI_NO_TGA
+#endif
+#ifndef STBI_ONLY_GIF
+#define STBI_NO_GIF
+#endif
+#ifndef STBI_ONLY_HDR
+#define STBI_NO_HDR
+#endif
+#ifndef STBI_ONLY_PIC
+#define STBI_NO_PIC
+#endif
+#ifndef STBI_ONLY_PNM
+#define STBI_NO_PNM
+#endif
+#endif
+
+#if defined(STBI_NO_PNG) && !defined(STBI_SUPPORT_ZLIB) && !defined(STBI_NO_ZLIB)
+#define STBI_NO_ZLIB
+#endif
+
+
+#include <stdarg.h>
+#include <stddef.h> // ptrdiff_t on osx
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+
+#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
+#include <math.h>  // ldexp, pow
+#endif
+
+#ifndef STBI_NO_STDIO
+#include <stdio.h>
+#endif
+
+#ifndef STBI_ASSERT
+#include <assert.h>
+#define STBI_ASSERT(x) assert(x)
+#endif
+
+
+#ifndef _MSC_VER
+#ifdef __cplusplus
+#define stbi_inline inline
+#else
+#define stbi_inline
+#endif
+#else
+#define stbi_inline __forceinline
+#endif
+
+
+#ifdef _MSC_VER
+typedef unsigned short stbi__uint16;
+typedef   signed short stbi__int16;
+typedef unsigned int   stbi__uint32;
+typedef   signed int   stbi__int32;
+#else
+#include <stdint.h>
+typedef uint16_t stbi__uint16;
+typedef int16_t  stbi__int16;
+typedef uint32_t stbi__uint32;
+typedef int32_t  stbi__int32;
+#endif
+
+// should produce compiler error if size is wrong
+typedef unsigned char validate_uint32[sizeof(stbi__uint32) == 4 ? 1 : -1];
+
+#ifdef _MSC_VER
+#define STBI_NOTUSED(v)  (void)(v)
+#else
+#define STBI_NOTUSED(v)  (void)sizeof(v)
+#endif
+
+#ifdef _MSC_VER
+#define STBI_HAS_LROTL
+#endif
+
+#ifdef STBI_HAS_LROTL
+#define stbi_lrot(x,y)  _lrotl(x,y)
+#else
+#define stbi_lrot(x,y)  (((x) << (y)) | ((x) >> (32 - (y))))
+#endif
+
+// Allocator configuration.
+// The client either overrides the whole allocator (STBI_MALLOC, STBI_FREE and
+// one of STBI_REALLOC / STBI_REALLOC_SIZED) or none of it; any partial
+// override is rejected at compile time.
+#if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED))
+// ok
+#elif !defined(STBI_MALLOC) && !defined(STBI_FREE) && !defined(STBI_REALLOC) && !defined(STBI_REALLOC_SIZED)
+// ok
+#else
+#error "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC (or STBI_REALLOC_SIZED)."
+#endif
+
+// No override supplied: fall back to the C runtime allocator.
+#ifndef STBI_MALLOC
+#define STBI_MALLOC(sz)           malloc(sz)
+#define STBI_REALLOC(p,newsz)     realloc(p,newsz)
+#define STBI_FREE(p)              free(p)
+#endif
+
+// The sized variant defaults to plain STBI_REALLOC and ignores the old size.
+#ifndef STBI_REALLOC_SIZED
+#define STBI_REALLOC_SIZED(p,oldsz,newsz) STBI_REALLOC(p,newsz)
+#endif
+
+// x86/x64 detection
+#if defined(__x86_64__) || defined(_M_X64)
+#define STBI__X64_TARGET
+#elif defined(__i386) || defined(_M_IX86)
+#define STBI__X86_TARGET
+#endif
+
+#if defined(__GNUC__) && defined(STBI__X86_TARGET) && !defined(__SSE2__) && !defined(STBI_NO_SIMD)
+// gcc doesn't support sse2 intrinsics unless you compile with -msse2,
+// which in turn means it gets to use SSE2 everywhere. This is unfortunate,
+// but previous attempts to provide the SSE2 functions with runtime
+// detection caused numerous issues. The way architecture extensions are
+// exposed in GCC/Clang is, sadly, not really suited for one-file libs.
+// New behavior: if compiled with -msse2, we use SSE2 without any
+// detection; if not, we don't use it at all.
+#define STBI_NO_SIMD
+#endif
+
+#if defined(__MINGW32__) && defined(STBI__X86_TARGET) && !defined(STBI_MINGW_ENABLE_SSE2) && !defined(STBI_NO_SIMD)
+// Note that __MINGW32__ doesn't actually mean 32-bit, so we have to avoid STBI__X64_TARGET
+//
+// 32-bit MinGW wants ESP to be 16-byte aligned, but this is not in the
+// Windows ABI and VC++ as well as Windows DLLs don't maintain that invariant.
+// As a result, enabling SSE2 on 32-bit MinGW is dangerous when not
+// simultaneously enabling "-mstackrealign".
+//
+// See https://github.com/nothings/stb/issues/81 for more information.
+//
+// So default to no SSE2 on 32-bit MinGW. If you've read this far and added
+// -mstackrealign to your build settings, feel free to #define STBI_MINGW_ENABLE_SSE2.
+#define STBI_NO_SIMD
+#endif
+
+#if !defined(STBI_NO_SIMD) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET))
+#define STBI_SSE2
+#include <emmintrin.h>
+
+#ifdef _MSC_VER
+
+#if _MSC_VER >= 1400  // not VC6
+#include <intrin.h> // __cpuid
+// Runs CPUID with function id 1 through the MSVC intrinsic and returns the
+// fourth output word (index 3 of the register array).
+static int stbi__cpuid3(void)
+{
+	int regs[4];
+	__cpuid(regs, 1);
+	return regs[3];
+}
+#else
+// Inline-assembly fallback used when _MSC_VER < 1400 (the intrinsic header is
+// not included on that path): executes CPUID with EAX = 1 and returns EDX.
+static int stbi__cpuid3(void)
+{
+	int res;
+	__asm {
+		mov  eax, 1
+		cpuid
+		mov  res, edx
+	}
+	return res;
+}
+#endif
+
+#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name
+
+// MSVC path: reports SSE2 support at run time by testing bit 26 of the word
+// returned by stbi__cpuid3(). Returns 1 when the bit is set, 0 otherwise.
+static int stbi__sse2_available(void)
+{
+	return ((stbi__cpuid3() >> 26) & 1) != 0;
+}
+#else // assume GCC-style if not VC++
+#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
+
+// Non-MSVC path: no run-time probe is performed, SSE2 is always reported as
+// available (see the rationale below).
+static int stbi__sse2_available(void)
+{
+	// If we're even attempting to compile this on GCC/Clang, that means
+	// -msse2 is on, which means the compiler is allowed to use SSE2
+	// instructions at will, and so are we.
+	return 1;
+}
+#endif
+#endif
+
+// ARM NEON
+#if defined(STBI_NO_SIMD) && defined(STBI_NEON)
+#undef STBI_NEON
+#endif
+
+#ifdef STBI_NEON
+#include <arm_neon.h>
+// assume GCC or Clang on ARM targets
+#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
+#endif
+
+#ifndef STBI_SIMD_ALIGN
+#define STBI_SIMD_ALIGN(type, name) type name
+#endif
+
+///////////////////////////////////////////////
+//
+//  stbi__context struct and start_xxx functions
+
+// stbi__context structure is our basic context used by all images, so it
+// contains all the IO context, plus some basic image information
+typedef struct
+{
+	// NOTE(review): presumably image width/height and channel counts filled in
+	// by the individual decoders -- not written anywhere in this section.
+	stbi__uint32 img_x, img_y;
+	int img_n, img_out_n;
+
+	// copy of the caller's callback table and its opaque argument
+	// (stbi__start_callbacks); io.read is NULL for memory contexts
+	stbi_io_callbacks io;
+	void* io_user_data;
+
+	int read_from_callbacks;   // 1 when set up by stbi__start_callbacks, 0 by stbi__start_mem
+	int buflen;                // set to sizeof(buffer_start) by stbi__start_callbacks
+	stbi_uc buffer_start[128]; // staging buffer; the read window points here in callback mode
+
+	// current read window [img_buffer, img_buffer_end) and the window recorded
+	// at start-up, which stbi__rewind restores
+	stbi_uc* img_buffer, * img_buffer_end;
+	stbi_uc* img_buffer_original, * img_buffer_original_end;
+} stbi__context;
+
+
+static void stbi__refill_buffer(stbi__context* s);
+
+// Set up 's' to decode straight out of the caller's memory block.
+// The read window (and the saved "original" window used by stbi__rewind)
+// covers buffer[0 .. len); callback reading is switched off.
+static void stbi__start_mem(stbi__context* s, stbi_uc const* buffer, int len)
+{
+	stbi_uc* first = (stbi_uc*)buffer;
+	stbi_uc* past_last = (stbi_uc*)buffer + len;
+
+	s->io.read = NULL;
+	s->read_from_callbacks = 0;
+
+	s->img_buffer_original = first;
+	s->img_buffer = first;
+	s->img_buffer_original_end = past_last;
+	s->img_buffer_end = past_last;
+}
+
+// Set up 's' to pull its data through the callback table 'c', handing 'user'
+// back to every callback. The internal staging buffer is primed with one
+// refill, and the resulting window is remembered for stbi__rewind.
+static void stbi__start_callbacks(stbi__context* s, stbi_io_callbacks* c, void* user)
+{
+	s->io_user_data = user;
+	s->io = *c;
+	s->read_from_callbacks = 1;
+	s->buflen = sizeof(s->buffer_start);
+
+	// the window start must be in place before the first refill; the saved end
+	// is only known after it
+	s->img_buffer_original = s->buffer_start;
+	stbi__refill_buffer(s);
+	s->img_buffer_original_end = s->img_buffer_end;
+}
+
+#ifndef STBI_NO_STDIO
+
+// stbi_io_callbacks::read for stdio: 'user' is the FILE* to read from.
+// Reads up to 'size' bytes into 'data' and returns how many were read.
+static int stbi__stdio_read(void* user, char* data, int size)
+{
+	FILE* stream = (FILE*)user;
+	size_t got = fread(data, 1, size, stream);
+	return (int)got;
+}
+
+// stbi_io_callbacks::skip for stdio: moves the position of the FILE* in
+// 'user' by 'n' bytes relative to where it is now (negative n moves back).
+// The result of the seek is not checked.
+static void stbi__stdio_skip(void* user, int n)
+{
+	FILE* stream = (FILE*)user;
+	fseek(stream, n, SEEK_CUR);
+}
+
+// stbi_io_callbacks::eof for stdio: 'user' is the FILE* being read.
+// Returns nonzero once the stream can deliver no more data -- either the
+// end-of-file indicator or the error indicator is set. A failed read sets
+// only the error indicator, so checking feof() alone would keep reporting
+// "more data" for a stream that will never produce any.
+static int stbi__stdio_eof(void* user)
+{
+	FILE* stream = (FILE*)user;
+	return feof(stream) || ferror(stream);
+}
+
+// Callback table that routes read / skip / eof to the stdio helpers above;
+// stbi__start_file passes it together with the FILE* as user data.
+static stbi_io_callbacks stbi__stdio_callbacks =
+{
+   stbi__stdio_read,
+   stbi__stdio_skip,
+   stbi__stdio_eof,
+};
+
+// Initialize 's' to read from the stdio stream 'f': a callback-based context
+// using the stdio callback table, with the FILE* as the callbacks' user data.
+static void stbi__start_file(stbi__context* s, FILE* f)
+{
+	stbi__start_callbacks(s, &stbi__stdio_callbacks, (void*)f);
+}
+
+//static void stop_file(stbi__context *s) { }
+
+#endif // !STBI_NO_STDIO
+
+// Restore the read window of 's' to the one recorded when the context was
+// started.
+static void stbi__rewind(stbi__context* s)
+{
+	// This is not a true rewind to the start of the stream: only the initial
+	// buffer is restored. That is enough because rewind is only used after a
+	// format 'test', and those never look at more than 92 bytes.
+	s->img_buffer_end = s->img_buffer_original_end;
+	s->img_buffer = s->img_buffer_original;
+}
+
+// Values for stbi__result_info::channel_order. stbi__load_main initializes
+// the field to STBI_ORDER_RGB.
+enum
+{
+	STBI_ORDER_RGB,
+	STBI_ORDER_BGR
+};
+
+// Side-channel description of a decoded image, filled in by the per-format
+// loaders and consumed by the post-processing wrappers.
+typedef struct
+{
+	int bits_per_channel; // defaulted to 8 by stbi__load_main; the wrappers also handle 16
+	int num_channels;     // reset to 0 by stbi__load_main
+	int channel_order;    // STBI_ORDER_RGB or STBI_ORDER_BGR
+} stbi__result_info;
+
+#ifndef STBI_NO_JPEG
+static int      stbi__jpeg_test(stbi__context* s);
+static void* stbi__jpeg_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri);
+static int      stbi__jpeg_info(stbi__context* s, int* x, int* y, int* comp);
+#endif
+
+#ifndef STBI_NO_PNG
+static int      stbi__png_test(stbi__context* s);
+static void* stbi__png_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri);
+static int      stbi__png_info(stbi__context* s, int* x, int* y, int* comp);
+static int      stbi__png_is16(stbi__context* s);
+#endif
+
+#ifndef STBI_NO_BMP
+static int      stbi__bmp_test(stbi__context* s);
+static void* stbi__bmp_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri);
+static int      stbi__bmp_info(stbi__context* s, int* x, int* y, int* comp);
+#endif
+
+#ifndef STBI_NO_TGA
+static int      stbi__tga_test(stbi__context* s);
+static void* stbi__tga_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri);
+static int      stbi__tga_info(stbi__context* s, int* x, int* y, int* comp);
+#endif
+
+#ifndef STBI_NO_PSD
+static int      stbi__psd_test(stbi__context* s);
+static void* stbi__psd_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri, int bpc);
+static int      stbi__psd_info(stbi__context* s, int* x, int* y, int* comp);
+static int      stbi__psd_is16(stbi__context* s);
+#endif
+
+#ifndef STBI_NO_HDR
+static int      stbi__hdr_test(stbi__context* s);
+static float* stbi__hdr_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri);
+static int      stbi__hdr_info(stbi__context* s, int* x, int* y, int* comp);
+#endif
+
+#ifndef STBI_NO_PIC
+static int      stbi__pic_test(stbi__context* s);
+static void* stbi__pic_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri);
+static int      stbi__pic_info(stbi__context* s, int* x, int* y, int* comp);
+#endif
+
+#ifndef STBI_NO_GIF
+static int      stbi__gif_test(stbi__context* s);
+static void* stbi__gif_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri);
+static void* stbi__load_gif_main(stbi__context* s, int** delays, int* x, int* y, int* z, int* comp, int req_comp);
+static int      stbi__gif_info(stbi__context* s, int* x, int* y, int* comp);
+#endif
+
+#ifndef STBI_NO_PNM
+static int      stbi__pnm_test(stbi__context* s);
+static void* stbi__pnm_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri);
+static int      stbi__pnm_info(stbi__context* s, int* x, int* y, int* comp);
+#endif
+
+// this is not threadsafe
+// Most recent failure message recorded by stbi__err; a single file-scope
+// pointer shared by all callers.
+static const char* stbi__g_failure_reason;
+
+// Returns the message recorded by the most recent stbi__err call. The static
+// pointer has no initializer, so this is NULL if nothing was ever recorded.
+STBIDEF const char* stbi_failure_reason(void)
+{
+	return stbi__g_failure_reason;
+}
+
+// Records 'str' as the current failure reason (the pointer is stored, not
+// copied) and returns 0 so callers can write "return stbi__err(...)".
+static int stbi__err(const char* str)
+{
+	stbi__g_failure_reason = str;
+	return 0;
+}
+
+// Allocates 'size' bytes through the configurable STBI_MALLOC hook and
+// returns whatever that hook returns.
+static void* stbi__malloc(size_t size)
+{
+	return STBI_MALLOC(size);
+}
+
+// stb_image uses ints pervasively, including for offset calculations.
+// therefore the largest decoded image size we can support with the
+// current code, even on 64-bit targets, is INT_MAX. this is not a
+// significant limitation for the intended use case.
+//
+// we do, however, need to make sure our size calculations don't
+// overflow. hence a few helper functions for size calculations that
+// multiply integers together, making sure that they're non-negative
+// and no overflow occurs.
+
+// Returns 1 when "a + b" can be formed without exceeding INT_MAX, 0 otherwise.
+// A negative 'b' is always rejected.
+static int stbi__addsizes_valid(int a, int b)
+{
+	int headroom;
+
+	if (b < 0) return 0;
+
+	// With 0 <= b <= INT_MAX, the difference INT_MAX - b lies in
+	// [0, INT_MAX] and cannot overflow. Comparing 'a' against it answers
+	// "a + b <= INT_MAX" without ever computing the risky sum.
+	headroom = INT_MAX - b;
+	return a <= headroom;
+}
+
+// Returns 1 when "a * b" can be formed without exceeding INT_MAX, 0 otherwise.
+// Any negative factor is rejected.
+static int stbi__mul2sizes_valid(int a, int b)
+{
+	if (a >= 0 && b >= 0) {
+		// multiplying by zero is always safe, and zero must never be the divisor;
+		// otherwise compare against the quotient instead of forming a*b
+		return (b == 0) ? 1 : (a <= INT_MAX / b);
+	}
+	return 0;
+}
+
+// Returns 1 when "a*b + add" has no negative term/factor and fits in an int,
+// 0 otherwise.
+static int stbi__mad2sizes_valid(int a, int b, int add)
+{
+	// the product is only formed once it is known not to overflow
+	if (!stbi__mul2sizes_valid(a, b)) return 0;
+	return stbi__addsizes_valid(a * b, add);
+}
+
+// Returns 1 when "a*b*c + add" has no negative term/factor and fits in an
+// int, 0 otherwise.
+static int stbi__mad3sizes_valid(int a, int b, int c, int add)
+{
+	// each partial product is validated before it is actually computed
+	if (!stbi__mul2sizes_valid(a, b)) return 0;
+	if (!stbi__mul2sizes_valid(a * b, c)) return 0;
+	return stbi__addsizes_valid(a * b * c, add);
+}
+
+// Returns 1 when "a*b*c*d + add" has no negative term/factor and fits in an
+// int, 0 otherwise. Compiled out when both STBI_NO_LINEAR and STBI_NO_HDR
+// are defined.
+#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
+static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add)
+{
+	// each partial product is validated before it is actually computed
+	if (!stbi__mul2sizes_valid(a, b)) return 0;
+	if (!stbi__mul2sizes_valid(a * b, c)) return 0;
+	if (!stbi__mul2sizes_valid(a * b * c, d)) return 0;
+	return stbi__addsizes_valid(a * b * c * d, add);
+}
+#endif
+
+// mallocs with size overflow checking
+// Allocates a*b + add bytes, or returns NULL without allocating when that
+// size is negative or would overflow an int.
+static void* stbi__malloc_mad2(int a, int b, int add)
+{
+	return stbi__mad2sizes_valid(a, b, add) ? stbi__malloc(a * b + add) : NULL;
+}
+
+// Allocates a*b*c + add bytes, or returns NULL without allocating when that
+// size is negative or would overflow an int.
+static void* stbi__malloc_mad3(int a, int b, int c, int add)
+{
+	return stbi__mad3sizes_valid(a, b, c, add) ? stbi__malloc(a * b * c + add) : NULL;
+}
+
+#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
+// Allocates a*b*c*d + add bytes, or returns NULL without allocating when that
+// size is negative or would overflow an int.
+static void* stbi__malloc_mad4(int a, int b, int c, int d, int add)
+{
+	return stbi__mad4sizes_valid(a, b, c, d, add) ? stbi__malloc(a * b * c * d + add) : NULL;
+}
+#endif
+
+// stbi__err - error
+// stbi__errpf - error returning pointer to float
+// stbi__errpuc - error returning pointer to unsigned char
+//
+// From here on the two-argument macro form of stbi__err shadows the function
+// of the same name: x is the short message, y the user-facing one, and the
+// build configuration decides which (if either) reaches the function.
+
+#ifdef STBI_NO_FAILURE_STRINGS
+#define stbi__err(x,y)  0
+#elif defined(STBI_FAILURE_USERMSG)
+#define stbi__err(x,y)  stbi__err(y)
+#else
+#define stbi__err(x,y)  stbi__err(x)
+#endif
+
+// Both branches of the conditional are NULL: stbi__err is evaluated only for
+// its side effect, and the expression yields a null pointer of the wanted type.
+#define stbi__errpf(x,y)   ((float *)(size_t) (stbi__err(x,y)?NULL:NULL))
+#define stbi__errpuc(x,y)  ((unsigned char *)(size_t) (stbi__err(x,y)?NULL:NULL))
+
+// Releases a buffer returned by one of the load functions by passing it to
+// the configurable STBI_FREE hook.
+STBIDEF void stbi_image_free(void* retval_from_stbi_load)
+{
+	STBI_FREE(retval_from_stbi_load);
+}
+
+#ifndef STBI_NO_LINEAR
+static float* stbi__ldr_to_hdr(stbi_uc* data, int x, int y, int comp);
+#endif
+
+#ifndef STBI_NO_HDR
+static stbi_uc* stbi__hdr_to_ldr(float* data, int x, int y, int comp);
+#endif
+
+// Process-wide switch (a plain static, no synchronization): when nonzero the
+// post-processing wrappers flip decoded images vertically. Off by default.
+static int stbi__vertically_flip_on_load = 0;
+
+// Stores the caller's flag in the switch above; it affects subsequent loads.
+STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip)
+{
+	stbi__vertically_flip_on_load = flag_true_if_should_flip;
+}
+
+// Format dispatcher: runs each compiled-in decoder's signature test in a
+// fixed order and hands the stream to the first decoder that accepts it.
+//
+//   s         - input context
+//   x, y      - receive the image dimensions
+//   comp      - receives the channel count found in the file
+//   req_comp  - channel count requested by the caller (0 = keep the file's)
+//   ri        - reset here to 8 bits / RGB order / 0 channels, then updated
+//               by the selected decoder
+//   bpc       - requested bits per channel; only forwarded to the PSD loader
+//
+// Returns the decoded pixel buffer, or NULL on failure. If no decoder
+// recognizes the data an "unknown image type" error is recorded.
+static void* stbi__load_main(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri, int bpc)
+{
+	memset(ri, 0, sizeof(*ri)); // make sure it's initialized if we add new fields
+	ri->bits_per_channel = 8; // default is 8 so most paths don't have to be changed
+	ri->channel_order = STBI_ORDER_RGB; // all current input & output are this, but this is here so we can add BGR order
+	ri->num_channels = 0;
+
+#ifndef STBI_NO_JPEG
+	if (stbi__jpeg_test(s)) return stbi__jpeg_load(s, x, y, comp, req_comp, ri);
+#endif
+#ifndef STBI_NO_PNG
+	if (stbi__png_test(s))  return stbi__png_load(s, x, y, comp, req_comp, ri);
+#endif
+#ifndef STBI_NO_BMP
+	if (stbi__bmp_test(s))  return stbi__bmp_load(s, x, y, comp, req_comp, ri);
+#endif
+#ifndef STBI_NO_GIF
+	if (stbi__gif_test(s))  return stbi__gif_load(s, x, y, comp, req_comp, ri);
+#endif
+#ifndef STBI_NO_PSD
+	if (stbi__psd_test(s))  return stbi__psd_load(s, x, y, comp, req_comp, ri, bpc);
+#endif
+#ifndef STBI_NO_PIC
+	if (stbi__pic_test(s))  return stbi__pic_load(s, x, y, comp, req_comp, ri);
+#endif
+#ifndef STBI_NO_PNM
+	if (stbi__pnm_test(s))  return stbi__pnm_load(s, x, y, comp, req_comp, ri);
+#endif
+
+#ifndef STBI_NO_HDR
+	if (stbi__hdr_test(s)) {
+		float* hdr = stbi__hdr_load(s, x, y, comp, req_comp, ri);
+		// a failed decode may not have written *x / *y / *comp, and there is
+		// nothing to tone-map: stop before touching any of them
+		if (hdr == NULL) return NULL;
+		return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);
+	}
+#endif
+
+#ifndef STBI_NO_TGA
+	// test tga last because it's a crappy test!
+	if (stbi__tga_test(s))
+		return stbi__tga_load(s, x, y, comp, req_comp, ri);
+#endif
+
+	return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt");
+}
+
+// Narrows a 16-bit-per-channel image to 8 bits per channel by keeping the
+// high byte of every sample.
+//
+//   orig     - w*h*channels 16-bit samples. Ownership passes to this
+//              function: the buffer is released on every path.
+//   w, h     - image dimensions
+//   channels - samples per pixel
+//
+// Returns a newly allocated buffer of w*h*channels bytes, or NULL after
+// recording an out-of-memory error if the allocation fails.
+static stbi_uc* stbi__convert_16_to_8(stbi__uint16* orig, int w, int h, int channels)
+{
+	int i;
+	int img_len = w * h * channels;
+	stbi_uc* reduced;
+
+	reduced = (stbi_uc*)stbi__malloc(img_len);
+	if (reduced == NULL) {
+		// callers replace their only pointer to 'orig' with our return value,
+		// so it must be released here or it leaks
+		STBI_FREE(orig);
+		return stbi__errpuc("outofmem", "Out of memory");
+	}
+
+	for (i = 0; i < img_len; ++i)
+		reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // high byte of each 16-bit sample is a sufficient approx of 16->8 bit scaling
+
+	STBI_FREE(orig);
+	return reduced;
+}
+
+// Widens an 8-bit-per-channel image to 16 bits per channel by replicating
+// each byte into both halves of the 16-bit sample.
+//
+//   orig     - w*h*channels 8-bit samples. Ownership passes to this
+//              function: the buffer is released on every path.
+//   w, h     - image dimensions
+//   channels - samples per pixel
+//
+// Returns a newly allocated buffer of w*h*channels 16-bit samples, or NULL
+// after recording an out-of-memory error if the allocation fails.
+static stbi__uint16* stbi__convert_8_to_16(stbi_uc* orig, int w, int h, int channels)
+{
+	int i;
+	int img_len = w * h * channels;
+	stbi__uint16* enlarged;
+
+	enlarged = (stbi__uint16*)stbi__malloc(img_len * 2);
+	if (enlarged == NULL) {
+		// callers replace their only pointer to 'orig' with our return value,
+		// so it must be released here or it leaks
+		STBI_FREE(orig);
+		return (stbi__uint16*)stbi__errpuc("outofmem", "Out of memory");
+	}
+
+	for (i = 0; i < img_len; ++i)
+		enlarged[i] = (stbi__uint16)((orig[i] << 8) + orig[i]); // replicate to high and low byte, maps 0->0, 255->0xffff
+
+	STBI_FREE(orig);
+	return enlarged;
+}
+
+// Mirrors an image top-to-bottom in place.
+//
+//   image           - first byte of the pixel data, rows stored back to back
+//   w, h            - width and height in pixels
+//   bytes_per_pixel - size of one pixel in bytes
+//
+// Row k is exchanged with row h-1-k for the first h/2 rows; with an odd
+// height the middle row is left alone. Rows are exchanged through a fixed
+// 2048-byte scratch buffer, so no heap allocation is needed.
+static void stbi__vertical_flip(void* image, int w, int h, int bytes_per_pixel)
+{
+	stbi_uc scratch[2048];
+	stbi_uc* base = (stbi_uc*)image;
+	size_t stride = (size_t)w * bytes_per_pixel;
+	int half = h >> 1;
+	int top;
+
+	for (top = 0; top < half; top++) {
+		stbi_uc* upper = base + top * stride;
+		stbi_uc* lower = base + (h - top - 1) * stride;
+		size_t remaining;
+
+		// exchange the two rows one scratch-sized piece at a time
+		for (remaining = stride; remaining != 0; ) {
+			size_t chunk = sizeof(scratch);
+			if (remaining < chunk)
+				chunk = remaining;
+			memcpy(scratch, upper, chunk);
+			memcpy(upper, lower, chunk);
+			memcpy(lower, scratch, chunk);
+			upper += chunk;
+			lower += chunk;
+			remaining -= chunk;
+		}
+	}
+}
+
+// Flips each of 'z' consecutive w x h images stored back to back in 'image',
+// one slice at a time. Does nothing when z <= 0.
+static void stbi__vertical_flip_slices(void* image, int w, int h, int z, int bytes_per_pixel)
+{
+	int slice_bytes = w * h * bytes_per_pixel;
+	stbi_uc* cursor = (stbi_uc*)image;
+	int remaining;
+
+	for (remaining = z; remaining > 0; --remaining) {
+		stbi__vertical_flip(cursor, w, h, bytes_per_pixel);
+		cursor += slice_bytes;
+	}
+}
+
+// Decodes an image through stbi__load_main and normalizes the result to
+// 8 bits per channel.
+//
+//   s         - input context
+//   x, y      - receive the image dimensions
+//   comp      - receives the channel count found in the file
+//   req_comp  - requested channel count (0 = keep the file's)
+//
+// A 16-bit result is narrowed to 8 bits, and the image is flipped vertically
+// when the global flip-on-load switch is set. Returns the pixel buffer, or
+// NULL if decoding or the bit-depth conversion fails.
+static unsigned char* stbi__load_and_postprocess_8bit(stbi__context* s, int* x, int* y, int* comp, int req_comp)
+{
+	stbi__result_info ri;
+	void* result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8);
+
+	if (result == NULL)
+		return NULL;
+
+	if (ri.bits_per_channel != 8) {
+		STBI_ASSERT(ri.bits_per_channel == 16);
+		result = stbi__convert_16_to_8((stbi__uint16*)result, *x, *y, req_comp == 0 ? *comp : req_comp);
+		// the conversion allocates and can fail; without this check the flip
+		// below would run over a NULL image
+		if (result == NULL)
+			return NULL;
+		ri.bits_per_channel = 8;
+	}
+
+	// @TODO: move stbi__convert_format to here
+
+	if (stbi__vertically_flip_on_load) {
+		int channels = req_comp ? req_comp : *comp;
+		stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi_uc));
+	}
+
+	return (unsigned char*)result;
+}
+
+// Decodes an image through stbi__load_main and normalizes the result to
+// 16 bits per channel.
+//
+//   s         - input context
+//   x, y      - receive the image dimensions
+//   comp      - receives the channel count found in the file
+//   req_comp  - requested channel count (0 = keep the file's)
+//
+// An 8-bit result is widened to 16 bits, and the image is flipped vertically
+// when the global flip-on-load switch is set. Returns the pixel buffer, or
+// NULL if decoding or the bit-depth conversion fails.
+static stbi__uint16* stbi__load_and_postprocess_16bit(stbi__context* s, int* x, int* y, int* comp, int req_comp)
+{
+	stbi__result_info ri;
+	void* result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16);
+
+	if (result == NULL)
+		return NULL;
+
+	if (ri.bits_per_channel != 16) {
+		STBI_ASSERT(ri.bits_per_channel == 8);
+		result = stbi__convert_8_to_16((stbi_uc*)result, *x, *y, req_comp == 0 ? *comp : req_comp);
+		// the conversion allocates and can fail; without this check the flip
+		// below would run over a NULL image
+		if (result == NULL)
+			return NULL;
+		ri.bits_per_channel = 16;
+	}
+
+	// @TODO: move stbi__convert_format16 to here
+	// @TODO: special case RGB-to-Y (and RGBA-to-YA) for 8-bit-to-16-bit case to keep more precision
+
+	if (stbi__vertically_flip_on_load) {
+		int channels = req_comp ? req_comp : *comp;
+		stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi__uint16));
+	}
+
+	return (stbi__uint16*)result;
+}
+
+#if !defined(STBI_NO_HDR) || !defined(STBI_NO_LINEAR)
+// Post-processing for float images: flips 'result' vertically in place when
+// the global flip-on-load switch is set. A NULL image is left untouched.
+static void stbi__float_postprocess(float* result, int* x, int* y, int* comp, int req_comp)
+{
+	int channels;
+
+	if (!stbi__vertically_flip_on_load || result == NULL)
+		return;
+
+	channels = req_comp ? req_comp : *comp;
+	stbi__vertical_flip(result, *x, *y, channels * sizeof(float));
+}
+#endif
+
+#ifndef STBI_NO_STDIO
+
+// Opens 'filename' with the given stdio 'mode'. Uses fopen_s on MSVC 2005 and
+// later, plain fopen everywhere else. Returns NULL when the file cannot be
+// opened.
+static FILE* stbi__fopen(char const* filename, char const* mode)
+{
+#if defined(_MSC_VER) && _MSC_VER >= 1400
+	FILE* handle = NULL;
+	if (fopen_s(&handle, filename, mode) != 0)
+		return NULL;
+	return handle;
+#else
+	return fopen(filename, mode);
+#endif
+}
+
+
+// Loads an image file as 8-bit-per-channel pixels.
+//
+//   filename  - path of the file, opened in binary read mode
+//   x, y      - receive the image dimensions
+//   comp      - receives the channel count found in the file
+//   req_comp  - requested channel count (0 = keep the file's)
+//
+// Returns the pixel buffer, or NULL if the file cannot be opened (a
+// "can't fopen" error is recorded) or decoding fails. The file is closed
+// before returning.
+STBIDEF stbi_uc* stbi_load(char const* filename, int* x, int* y, int* comp, int req_comp)
+{
+	unsigned char* pixels;
+	FILE* fp = stbi__fopen(filename, "rb");
+
+	if (fp == NULL)
+		return stbi__errpuc("can't fopen", "Unable to open file");
+
+	pixels = stbi_load_from_file(fp, x, y, comp, req_comp);
+	fclose(fp);
+	return pixels;
+}
+
+// Decodes an 8-bit-per-channel image from an already open stdio stream.
+// On success the stream position is moved back by the number of bytes still
+// sitting unread in the context buffer. On failure the position is not
+// adjusted. The stream is not closed.
+STBIDEF stbi_uc* stbi_load_from_file(FILE* f, int* x, int* y, int* comp, int req_comp)
+{
+	stbi__context ctx;
+	unsigned char* pixels;
+
+	stbi__start_file(&ctx, f);
+	pixels = stbi__load_and_postprocess_8bit(&ctx, x, y, comp, req_comp);
+	if (pixels != NULL) {
+		// hand the read-ahead bytes back to the stream ('unget' them)
+		int unread = (int)(ctx.img_buffer_end - ctx.img_buffer);
+		fseek(f, -unread, SEEK_CUR);
+	}
+	return pixels;
+}
+
+// Decodes a 16-bit-per-channel image from an already open stdio stream.
+// On success the stream position is moved back by the number of bytes still
+// sitting unread in the context buffer. On failure the position is not
+// adjusted. The stream is not closed.
+STBIDEF stbi__uint16* stbi_load_from_file_16(FILE* f, int* x, int* y, int* comp, int req_comp)
+{
+	stbi__context ctx;
+	stbi__uint16* pixels;
+
+	stbi__start_file(&ctx, f);
+	pixels = stbi__load_and_postprocess_16bit(&ctx, x, y, comp, req_comp);
+	if (pixels != NULL) {
+		// hand the read-ahead bytes back to the stream ('unget' them)
+		int unread = (int)(ctx.img_buffer_end - ctx.img_buffer);
+		fseek(f, -unread, SEEK_CUR);
+	}
+	return pixels;
+}
+
+// Loads an image file as 16-bit-per-channel pixels. Same contract as
+// stbi_load, except for the sample type: returns NULL if the file cannot be
+// opened (a "can't fopen" error is recorded) or decoding fails, and the file
+// is closed before returning.
+STBIDEF stbi_us* stbi_load_16(char const* filename, int* x, int* y, int* comp, int req_comp)
+{
+	stbi__uint16* pixels;
+	FILE* fp = stbi__fopen(filename, "rb");
+
+	if (fp == NULL)
+		return (stbi_us*)stbi__errpuc("can't fopen", "Unable to open file");
+
+	pixels = stbi_load_from_file_16(fp, x, y, comp, req_comp);
+	fclose(fp);
+	return pixels;
+}
+
+
+#endif //!STBI_NO_STDIO
+
+// Decodes an image held in memory (buffer[0 .. len)) into 16-bit-per-channel
+// pixels: sets up a memory context and returns the result of
+// stbi__load_and_postprocess_16bit unchanged.
+STBIDEF stbi_us* stbi_load_16_from_memory(stbi_uc const* buffer, int len, int* x, int* y, int* channels_in_file, int desired_channels)
+{
+	stbi__context s;
+	stbi__start_mem(&s, buffer, len);
+	return stbi__load_and_postprocess_16bit(&s, x, y, channels_in_file, desired_channels);
+}
+
+// Decodes an image read through the caller's I/O callbacks into
+// 16-bit-per-channel pixels. 'user' is handed back to every callback.
+// The const qualifier on 'clbk' is cast away only to match the internal
+// signature; stbi__start_callbacks copies the table.
+STBIDEF stbi_us* stbi_load_16_from_callbacks(stbi_io_callbacks const* clbk, void* user, int* x, int* y, int* channels_in_file, int desired_channels)
+{
+	stbi__context s;
+	stbi__start_callbacks(&s, (stbi_io_callbacks*)clbk, user);
+	return stbi__load_and_postprocess_16bit(&s, x, y, channels_in_file, desired_channels);
+}
+
+// Decodes an image held in memory (buffer[0 .. len)) into 8-bit-per-channel
+// pixels: sets up a memory context and returns the result of
+// stbi__load_and_postprocess_8bit unchanged.
+STBIDEF stbi_uc* stbi_load_from_memory(stbi_uc const* buffer, int len, int* x, int* y, int* comp, int req_comp)
+{
+	stbi__context s;
+	stbi__start_mem(&s, buffer, len);
+	return stbi__load_and_postprocess_8bit(&s, x, y, comp, req_comp);
+}
+
+// Decodes an image read through the caller's I/O callbacks into
+// 8-bit-per-channel pixels. 'user' is handed back to every callback.
+// The const qualifier on 'clbk' is cast away only to match the internal
+// signature; stbi__start_callbacks copies the table.
+STBIDEF stbi_uc* stbi_load_from_callbacks(stbi_io_callbacks const* clbk, void* user, int* x, int* y, int* comp, int req_comp)
+{
+	stbi__context s;
+	stbi__start_callbacks(&s, (stbi_io_callbacks*)clbk, user);
+	return stbi__load_and_postprocess_8bit(&s, x, y, comp, req_comp);
+}
+
+#ifndef STBI_NO_GIF
+// Decodes a GIF held in memory (buffer[0 .. len)) via stbi__load_gif_main.
+//
+//   delays    - forwarded to the GIF decoder
+//   x, y, z   - receive width, height and the number of slices
+//   comp      - receives the channel count
+//   req_comp  - requested channel count
+//
+// When the global flip-on-load switch is set, every slice of a successfully
+// decoded result is flipped vertically. Returns the pixel data, or NULL on
+// failure.
+STBIDEF stbi_uc* stbi_load_gif_from_memory(stbi_uc const* buffer, int len, int** delays, int* x, int* y, int* z, int* comp, int req_comp)
+{
+	unsigned char* result;
+	stbi__context s;
+	stbi__start_mem(&s, buffer, len);
+
+	result = (unsigned char*)stbi__load_gif_main(&s, delays, x, y, z, comp, req_comp);
+	// on failure there is no image, and *x / *y / *z / *comp may not have
+	// been written: skip the flip entirely
+	if (result != NULL && stbi__vertically_flip_on_load) {
+		stbi__vertical_flip_slices(result, *x, *y, *z, *comp);
+	}
+
+	return result;
+}
+#endif
+
+#ifndef STBI_NO_LINEAR
+// Core of the float-per-channel loaders.
+// HDR input (when HDR support is compiled in) is decoded directly to floats
+// and post-processed. Anything else is decoded as 8-bit and converted with
+// stbi__ldr_to_hdr. Returns NULL on failure; if the 8-bit path yields nothing,
+// an "unknown image type" error is recorded.
+static float* stbi__loadf_main(stbi__context* s, int* x, int* y, int* comp, int req_comp)
+{
+	unsigned char* ldr;
+	int channels;
+#ifndef STBI_NO_HDR
+	if (stbi__hdr_test(s)) {
+		stbi__result_info info;
+		float* pixels = stbi__hdr_load(s, x, y, comp, req_comp, &info);
+		if (pixels != NULL)
+			stbi__float_postprocess(pixels, x, y, comp, req_comp);
+		return pixels;
+	}
+#endif
+	ldr = stbi__load_and_postprocess_8bit(s, x, y, comp, req_comp);
+	if (ldr == NULL)
+		return stbi__errpf("unknown image type", "Image not of any known type, or corrupt");
+
+	channels = req_comp ? req_comp : *comp;
+	return stbi__ldr_to_hdr(ldr, *x, *y, channels);
+}
+
+// Decodes an image held in memory (buffer[0 .. len)) into float-per-channel
+// pixels: sets up a memory context and returns the result of
+// stbi__loadf_main unchanged.
+STBIDEF float* stbi_loadf_from_memory(stbi_uc const* buffer, int len, int* x, int* y, int* comp, int req_comp)
+{
+	stbi__context s;
+	stbi__start_mem(&s, buffer, len);
+	return stbi__loadf_main(&s, x, y, comp, req_comp);
+}
+
+// Decodes an image read through the caller's I/O callbacks into
+// float-per-channel pixels. 'user' is handed back to every callback.
+STBIDEF float* stbi_loadf_from_callbacks(stbi_io_callbacks const* clbk, void* user, int* x, int* y, int* comp, int req_comp)
+{
+	stbi__context s;
+	stbi__start_callbacks(&s, (stbi_io_callbacks*)clbk, user);
+	return stbi__loadf_main(&s, x, y, comp, req_comp);
+}
+
+#ifndef STBI_NO_STDIO
+// Open `filename` for binary reading, decode it with stbi_loadf_from_file()
+// and close the file again. If the file cannot be opened the error is
+// reported through stbi__errpf() and its result is returned.
+STBIDEF float* stbi_loadf(char const* filename, int* x, int* y, int* comp, int req_comp)
+{
+	float* pixels;
+	FILE* fp = stbi__fopen(filename, "rb");
+
+	if (fp == NULL) {
+		return stbi__errpf("can't fopen", "Unable to open file");
+	}
+	pixels = stbi_loadf_from_file(fp, x, y, comp, req_comp);
+	fclose(fp);
+	return pixels;
+}
+
+// Float-returning loader for an already opened FILE: starts a file-backed
+// context on `f` and defers to stbi__loadf_main(). The file is not closed here.
+STBIDEF float* stbi_loadf_from_file(FILE* f, int* x, int* y, int* comp, int req_comp)
+{
+	stbi__context ctx;
+
+	stbi__start_file(&ctx, f);
+	return stbi__loadf_main(&ctx, x, y, comp, req_comp);
+}
+#endif // !STBI_NO_STDIO
+
+#endif // !STBI_NO_LINEAR
+
+// the is-hdr-or-not queries below are defined regardless of whether
+// STBI_NO_HDR is defined, for API simplicity; if STBI_NO_HDR is defined,
+// they always report false!
+
+// Report the result of stbi__hdr_test() on the `len` bytes at `buffer`.
+// Always 0 when the library is built with STBI_NO_HDR.
+STBIDEF int stbi_is_hdr_from_memory(stbi_uc const* buffer, int len)
+{
+#ifndef STBI_NO_HDR
+	stbi__context ctx;
+
+	stbi__start_mem(&ctx, buffer, len);
+	return stbi__hdr_test(&ctx);
+#else
+	STBI_NOTUSED(buffer);
+	STBI_NOTUSED(len);
+	return 0;
+#endif
+}
+
+#ifndef STBI_NO_STDIO
+// Open `filename`, ask stbi_is_hdr_from_file() about it and close the file.
+// A file that cannot be opened yields 0.
+STBIDEF int      stbi_is_hdr(char const* filename)
+{
+	int is_hdr;
+	FILE* fp = stbi__fopen(filename, "rb");
+
+	if (!fp)
+		return 0;
+	is_hdr = stbi_is_hdr_from_file(fp);
+	fclose(fp);
+	return is_hdr;
+}
+
+// Run stbi__hdr_test() on an open FILE and seek back to the position the
+// file had on entry, so the caller can keep reading from the same spot.
+// Always 0 when the library is built with STBI_NO_HDR.
+STBIDEF int stbi_is_hdr_from_file(FILE* f)
+{
+#ifndef STBI_NO_HDR
+	stbi__context ctx;
+	long start = ftell(f);
+	int is_hdr;
+
+	stbi__start_file(&ctx, f);
+	is_hdr = stbi__hdr_test(&ctx);
+	fseek(f, start, SEEK_SET);
+	return is_hdr;
+#else
+	STBI_NOTUSED(f);
+	return 0;
+#endif
+}
+#endif // !STBI_NO_STDIO
+
+// Report the result of stbi__hdr_test() on a stream delivered through the
+// caller's I/O callbacks. Always 0 when built with STBI_NO_HDR.
+STBIDEF int      stbi_is_hdr_from_callbacks(stbi_io_callbacks const* clbk, void* user)
+{
+#ifndef STBI_NO_HDR
+	stbi__context ctx;
+
+	stbi__start_callbacks(&ctx, (stbi_io_callbacks*)clbk, user);
+	return stbi__hdr_test(&ctx);
+#else
+	STBI_NOTUSED(clbk);
+	STBI_NOTUSED(user);
+	return 0;
+#endif
+}
+
+#ifndef STBI_NO_LINEAR
+// Gamma exponent and scale factor applied by stbi__ldr_to_hdr() to the
+// non-alpha channels; the two setters below simply overwrite them.
+static float stbi__l2h_gamma = 2.2f;
+static float stbi__l2h_scale = 1.0f;
+
+STBIDEF void   stbi_ldr_to_hdr_gamma(float gamma)
+{
+	stbi__l2h_gamma = gamma;
+}
+
+STBIDEF void   stbi_ldr_to_hdr_scale(float scale)
+{
+	stbi__l2h_scale = scale;
+}
+#endif
+
+// Reciprocal gamma and reciprocal scale used by stbi__hdr_to_ldr(); the
+// setters take the plain values and store their inverses.
+static float stbi__h2l_gamma_i = 1.0f / 2.2f;
+static float stbi__h2l_scale_i = 1.0f;
+
+STBIDEF void   stbi_hdr_to_ldr_gamma(float gamma)
+{
+	stbi__h2l_gamma_i = 1 / gamma;
+}
+
+STBIDEF void   stbi_hdr_to_ldr_scale(float scale)
+{
+	stbi__h2l_scale_i = 1 / scale;
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// Common code used by all image loaders
+//
+
+// Scan selectors with the values 0, 1 and 2.
+// NOTE(review): presumably passed to the per-format parsers to choose between a
+// full load, a type check and a header-only parse -- the users of these
+// constants are outside this section; confirm against them.
+enum
+{
+	STBI__SCAN_load = 0,
+	STBI__SCAN_type,
+	STBI__SCAN_header
+};
+
+// Pull up to s->buflen bytes from the read callback into the context's own
+// buffer and point the read window at what arrived.
+// A read of 0 bytes is treated as end of input: callback reading is switched
+// off and the window is set to a single zero byte, so img_buffer always
+// points at readable memory (e.g. for a 0-byte file).
+static void stbi__refill_buffer(stbi__context* s)
+{
+	int got = (s->io.read)(s->io_user_data, (char*)s->buffer_start, s->buflen);
+
+	s->img_buffer = s->buffer_start;
+	if (got != 0) {
+		s->img_buffer_end = s->buffer_start + got;
+		return;
+	}
+
+	// end of input: behave like a memory source holding one 0 byte
+	s->read_from_callbacks = 0;
+	s->img_buffer_end = s->buffer_start + 1;
+	*s->img_buffer = 0;
+}
+
+// Return the next input byte and advance. When the window is drained the
+// buffer is refilled first if callbacks are still active; otherwise 0 is
+// returned and nothing advances.
+stbi_inline static stbi_uc stbi__get8(stbi__context* s)
+{
+	if (s->img_buffer >= s->img_buffer_end) {
+		if (!s->read_from_callbacks)
+			return 0;
+		stbi__refill_buffer(s);
+	}
+	return *s->img_buffer++;
+}
+
+// Non-zero when no more input is available.
+// With callbacks installed: not at EOF while the eof callback says so; once it
+// reports EOF and callback reading has been switched off (only the synthetic
+// 0 byte is left) the stream is at EOF. In every other case the answer is
+// whether the read window is exhausted.
+stbi_inline static int stbi__at_eof(stbi__context* s)
+{
+	if (s->io.read) {
+		int source_done = (s->io.eof)(s->io_user_data);
+		if (!source_done)
+			return 0;
+		if (!s->read_from_callbacks)
+			return 1;
+	}
+
+	return (s->img_buffer < s->img_buffer_end) ? 0 : 1;
+}
+
+// Advance the input by `n` bytes. A negative `n` jumps to the end of the
+// current window. With callbacks installed, whatever part of the skip is not
+// covered by buffered bytes is delegated to the skip callback.
+static void stbi__skip(stbi__context* s, int n)
+{
+	if (n < 0) {
+		s->img_buffer = s->img_buffer_end;
+		return;
+	}
+
+	if (s->io.read) {
+		int buffered = (int)(s->img_buffer_end - s->img_buffer);
+		if (n > buffered) {
+			s->img_buffer = s->img_buffer_end;
+			(s->io.skip)(s->io_user_data, n - buffered);
+			return;
+		}
+	}
+
+	s->img_buffer += n;
+}
+
+// Copy exactly `n` bytes of input into `buffer`; returns 1 on success, 0 if
+// fewer bytes were available.
+// With callbacks installed and too little buffered, the buffered bytes are
+// copied first and the remainder is read straight into `buffer` through the
+// read callback, leaving the window empty.
+static int stbi__getn(stbi__context* s, stbi_uc* buffer, int n)
+{
+	if (s->io.read) {
+		int buffered = (int)(s->img_buffer_end - s->img_buffer);
+		if (buffered < n) {
+			int wanted = n - buffered;
+			int got;
+
+			memcpy(buffer, s->img_buffer, buffered);
+			got = (s->io.read)(s->io_user_data, (char*)buffer + buffered, wanted);
+			s->img_buffer = s->img_buffer_end;
+			return got == wanted;
+		}
+	}
+
+	if (s->img_buffer + n > s->img_buffer_end)
+		return 0;
+
+	memcpy(buffer, s->img_buffer, n);
+	s->img_buffer += n;
+	return 1;
+}
+
+// Read two bytes and combine them most-significant byte first.
+static int stbi__get16be(stbi__context* s)
+{
+	int hi = stbi__get8(s);
+	int lo = stbi__get8(s);
+	return (hi << 8) + lo;
+}
+
+// Read two big-endian 16-bit halves and combine them, high half first.
+static stbi__uint32 stbi__get32be(stbi__context* s)
+{
+	stbi__uint32 hi = stbi__get16be(s);
+	stbi__uint32 lo = stbi__get16be(s);
+	return (hi << 16) + lo;
+}
+
+#if defined(STBI_NO_BMP) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF)
+// nothing
+#else
+// Read two bytes and combine them least-significant byte first.
+static int stbi__get16le(stbi__context* s)
+{
+	int lo = stbi__get8(s);
+	int hi = stbi__get8(s);
+	return lo + (hi << 8);
+}
+#endif
+
+#ifndef STBI_NO_BMP
+// Read two little-endian 16-bit halves and combine them, low half first.
+// The high half is widened to stbi__uint32 *before* it is shifted:
+// stbi__get16le() returns an int, and shifting a value >= 0x8000 left by 16
+// in signed arithmetic overflows, which is undefined behaviour.
+static stbi__uint32 stbi__get32le(stbi__context* s)
+{
+	stbi__uint32 z = stbi__get16le(s);
+	z += (stbi__uint32)stbi__get16le(s) << 16;
+	return z;
+}
+#endif
+
+// Keep only the low 8 bits of x (x & 255) and cast the result to stbi_uc.
+#define STBI__BYTECAST(x)  ((stbi_uc) ((x) & 255))  // truncate int to byte without warnings
+
+
+//////////////////////////////////////////////////////////////////////////////
+//
+//  generic converter from built-in img_n to req_comp
+//    individual types do this automatically as much as possible (e.g. jpeg
+//    does all cases internally since it needs to colorspace convert anyway,
+//    and it never has alpha, so very few cases ). png can automatically
+//    interleave an alpha=255 channel, but falls back to this for other cases
+//
+//  assume data buffer is malloced, so malloc a new one and free that one
+//  only failure mode is malloc failing
+
+// Weighted sum of r, g and b with integer weights 77, 150 and 29 (they add
+// up to 256), scaled back down by the final >> 8.
+static stbi_uc stbi__compute_y(int r, int g, int b)
+{
+	int weighted = (77 * r) + (150 * g) + (29 * b);
+	return (stbi_uc)(weighted >> 8);
+}
+
+// Convert an 8-bit image with img_n components per pixel into one with
+// req_comp components per pixel.
+//   data     - source pixels, x*y pixels of img_n bytes; freed by this function
+//              whenever a conversion is attempted (success or allocation failure)
+//   img_n    - components per pixel in `data`
+//   req_comp - components per pixel wanted (asserted to be 1..4)
+//   x, y     - image width and height in pixels
+// Returns `data` itself when no conversion is needed, a newly allocated buffer
+// otherwise, or the result of stbi__errpuc() if the allocation fails.
+static unsigned char* stbi__convert_format(unsigned char* data, int img_n, int req_comp, unsigned int x, unsigned int y)
+{
+	int i, j;
+	unsigned char* good;
+
+	if (req_comp == img_n) return data;
+	STBI_ASSERT(req_comp >= 1 && req_comp <= 4);
+
+	// overflow-checked allocation of req_comp * x * y bytes
+	good = (unsigned char*)stbi__malloc_mad3(req_comp, x, y, 0);
+	if (good == NULL) {
+		STBI_FREE(data);
+		return stbi__errpuc("outofmem", "Out of memory");
+	}
+
+	// one pass per scanline; src/dest start at the beginning of row j
+	for (j = 0; j < (int)y; ++j) {
+		unsigned char* src = data + j * x * img_n;
+		unsigned char* dest = good + j * x * req_comp;
+
+		// STBI__COMBO packs the (from, to) component counts into one switch key;
+		// STBI__CASE expands to that case label followed by the per-pixel loop
+		// header, so the braces after each STBI__CASE are the loop body.
+#define STBI__COMBO(a,b)  ((a)*8+(b))
+#define STBI__CASE(a,b)   case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
+		// convert source image with img_n components to one with req_comp components;
+		// avoid switch per pixel, so use switch per scanline and massive macros
+		switch (STBI__COMBO(img_n, req_comp)) {
+			// a missing alpha channel is filled with 255, grey is replicated into
+			// the three colour channels, colour is reduced with stbi__compute_y
+			STBI__CASE(1, 2) { dest[0] = src[0], dest[1] = 255; } break;
+			STBI__CASE(1, 3) { dest[0] = dest[1] = dest[2] = src[0]; } break;
+			STBI__CASE(1, 4) { dest[0] = dest[1] = dest[2] = src[0], dest[3] = 255; } break;
+			STBI__CASE(2, 1) { dest[0] = src[0]; } break;
+			STBI__CASE(2, 3) { dest[0] = dest[1] = dest[2] = src[0]; } break;
+			STBI__CASE(2, 4) { dest[0] = dest[1] = dest[2] = src[0], dest[3] = src[1]; } break;
+			STBI__CASE(3, 4) { dest[0] = src[0], dest[1] = src[1], dest[2] = src[2], dest[3] = 255; } break;
+			STBI__CASE(3, 1) { dest[0] = stbi__compute_y(src[0], src[1], src[2]); } break;
+			STBI__CASE(3, 2) { dest[0] = stbi__compute_y(src[0], src[1], src[2]), dest[1] = 255; } break;
+			STBI__CASE(4, 1) { dest[0] = stbi__compute_y(src[0], src[1], src[2]); } break;
+			STBI__CASE(4, 2) { dest[0] = stbi__compute_y(src[0], src[1], src[2]), dest[1] = src[3]; } break;
+			STBI__CASE(4, 3) { dest[0] = src[0], dest[1] = src[1], dest[2] = src[2]; } break;
+		default: STBI_ASSERT(0);
+		}
+#undef STBI__CASE
+	}
+
+	// the source buffer is no longer needed once every row has been converted
+	STBI_FREE(data);
+	return good;
+}
+
+// 16-bit variant of the weighted r/g/b sum: weights 77, 150 and 29 (sum 256),
+// scaled back down by the final >> 8.
+static stbi__uint16 stbi__compute_y_16(int r, int g, int b)
+{
+	int weighted = (77 * r) + (150 * g) + (29 * b);
+	return (stbi__uint16)(weighted >> 8);
+}
+
+// Convert a 16-bit-per-component image with img_n components per pixel into
+// one with req_comp components per pixel.
+//   data     - source pixels, x*y pixels of img_n 16-bit values; freed by this
+//              function whenever a conversion is attempted
+//   img_n    - components per pixel in `data`
+//   req_comp - components per pixel wanted (asserted to be 1..4)
+//   x, y     - image width and height in pixels
+// Returns `data` itself when no conversion is needed, a newly allocated buffer
+// otherwise, or the result of stbi__errpuc() if the allocation fails.
+static stbi__uint16* stbi__convert_format16(stbi__uint16* data, int img_n, int req_comp, unsigned int x, unsigned int y)
+{
+	int i, j;
+	stbi__uint16* good;
+
+	if (req_comp == img_n) return data;
+	STBI_ASSERT(req_comp >= 1 && req_comp <= 4);
+
+	// Overflow-checked allocation, like the 8-bit converter: a plain
+	// req_comp * x * y * 2 can wrap for large images and yield a buffer that
+	// is too small for the rows written below. req_comp * 2 is at most 8.
+	good = (stbi__uint16*)stbi__malloc_mad3(req_comp * 2, x, y, 0);
+	if (good == NULL) {
+		STBI_FREE(data);
+		return (stbi__uint16*)stbi__errpuc("outofmem", "Out of memory");
+	}
+
+	// one pass per scanline; src/dest start at the beginning of row j
+	for (j = 0; j < (int)y; ++j) {
+		stbi__uint16* src = data + j * x * img_n;
+		stbi__uint16* dest = good + j * x * req_comp;
+
+		// STBI__COMBO packs the (from, to) component counts into one switch key;
+		// STBI__CASE expands to that case label followed by the per-pixel loop
+		// header, so the braces after each STBI__CASE are the loop body.
+#define STBI__COMBO(a,b)  ((a)*8+(b))
+#define STBI__CASE(a,b)   case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
+		// convert source image with img_n components to one with req_comp components;
+		// avoid switch per pixel, so use switch per scanline and massive macros
+		switch (STBI__COMBO(img_n, req_comp)) {
+			// a missing alpha channel is filled with 0xffff, grey is replicated into
+			// the three colour channels, colour is reduced with stbi__compute_y_16
+			STBI__CASE(1, 2) { dest[0] = src[0], dest[1] = 0xffff; } break;
+			STBI__CASE(1, 3) { dest[0] = dest[1] = dest[2] = src[0]; } break;
+			STBI__CASE(1, 4) { dest[0] = dest[1] = dest[2] = src[0], dest[3] = 0xffff; } break;
+			STBI__CASE(2, 1) { dest[0] = src[0]; } break;
+			STBI__CASE(2, 3) { dest[0] = dest[1] = dest[2] = src[0]; } break;
+			STBI__CASE(2, 4) { dest[0] = dest[1] = dest[2] = src[0], dest[3] = src[1]; } break;
+			STBI__CASE(3, 4) { dest[0] = src[0], dest[1] = src[1], dest[2] = src[2], dest[3] = 0xffff; } break;
+			STBI__CASE(3, 1) { dest[0] = stbi__compute_y_16(src[0], src[1], src[2]); } break;
+			STBI__CASE(3, 2) { dest[0] = stbi__compute_y_16(src[0], src[1], src[2]), dest[1] = 0xffff; } break;
+			STBI__CASE(4, 1) { dest[0] = stbi__compute_y_16(src[0], src[1], src[2]); } break;
+			STBI__CASE(4, 2) { dest[0] = stbi__compute_y_16(src[0], src[1], src[2]), dest[1] = src[3]; } break;
+			STBI__CASE(4, 3) { dest[0] = src[0], dest[1] = src[1], dest[2] = src[2]; } break;
+		default: STBI_ASSERT(0);
+		}
+#undef STBI__CASE
+	}
+
+	// the source buffer is no longer needed once every row has been converted
+	STBI_FREE(data);
+	return good;
+}
+
+#ifndef STBI_NO_LINEAR
+// Widen an 8-bit image of x*y pixels with `comp` channels to float.
+// Colour channels go through pow(v / 255, stbi__l2h_gamma) * stbi__l2h_scale;
+// when `comp` is even the last channel is treated as alpha and only divided
+// by 255. `data` is freed in every case except a NULL input, which is
+// returned as NULL immediately.
+static float* stbi__ldr_to_hdr(stbi_uc* data, int x, int y, int comp)
+{
+	int px, ch, color_channels;
+	float* output;
+
+	if (!data) return NULL;
+
+	output = (float*)stbi__malloc_mad4(x, y, comp, sizeof(float), 0);
+	if (output == NULL) {
+		STBI_FREE(data);
+		return stbi__errpf("outofmem", "Out of memory");
+	}
+
+	// an odd channel count has no alpha; an even one ends with alpha
+	color_channels = (comp & 1) ? comp : comp - 1;
+
+	for (px = 0; px < x * y; ++px) {
+		stbi_uc* in = data + px * comp;
+		float* out = output + px * comp;
+
+		for (ch = 0; ch < color_channels; ++ch) {
+			out[ch] = (float)(pow(in[ch] / 255.0f, stbi__l2h_gamma) * stbi__l2h_scale);
+		}
+		// ch now indexes the alpha channel, if there is one
+		if (ch < comp) out[ch] = in[ch] / 255.0f;
+	}
+
+	STBI_FREE(data);
+	return output;
+}
+#endif
+
+#ifndef STBI_NO_HDR
+#define stbi__float2int(x)   ((int) (x))
+// Narrow a float image of x*y pixels with `comp` channels to 8 bits.
+// Colour channels go through pow(v * stbi__h2l_scale_i, stbi__h2l_gamma_i),
+// are scaled by 255, rounded by adding 0.5 and limited to 0..255; when `comp`
+// is even the last channel is treated as alpha and skips the pow step.
+// `data` is freed in every case except a NULL input, which is returned as
+// NULL immediately.
+static stbi_uc* stbi__hdr_to_ldr(float* data, int x, int y, int comp)
+{
+	int px, ch, color_channels;
+	stbi_uc* output;
+
+	if (!data) return NULL;
+
+	output = (stbi_uc*)stbi__malloc_mad3(x, y, comp, 0);
+	if (output == NULL) {
+		STBI_FREE(data);
+		return stbi__errpuc("outofmem", "Out of memory");
+	}
+
+	// an odd channel count has no alpha; an even one ends with alpha
+	color_channels = (comp & 1) ? comp : comp - 1;
+
+	for (px = 0; px < x * y; ++px) {
+		float* in = data + px * comp;
+		stbi_uc* out = output + px * comp;
+
+		for (ch = 0; ch < color_channels; ++ch) {
+			float z = (float)pow(in[ch] * stbi__h2l_scale_i, stbi__h2l_gamma_i) * 255 + 0.5f;
+			if (z < 0) z = 0;
+			if (z > 255) z = 255;
+			out[ch] = (stbi_uc)stbi__float2int(z);
+		}
+		// ch now indexes the alpha channel, if there is one
+		if (ch < comp) {
+			float z = in[ch] * 255 + 0.5f;
+			if (z < 0) z = 0;
+			if (z > 255) z = 255;
+			out[ch] = (stbi_uc)stbi__float2int(z);
+		}
+	}
+
+	STBI_FREE(data);
+	return output;
+}
+#endif
+
+//////////////////////////////////////////////////////////////////////////////
+//
+//  "baseline" JPEG/JFIF decoder
+//
+//    simple implementation
+//      - doesn't support delayed output of y-dimension
+//      - simple interface (only one output format: 8-bit interleaved RGB)
+//      - doesn't try to recover corrupt jpegs
+//      - doesn't allow partial loading, loading multiple at once
+//      - still fast on x86 (copying globals into locals doesn't help x86)
+//      - allocates lots of intermediate memory (full size of all components)
+//        - non-interleaved case requires this anyway
+//        - allows good upsampling (see next)
+//    high-quality
+//      - upsampled channels are bilinearly interpolated, even across blocks
+//      - quality integer IDCT derived from IJG's 'slow'
+//    performance
+//      - fast huffman; reasonable integer IDCT
+//      - some SIMD kernels for common paths on targets with SSE2/NEON
+//      - uses a lot of intermediate memory, could cache poorly
+
+#ifndef STBI_NO_JPEG
+
+// huffman decoding acceleration:
+// number of leading stream bits resolved by a direct table lookup; the
+// fast[] and fast_ac[] lookup tables each have 1 << FAST_BITS entries
+#define FAST_BITS   9  // larger handles more cases; smaller stomps less cache
+
+// Decoding tables for one JPEG Huffman table. Entries are addressed by a
+// symbol index k, assigned in order of increasing code length.
+typedef struct
+{
+	// fast[c]: symbol index for the FAST_BITS-bit prefix c, or 255 when the
+	// code is longer than FAST_BITS bits (filled in by stbi__build_huffman)
+	stbi_uc  fast[1 << FAST_BITS];
+	// weirdly, repacking this into AoS is a 10% speed loss, instead of a win
+	// code[k]: the Huffman code assigned to symbol index k
+	stbi__uint16 code[256];
+	// values[k]: the value handed back by the decoder for symbol index k
+	// (not written by stbi__build_huffman; filled in outside this section)
+	stbi_uc  values[256];
+	// size[k]: code length in bits of symbol index k; the list ends with a 0
+	stbi_uc  size[257];
+	// maxcode[len]: one past the largest code of that length, shifted left so
+	// it lines up with 16 stream bits; the entry after length 16 is 0xffffffff
+	unsigned int maxcode[18];
+	int    delta[17];   // old 'firstsymbol' - old 'firstcode'
+} stbi__huffman;
+
+// Complete state of one JPEG decode: input context, Huffman and quantisation
+// tables, per-component buffers, the entropy-decoder bit buffer and the
+// parameters of the scan currently being decoded.
+typedef struct
+{
+	// input stream the bit buffer is refilled from
+	stbi__context* s;
+	stbi__huffman huff_dc[4];
+	stbi__huffman huff_ac[4];
+	stbi__uint16 dequant[4][64];
+	// one 1 << FAST_BITS entry table per AC table; the entry layout is the one
+	// written by stbi__build_fast_ac
+	stbi__int16 fast_ac[4][1 << FAST_BITS];
+
+	// sizes for components, interleaved MCUs
+	int img_h_max, img_v_max;
+	int img_mcu_x, img_mcu_y;
+	int img_mcu_w, img_mcu_h;
+
+	// definition of jpeg image component
+	struct
+	{
+		int id;
+		int h, v;
+		int tq;
+		int hd, ha;
+		// running DC value: each decoded DC difference is added to it
+		int dc_pred;
+
+		int x, y, w2, h2;
+		stbi_uc* data;
+		void* raw_data, * raw_coeff;
+		stbi_uc* linebuf;
+		short* coeff;   // progressive only
+		int      coeff_w, coeff_h; // number of 8x8 coefficient blocks
+	} img_comp[4];
+
+	stbi__uint32   code_buffer; // jpeg entropy-coded buffer
+	int            code_bits;   // number of valid bits
+	unsigned char  marker;      // marker seen while filling entropy buffer
+	int            nomore;      // flag if we saw a marker so must stop
+
+	int            progressive;
+	// spec_start..spec_end: range of zigzag positions handled by the current
+	// progressive scan; succ_high / succ_low: successive-approximation bit
+	// positions (succ_high == 0 selects a first scan, otherwise a refinement)
+	int            spec_start;
+	int            spec_end;
+	int            succ_high;
+	int            succ_low;
+	// number of further blocks the progressive AC decoder treats as already
+	// ended before it reads new codes
+	int            eob_run;
+	int            jfif;
+	int            app14_color_transform; // Adobe APP14 tag
+	int            rgb;
+
+	int scan_n, order[4];
+	int restart_interval, todo;
+
+	// kernels
+	void (*idct_block_kernel)(stbi_uc* out, int out_stride, short data[64]);
+	void (*YCbCr_to_RGB_kernel)(stbi_uc* out, const stbi_uc* y, const stbi_uc* pcb, const stbi_uc* pcr, int count, int step);
+	stbi_uc* (*resample_row_hv_2_kernel)(stbi_uc* out, stbi_uc* in_near, stbi_uc* in_far, int w, int hs);
+} stbi__jpeg;
+
+// Build the decoding tables of `h` from a JPEG code-length histogram.
+//   h     - table to fill: size[], code[], delta[], maxcode[] and fast[] are
+//           written here (values[] is not touched)
+//   count - 16 entries; count[i] is the number of codes that are i+1 bits long
+// Returns 1 on success; for a malformed histogram the result of stbi__err()
+// is returned instead.
+static int stbi__build_huffman(stbi__huffman* h, int* count)
+{
+	int i, j, k = 0;
+	unsigned int code;
+	// build size list for each symbol (from JPEG spec)
+	for (i = 0; i < 16; ++i) {
+		for (j = 0; j < count[i]; ++j) {
+			h->size[k++] = (stbi_uc)(i + 1);
+			// size[] has room for 256 symbols plus the terminating 0; a
+			// histogram announcing more symbols than that must be rejected
+			// before it writes past the end of size[] (and later code[])
+			if (k >= 257) return stbi__err("bad size list", "Corrupt JPEG");
+		}
+	}
+	h->size[k] = 0;
+
+	// compute actual symbols (from jpeg spec)
+	code = 0;
+	k = 0;
+	for (j = 1; j <= 16; ++j) {
+		// compute delta to add to code to compute symbol id
+		h->delta[j] = k - code;
+		if (h->size[k] == j) {
+			while (h->size[k] == j)
+				h->code[k++] = (stbi__uint16)(code++);
+			// the last code handed out must still fit into j bits
+			if (code - 1 >= (1u << j)) return stbi__err("bad code lengths", "Corrupt JPEG");
+		}
+		// compute largest code + 1 for this size, preshifted as needed later
+		h->maxcode[j] = code << (16 - j);
+		code <<= 1;
+	}
+	// sentinel after the 16-bit codes: every 16-bit comparison value is below it
+	h->maxcode[j] = 0xffffffff;
+
+	// build non-spec acceleration table; 255 is flag for not-accelerated
+	memset(h->fast, 255, 1 << FAST_BITS);
+	for (i = 0; i < k; ++i) {
+		int s = h->size[i];
+		if (s <= FAST_BITS) {
+			// every FAST_BITS-bit pattern starting with this code maps to symbol i
+			int c = h->code[i] << (FAST_BITS - s);
+			int m = 1 << (FAST_BITS - s);
+			for (j = 0; j < m; ++j) {
+				h->fast[c + j] = (stbi_uc)i;
+			}
+		}
+	}
+	return 1;
+}
+
+// Fill `fast_ac` (1 << FAST_BITS entries) so that short AC codes can be
+// decoded together with their magnitude bits in a single lookup.
+// An entry is 0 when no shortcut exists; otherwise it packs
+// value * 256 + run * 16 + (code length + magnitude bits).
+static void stbi__build_fast_ac(stbi__int16* fast_ac, stbi__huffman* h)
+{
+	int idx;
+	for (idx = 0; idx < (1 << FAST_BITS); ++idx) {
+		stbi_uc sym = h->fast[idx];
+		int rs, run, magbits, len, k, m;
+
+		fast_ac[idx] = 0;
+		if (sym >= 255) continue; // prefix has no accelerated symbol
+
+		rs = h->values[sym];
+		run = (rs >> 4) & 15;
+		magbits = rs & 15;
+		len = h->size[sym];
+
+		// code plus magnitude bits must fit inside the FAST_BITS window
+		if (!magbits || len + magbits > FAST_BITS) continue;
+
+		// magnitude code followed by receive_extend code
+		k = ((idx << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits);
+		m = 1 << (magbits - 1);
+		if (k < m) k += (~0U << magbits) + 1;
+		// if the result is small enough, we can fit it in fast_ac table
+		if (k >= -128 && k <= 127)
+			fast_ac[idx] = (stbi__int16)((k * 256) + (run * 16) + (len + magbits));
+	}
+}
+
+// Top up the entropy bit buffer one byte at a time until it holds more than
+// 24 bits. A 0xff byte followed by a non-zero byte (after skipping 0xff fill
+// bytes) is a marker: it is recorded, `nomore` is set and filling stops at
+// once. After that, zero bytes are fed in place of stream data.
+static void stbi__grow_buffer_unsafe(stbi__jpeg* j)
+{
+	for (;;) {
+		unsigned int byte = j->nomore ? 0 : stbi__get8(j->s);
+		if (byte == 0xff) {
+			int follow = stbi__get8(j->s);
+			while (follow == 0xff)
+				follow = stbi__get8(j->s); // consume fill bytes
+			if (follow != 0) {
+				j->marker = (unsigned char)follow;
+				j->nomore = 1;
+				return;
+			}
+		}
+		j->code_buffer |= byte << (24 - j->code_bits);
+		j->code_bits += 8;
+		if (j->code_bits > 24)
+			break;
+	}
+}
+
+// stbi__bmask[n] == (1 << n) - 1, i.e. a mask of the n low bits, for n = 0..16
+static const stbi__uint32 stbi__bmask[17] = { 0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535 };
+
+// decode a jpeg huffman value from the bitstream
+//   j - decoder state; its bit buffer is refilled and consumed here
+//   h - huffman table to decode with
+// Returns the decoded value (h->values[...]) or -1 when no valid code can be
+// decoded from the bits available.
+stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg* j, stbi__huffman* h)
+{
+	unsigned int temp;
+	int c, k;
+
+	if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
+
+	// look at the top FAST_BITS and determine what symbol ID it is,
+	// if the code is <= FAST_BITS
+	c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS) - 1);
+	k = h->fast[c];
+	if (k < 255) {
+		int s = h->size[k];
+		if (s > j->code_bits)
+			return -1;
+		j->code_buffer <<= s;
+		j->code_bits -= s;
+		return h->values[k];
+	}
+
+	// naive test is to shift the code_buffer down so k bits are
+	// valid, then test against maxcode. To speed this up, we've
+	// preshifted maxcode left so that it has (16-k) 0s at the
+	// end; in other words, regardless of the number of bits, it
+	// wants to be compared against something shifted to have 16;
+	// that way we don't need to shift inside the loop.
+	temp = j->code_buffer >> 16;
+	for (k = FAST_BITS + 1; ; ++k)
+		if (temp < h->maxcode[k])
+			break;
+	if (k == 17) {
+		// error! code not found
+		j->code_bits -= 16;
+		return -1;
+	}
+
+	if (k > j->code_bits)
+		return -1;
+
+	// convert the huffman code to the symbol id
+	c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k];
+	// with a corrupt table the computed id can fall outside the 256-entry
+	// size[]/code[]/values[] arrays; reject it before it is used as an index
+	if (c < 0 || c >= 256)
+		return -1;
+	STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]);
+
+	// convert the id to a symbol
+	j->code_bits -= k;
+	j->code_buffer <<= k;
+	return h->values[c];
+}
+
+// bias[n] = (-1<<n) + 1, for n = 0..15; stbi__extend_receive adds it to an
+// n-bit field to produce a negative value
+static const int stbi__jbias[16] = { 0,-1,-3,-7,-15,-31,-63,-127,-255,-511,-1023,-2047,-4095,-8191,-16383,-32767 };
+
+// combined JPEG 'receive' and JPEG 'extend', since baseline
+// always extends everything it receives.
+//   j - decoder state; n bits are consumed from its bit buffer
+//   n - number of bits to read (must index stbi__bmask / stbi__jbias)
+// Returns the n-bit field as a signed value: the raw field when its top bit
+// is set, the field plus stbi__jbias[n] otherwise. Returns 0 without
+// consuming anything when the stream cannot supply n bits.
+stbi_inline static int stbi__extend_receive(stbi__jpeg* j, int n)
+{
+	unsigned int k;
+	int sgn;
+	if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
+	// the refill stops early when it runs into a marker; consuming bits that
+	// are not there would drive code_bits negative and corrupt later refills
+	if (j->code_bits < n) return 0;
+
+	sgn = (stbi__int32)j->code_buffer >> 31; // sign bit is always in MSB
+	// rotate the top n bits down into the low n bits of k
+	k = stbi_lrot(j->code_buffer, n);
+	STBI_ASSERT(n >= 0 && n < (int)(sizeof(stbi__bmask) / sizeof(*stbi__bmask)));
+	j->code_buffer = k & ~stbi__bmask[n];
+	k &= stbi__bmask[n];
+	j->code_bits -= n;
+	// sgn is all ones when the first bit was 1, which disables the bias
+	return k + (stbi__jbias[n] & ~sgn);
+}
+
+// get some unsigned bits
+//   j - decoder state; n bits are consumed from its bit buffer
+//   n - number of bits to read (must index stbi__bmask)
+// Returns the next n stream bits as an unsigned value, or 0 without consuming
+// anything when the stream cannot supply n bits.
+stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg* j, int n)
+{
+	unsigned int k;
+	if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
+	// the refill stops early when it runs into a marker; consuming bits that
+	// are not there would drive code_bits negative and corrupt later refills
+	if (j->code_bits < n) return 0;
+	// rotate the top n bits down into the low n bits of k
+	k = stbi_lrot(j->code_buffer, n);
+	j->code_buffer = k & ~stbi__bmask[n];
+	k &= stbi__bmask[n];
+	j->code_bits -= n;
+	return k;
+}
+
+// Consume a single bit from the bit buffer.
+// Returns non-zero (0x80000000 converted to int) when the bit is set and 0
+// when it is clear -- callers must test for zero / non-zero only. Returns 0
+// without consuming anything when the stream cannot supply another bit.
+stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg* j)
+{
+	unsigned int k;
+	if (j->code_bits < 1) stbi__grow_buffer_unsafe(j);
+	// the refill stops early when it runs into a marker; consuming a bit that
+	// is not there would drive code_bits negative and corrupt later refills
+	if (j->code_bits < 1) return 0;
+	k = j->code_buffer;
+	j->code_buffer <<= 1;
+	--j->code_bits;
+	return k & 0x80000000;
+}
+
+// given a value that's at position X in the zigzag stream,
+// where does it appear in the 8x8 matrix coded as row-major?
+// The 64 real entries are followed by 15 extra entries that all map to 63, so
+// an index that overshoots 63 still reads inside the table.
+static const stbi_uc stbi__jpeg_dezigzag[64 + 15] =
+{
+	0,  1,  8, 16,  9,  2,  3, 10,
+   17, 24, 32, 25, 18, 11,  4,  5,
+   12, 19, 26, 33, 40, 48, 41, 34,
+   27, 20, 13,  6,  7, 14, 21, 28,
+   35, 42, 49, 56, 57, 50, 43, 36,
+   29, 22, 15, 23, 30, 37, 44, 51,
+   58, 59, 52, 45, 38, 31, 39, 46,
+   53, 60, 61, 54, 47, 55, 62, 63,
+   // let corrupt input sample past end
+   63, 63, 63, 63, 63, 63, 63, 63,
+   63, 63, 63, 63, 63, 63, 63
+};
+
+// decode one 64-entry block--
+//   j       - decoder state (bit buffer and per-component DC predictor)
+//   data    - receives the 64 dequantised coefficients in row-major order
+//   hdc/hac - huffman tables for the DC and the AC coefficients
+//   fac     - fast-AC lookup table belonging to `hac`
+//   b       - index into j->img_comp of the component being decoded
+//   dequant - 64 quantisation factors, indexed like `data`
+// Returns 1 on success, or the result of stbi__err() for a bad huffman code.
+static int stbi__jpeg_decode_block(stbi__jpeg* j, short data[64], stbi__huffman* hdc, stbi__huffman* hac, stbi__int16* fac, int b, stbi__uint16* dequant)
+{
+	int diff, dc, k;
+	int t;
+
+	if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
+	t = stbi__jpeg_huff_decode(j, hdc);
+	// t is used as a bit count and as an index into the 16-entry stbi__jbias
+	// table below, so values above 15 coming from a corrupt table are rejected
+	if (t < 0 || t > 15) return stbi__err("bad huffman code", "Corrupt JPEG");
+
+	// 0 all the ac values now so we can do it 32-bits at a time
+	memset(data, 0, 64 * sizeof(data[0]));
+
+	// the DC value is coded as a difference to the previous block's DC
+	diff = t ? stbi__extend_receive(j, t) : 0;
+	dc = j->img_comp[b].dc_pred + diff;
+	j->img_comp[b].dc_pred = dc;
+	data[0] = (short)(dc * dequant[0]);
+
+	// decode AC components, see JPEG spec
+	k = 1;
+	do {
+		unsigned int zig;
+		int c, r, s;
+		if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
+		c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS) - 1);
+		r = fac[c];
+		if (r) { // fast-AC path
+			k += (r >> 4) & 15; // run
+			s = r & 15; // combined length
+			j->code_buffer <<= s;
+			j->code_bits -= s;
+			// decode into unzigzag'd location
+			zig = stbi__jpeg_dezigzag[k++];
+			data[zig] = (short)((r >> 8) * dequant[zig]);
+		}
+		else {
+			int rs = stbi__jpeg_huff_decode(j, hac);
+			if (rs < 0) return stbi__err("bad huffman code", "Corrupt JPEG");
+			s = rs & 15;
+			r = rs >> 4;
+			if (s == 0) {
+				if (rs != 0xf0) break; // end block
+				k += 16;
+			}
+			else {
+				k += r;
+				// decode into unzigzag'd location
+				zig = stbi__jpeg_dezigzag[k++];
+				data[zig] = (short)(stbi__extend_receive(j, s) * dequant[zig]);
+			}
+		}
+	} while (k < 64);
+	return 1;
+}
+
+// Decode the DC coefficient of one block in a progressive scan.
+//   j    - decoder state (bit buffer, scan parameters, DC predictor)
+//   data - the block's 64 coefficients; only data[0] is produced here (a
+//          first scan also zeroes the rest)
+//   hdc  - huffman table for the DC coefficient
+//   b    - index into j->img_comp of the component being decoded
+// Returns 1 on success, or the result of stbi__err() for a scan that mixes DC
+// and AC (spec_end != 0) or for a bad huffman code.
+static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg* j, short data[64], stbi__huffman* hdc, int b)
+{
+	int diff, dc;
+	int t;
+	if (j->spec_end != 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
+
+	if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
+
+	if (j->succ_high == 0) {
+		// first scan for DC coefficient, must be first
+		memset(data, 0, 64 * sizeof(data[0])); // 0 all the ac values now
+		t = stbi__jpeg_huff_decode(j, hdc);
+		// a failed decode (-1) or a category above 15 must not reach
+		// stbi__extend_receive, where t indexes stbi__bmask and stbi__jbias
+		if (t < 0 || t > 15) return stbi__err("bad huffman code", "Corrupt JPEG");
+		diff = t ? stbi__extend_receive(j, t) : 0;
+
+		dc = j->img_comp[b].dc_pred + diff;
+		j->img_comp[b].dc_pred = dc;
+		data[0] = (short)(dc << j->succ_low);
+	}
+	else {
+		// refinement scan for DC coefficient
+		if (stbi__jpeg_get_bit(j))
+			data[0] += (short)(1 << j->succ_low);
+	}
+	return 1;
+}
+
+// @OPTIMIZE: store non-zigzagged during the decode passes,
+// and only de-zigzag when dequantizing
+//
+// Decode the AC coefficients of one block in a progressive scan.
+//   j    - decoder state (bit buffer, spec_start..spec_end, succ_high/low, eob_run)
+//   data - the block's 64 coefficients in row-major order, updated in place
+//   hac  - huffman table for the AC coefficients
+//   fac  - fast-AC lookup table belonging to `hac` (used by the first scan only)
+// Returns 1 on success, or the result of stbi__err() for a scan that mixes DC
+// and AC (spec_start == 0) or for a bad huffman code.
+static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg* j, short data[64], stbi__huffman* hac, stbi__int16* fac)
+{
+	int k;
+	if (j->spec_start == 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
+
+	if (j->succ_high == 0) {
+		// first scan for this band: coefficients are stored shifted left by succ_low
+		int shift = j->succ_low;
+
+		// inside an end-of-band run this block has no coefficients in the band
+		if (j->eob_run) {
+			--j->eob_run;
+			return 1;
+		}
+
+		k = j->spec_start;
+		do {
+			unsigned int zig;
+			int c, r, s;
+			if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
+			c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS) - 1);
+			r = fac[c];
+			if (r) { // fast-AC path
+				k += (r >> 4) & 15; // run
+				s = r & 15; // combined length
+				j->code_buffer <<= s;
+				j->code_bits -= s;
+				zig = stbi__jpeg_dezigzag[k++];
+				data[zig] = (short)((r >> 8) << shift);
+			}
+			else {
+				int rs = stbi__jpeg_huff_decode(j, hac);
+				if (rs < 0) return stbi__err("bad huffman code", "Corrupt JPEG");
+				s = rs & 15;
+				r = rs >> 4;
+				if (s == 0) {
+					if (r < 15) {
+						// end-of-band run of (1 << r) plus r extra bits; this
+						// block counts as the first of the run
+						j->eob_run = (1 << r);
+						if (r)
+							j->eob_run += stbi__jpeg_get_bits(j, r);
+						--j->eob_run;
+						break;
+					}
+					// r == 15, s == 0: skip 16 zero coefficients
+					k += 16;
+				}
+				else {
+					k += r;
+					zig = stbi__jpeg_dezigzag[k++];
+					data[zig] = (short)(stbi__extend_receive(j, s) << shift);
+				}
+			}
+		} while (k <= j->spec_end);
+	}
+	else {
+		// refinement scan for these AC coefficients
+
+		// the bit position this scan adds to each coefficient
+		short bit = (short)(1 << j->succ_low);
+
+		if (j->eob_run) {
+			// no new coefficients in this block: only already non-zero ones
+			// receive a correction bit each
+			--j->eob_run;
+			for (k = j->spec_start; k <= j->spec_end; ++k) {
+				short* p = &data[stbi__jpeg_dezigzag[k]];
+				if (*p != 0)
+					if (stbi__jpeg_get_bit(j))
+						if ((*p & bit) == 0) {
+							// move the magnitude away from zero
+							if (*p > 0)
+								* p += bit;
+							else
+								*p -= bit;
+						}
+			}
+		}
+		else {
+			k = j->spec_start;
+			do {
+				int r, s;
+				int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh
+				if (rs < 0) return stbi__err("bad huffman code", "Corrupt JPEG");
+				s = rs & 15;
+				r = rs >> 4;
+				if (s == 0) {
+					if (r < 15) {
+						j->eob_run = (1 << r) - 1;
+						if (r)
+							j->eob_run += stbi__jpeg_get_bits(j, r);
+						r = 64; // force end of block
+					}
+					else {
+						// r=15 s=0 should write 16 0s, so we just do
+						// a run of 15 0s and then write s (which is 0),
+						// so we don't have to do anything special here
+					}
+				}
+				else {
+					// a refinement scan can only introduce coefficients of magnitude 1
+					if (s != 1) return stbi__err("bad huffman code", "Corrupt JPEG");
+					// sign bit
+					if (stbi__jpeg_get_bit(j))
+						s = bit;
+					else
+						s = -bit;
+				}
+
+				// advance by r
+				// (non-zero coefficients passed on the way get a correction bit
+				// and do not count towards the run of r zeros)
+				while (k <= j->spec_end) {
+					short* p = &data[stbi__jpeg_dezigzag[k++]];
+					if (*p != 0) {
+						if (stbi__jpeg_get_bit(j))
+							if ((*p & bit) == 0) {
+								if (*p > 0)
+									* p += bit;
+								else
+									*p -= bit;
+							}
+					}
+					else {
+						if (r == 0) {
+							// run finished: this zero slot receives the new value
+							*p = (short)s;
+							break;
+						}
+						--r;
+					}
+				}
+			} while (k <= j->spec_end);
+		}
+	}
+	return 1;
+}
+
+// limit an int to the range 0..255 and return it as a byte:
+// negative values become 0, values above 255 become 255
+stbi_inline static stbi_uc stbi__clamp(int x)
+{
+	// a single unsigned comparison accepts exactly the values 0..255
+	if ((unsigned int)x <= 255)
+		return (stbi_uc)x;
+	return (x < 0) ? 0 : 255;
+}
+
+// stbi__f2f: convert a floating-point constant to fixed point with a scale of
+//            4096 (1 << 12), rounding by adding 0.5 before the cast
+// stbi__fsh: bring an integer to the same 1 << 12 scale
+#define stbi__f2f(x)  ((int) (((x) * 4096 + 0.5)))
+#define stbi__fsh(x)  ((x) * 4096)
+
+// derived from jidctint -- DCT_ISLOW
+// One 1-D 8-point inverse DCT step on the inputs s0..s7.
+// The macro DECLARES the locals t0..t3, p1..p5 and x0..x3 in the enclosing
+// scope, so it can be expanded once per scope. On exit x0..x3 hold the part
+// computed from the even inputs and t0..t3 the part computed from the odd
+// inputs; the caller forms the eight outputs as x + t and x - t pairs.
+#define STBI__IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \
+   int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \
+   p2 = s2;                                    \
+   p3 = s6;                                    \
+   p1 = (p2+p3) * stbi__f2f(0.5411961f);       \
+   t2 = p1 + p3*stbi__f2f(-1.847759065f);      \
+   t3 = p1 + p2*stbi__f2f( 0.765366865f);      \
+   p2 = s0;                                    \
+   p3 = s4;                                    \
+   t0 = stbi__fsh(p2+p3);                      \
+   t1 = stbi__fsh(p2-p3);                      \
+   x0 = t0+t3;                                 \
+   x3 = t0-t3;                                 \
+   x1 = t1+t2;                                 \
+   x2 = t1-t2;                                 \
+   t0 = s7;                                    \
+   t1 = s5;                                    \
+   t2 = s3;                                    \
+   t3 = s1;                                    \
+   p3 = t0+t2;                                 \
+   p4 = t1+t3;                                 \
+   p1 = t0+t3;                                 \
+   p2 = t1+t2;                                 \
+   p5 = (p3+p4)*stbi__f2f( 1.175875602f);      \
+   t0 = t0*stbi__f2f( 0.298631336f);           \
+   t1 = t1*stbi__f2f( 2.053119869f);           \
+   t2 = t2*stbi__f2f( 3.072711026f);           \
+   t3 = t3*stbi__f2f( 1.501321110f);           \
+   p1 = p5 + p1*stbi__f2f(-0.899976223f);      \
+   p2 = p5 + p2*stbi__f2f(-2.562915447f);      \
+   p3 = p3*stbi__f2f(-1.961570560f);           \
+   p4 = p4*stbi__f2f(-0.390180644f);           \
+   t3 += p1+p4;                                \
+   t2 += p2+p3;                                \
+   t1 += p2+p4;                                \
+   t0 += p1+p3;
+
+// Integer 2-D inverse DCT of one 8x8 block.
+//   out        - receives 8 rows of 8 bytes
+//   out_stride - distance in bytes between the starts of consecutive output rows
+//   data       - the 64 input coefficients in row-major order
+// A column pass writes intermediate values into val[]; a row pass then turns
+// each row of val[] into 8 clamped output bytes.
+static void stbi__idct_block(stbi_uc* out, int out_stride, short data[64])
+{
+	int i, val[64], * v = val;
+	stbi_uc* o;
+	short* d = data;
+
+	// columns
+	for (i = 0; i < 8; ++i, ++d, ++v) {
+		// if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
+		if (d[8] == 0 && d[16] == 0 && d[24] == 0 && d[32] == 0
+			&& d[40] == 0 && d[48] == 0 && d[56] == 0) {
+			//    no shortcut                 0     seconds
+			//    (1|2|3|4|5|6|7)==0          0     seconds
+			//    all separate               -0.047 seconds
+			//    1 && 2|3 && 4|5 && 6|7:    -0.047 seconds
+			// only the first entry of the column is non-zero: every output of
+			// the column is that entry times 4 (the 2 extra bits of precision)
+			int dcterm = d[0] * 4;
+			v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
+		}
+		else {
+			STBI__IDCT_1D(d[0], d[8], d[16], d[24], d[32], d[40], d[48], d[56])
+				// constants scaled things up by 1<<12; let's bring them back
+				// down, but keep 2 extra bits of precision
+				x0 += 512; x1 += 512; x2 += 512; x3 += 512;
+			v[0] = (x0 + t3) >> 10;
+			v[56] = (x0 - t3) >> 10;
+			v[8] = (x1 + t2) >> 10;
+			v[48] = (x1 - t2) >> 10;
+			v[16] = (x2 + t1) >> 10;
+			v[40] = (x2 - t1) >> 10;
+			v[24] = (x3 + t0) >> 10;
+			v[32] = (x3 - t0) >> 10;
+		}
+	}
+
+	// rows: v walks val[] one row at a time, o walks the output rows
+	for (i = 0, v = val, o = out; i < 8; ++i, v += 8, o += out_stride) {
+		// no fast case since the first 1D IDCT spread components out
+		STBI__IDCT_1D(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7])
+			// constants scaled things up by 1<<12, plus we had 1<<2 from first
+			// loop, plus horizontal and vertical each scale by sqrt(8) so together
+			// we've got an extra 1<<3, so 1<<17 total we need to remove.
+			// so we want to round that, which means adding 0.5 * 1<<17,
+			// aka 65536. Also, we'll end up with -128 to 127 that we want
+			// to encode as 0..255 by adding 128, so we'll add that before the shift
+			x0 += 65536 + (128 << 17);
+		x1 += 65536 + (128 << 17);
+		x2 += 65536 + (128 << 17);
+		x3 += 65536 + (128 << 17);
+		// tried computing the shifts into temps, or'ing the temps to see
+		// if any were out of range, but that was slower
+		o[0] = stbi__clamp((x0 + t3) >> 17);
+		o[7] = stbi__clamp((x0 - t3) >> 17);
+		o[1] = stbi__clamp((x1 + t2) >> 17);
+		o[6] = stbi__clamp((x1 - t2) >> 17);
+		o[2] = stbi__clamp((x2 + t1) >> 17);
+		o[5] = stbi__clamp((x2 - t1) >> 17);
+		o[3] = stbi__clamp((x3 + t0) >> 17);
+		o[4] = stbi__clamp((x3 - t0) >> 17);
+	}
+}
+
+#ifdef STBI_SSE2
+// SSE2 integer 8x8 IDCT: reads 64 coefficients from data (using aligned loads)
+// and stores 8 output rows of 8 bytes each, out_stride bytes apart. Produces
+// bit-identical results to the generic C version so it's fully "transparent".
+static void stbi__idct_simd(stbi_uc* out, int out_stride, short data[64])
+{
+	// This is constructed to match our regular (generic) integer IDCT exactly.
+	__m128i row0, row1, row2, row3, row4, row5, row6, row7;
+	__m128i tmp; // scratch register used by the dct_interleave8/16 macros
+
+	// dot product constant: even elems=x, odd elems=y
+#define dct_const(x,y)  _mm_setr_epi16((x),(y),(x),(y),(x),(y),(x),(y))
+
+// out(0) = c0[even]*x + c0[odd]*y   (c0, x, y 16-bit, out 32-bit)
+// out(1) = c1[even]*x + c1[odd]*y
+#define dct_rot(out0,out1, x,y,c0,c1) \
+	  __m128i c0##lo = _mm_unpacklo_epi16((x),(y)); \
+	  __m128i c0##hi = _mm_unpackhi_epi16((x),(y)); \
+	  __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \
+	  __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \
+	  __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \
+	  __m128i out1##_h = _mm_madd_epi16(c0##hi, c1)
+
+   // out = in << 12  (in 16-bit, out 32-bit)
+#define dct_widen(out, in) \
+	  __m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \
+	  __m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4)
+
+   // wide add
+#define dct_wadd(out, a, b) \
+	  __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \
+	  __m128i out##_h = _mm_add_epi32(a##_h, b##_h)
+
+   // wide sub
+#define dct_wsub(out, a, b) \
+	  __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \
+	  __m128i out##_h = _mm_sub_epi32(a##_h, b##_h)
+
+   // butterfly a/b, add bias, then shift by "s" and pack
+#define dct_bfly32o(out0, out1, a,b,bias,s) \
+	  { \
+		 __m128i abiased_l = _mm_add_epi32(a##_l, bias); \
+		 __m128i abiased_h = _mm_add_epi32(a##_h, bias); \
+		 dct_wadd(sum, abiased, b); \
+		 dct_wsub(dif, abiased, b); \
+		 out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s)); \
+		 out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s)); \
+	  }
+
+   // 8-bit interleave step (for transposes)
+#define dct_interleave8(a, b) \
+	  tmp = a; \
+	  a = _mm_unpacklo_epi8(a, b); \
+	  b = _mm_unpackhi_epi8(tmp, b)
+
+   // 16-bit interleave step (for transposes)
+#define dct_interleave16(a, b) \
+	  tmp = a; \
+	  a = _mm_unpacklo_epi16(a, b); \
+	  b = _mm_unpackhi_epi16(tmp, b)
+
+#define dct_pass(bias,shift) \
+	  { \
+		 /* even part */ \
+		 dct_rot(t2e,t3e, row2,row6, rot0_0,rot0_1); \
+		 __m128i sum04 = _mm_add_epi16(row0, row4); \
+		 __m128i dif04 = _mm_sub_epi16(row0, row4); \
+		 dct_widen(t0e, sum04); \
+		 dct_widen(t1e, dif04); \
+		 dct_wadd(x0, t0e, t3e); \
+		 dct_wsub(x3, t0e, t3e); \
+		 dct_wadd(x1, t1e, t2e); \
+		 dct_wsub(x2, t1e, t2e); \
+		 /* odd part */ \
+		 dct_rot(y0o,y2o, row7,row3, rot2_0,rot2_1); \
+		 dct_rot(y1o,y3o, row5,row1, rot3_0,rot3_1); \
+		 __m128i sum17 = _mm_add_epi16(row1, row7); \
+		 __m128i sum35 = _mm_add_epi16(row3, row5); \
+		 dct_rot(y4o,y5o, sum17,sum35, rot1_0,rot1_1); \
+		 dct_wadd(x4, y0o, y4o); \
+		 dct_wadd(x5, y1o, y5o); \
+		 dct_wadd(x6, y2o, y5o); \
+		 dct_wadd(x7, y3o, y4o); \
+		 /* combine even/odd halves; the results overwrite row0..row7 */ \
+		 dct_bfly32o(row0,row7, x0,x7,bias,shift); \
+		 dct_bfly32o(row1,row6, x1,x6,bias,shift); \
+		 dct_bfly32o(row2,row5, x2,x5,bias,shift); \
+		 dct_bfly32o(row3,row4, x3,x4,bias,shift); \
+	  }
+
+	// fixed-point rotation constants, pre-paired for the multiply-add in dct_rot
+	__m128i rot0_0 = dct_const(stbi__f2f(0.5411961f), stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f));
+	__m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f(0.765366865f), stbi__f2f(0.5411961f));
+	__m128i rot1_0 = dct_const(stbi__f2f(1.175875602f) + stbi__f2f(-0.899976223f), stbi__f2f(1.175875602f));
+	__m128i rot1_1 = dct_const(stbi__f2f(1.175875602f), stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f));
+	__m128i rot2_0 = dct_const(stbi__f2f(-1.961570560f) + stbi__f2f(0.298631336f), stbi__f2f(-1.961570560f));
+	__m128i rot2_1 = dct_const(stbi__f2f(-1.961570560f), stbi__f2f(-1.961570560f) + stbi__f2f(3.072711026f));
+	__m128i rot3_0 = dct_const(stbi__f2f(-0.390180644f) + stbi__f2f(2.053119869f), stbi__f2f(-0.390180644f));
+	__m128i rot3_1 = dct_const(stbi__f2f(-0.390180644f), stbi__f2f(-0.390180644f) + stbi__f2f(1.501321110f));
+
+	// rounding biases in column/row passes, see stbi__idct_block for explanation.
+	__m128i bias_0 = _mm_set1_epi32(512);
+	__m128i bias_1 = _mm_set1_epi32(65536 + (128 << 17));
+
+	// load (aligned loads: data must be 16-byte aligned)
+	row0 = _mm_load_si128((const __m128i*) (data + 0 * 8));
+	row1 = _mm_load_si128((const __m128i*) (data + 1 * 8));
+	row2 = _mm_load_si128((const __m128i*) (data + 2 * 8));
+	row3 = _mm_load_si128((const __m128i*) (data + 3 * 8));
+	row4 = _mm_load_si128((const __m128i*) (data + 4 * 8));
+	row5 = _mm_load_si128((const __m128i*) (data + 5 * 8));
+	row6 = _mm_load_si128((const __m128i*) (data + 6 * 8));
+	row7 = _mm_load_si128((const __m128i*) (data + 7 * 8));
+
+	// column pass
+	dct_pass(bias_0, 10);
+
+	{
+		// 16bit 8x8 transpose pass 1
+		dct_interleave16(row0, row4);
+		dct_interleave16(row1, row5);
+		dct_interleave16(row2, row6);
+		dct_interleave16(row3, row7);
+
+		// transpose pass 2
+		dct_interleave16(row0, row2);
+		dct_interleave16(row1, row3);
+		dct_interleave16(row4, row6);
+		dct_interleave16(row5, row7);
+
+		// transpose pass 3
+		dct_interleave16(row0, row1);
+		dct_interleave16(row2, row3);
+		dct_interleave16(row4, row5);
+		dct_interleave16(row6, row7);
+	}
+
+	// row pass (bias_1 carries both the rounding term and the +128 output offset)
+	dct_pass(bias_1, 17);
+
+	{
+		// pack to bytes with unsigned saturation
+		__m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7
+		__m128i p1 = _mm_packus_epi16(row2, row3);
+		__m128i p2 = _mm_packus_epi16(row4, row5);
+		__m128i p3 = _mm_packus_epi16(row6, row7);
+
+		// 8bit 8x8 transpose pass 1
+		dct_interleave8(p0, p2); // a0e0a1e1...
+		dct_interleave8(p1, p3); // c0g0c1g1...
+
+		// transpose pass 2
+		dct_interleave8(p0, p1); // a0c0e0g0...
+		dct_interleave8(p2, p3); // b0d0f0h0...
+
+		// transpose pass 3
+		dct_interleave8(p0, p2); // a0b0c0d0...
+		dct_interleave8(p1, p3); // a4b4c4d4...
+
+		// store: each call writes the low 8 bytes; shuffle 0x4e swaps the two 64-bit halves
+		_mm_storel_epi64((__m128i*) out, p0); out += out_stride;
+		_mm_storel_epi64((__m128i*) out, _mm_shuffle_epi32(p0, 0x4e)); out += out_stride;
+		_mm_storel_epi64((__m128i*) out, p2); out += out_stride;
+		_mm_storel_epi64((__m128i*) out, _mm_shuffle_epi32(p2, 0x4e)); out += out_stride;
+		_mm_storel_epi64((__m128i*) out, p1); out += out_stride;
+		_mm_storel_epi64((__m128i*) out, _mm_shuffle_epi32(p1, 0x4e)); out += out_stride;
+		_mm_storel_epi64((__m128i*) out, p3); out += out_stride;
+		_mm_storel_epi64((__m128i*) out, _mm_shuffle_epi32(p3, 0x4e));
+	}
+
+#undef dct_const
+#undef dct_rot
+#undef dct_widen
+#undef dct_wadd
+#undef dct_wsub
+#undef dct_bfly32o
+#undef dct_interleave8
+#undef dct_interleave16
+#undef dct_pass
+}
+
+#endif // STBI_SSE2
+
+#ifdef STBI_NEON
+
+// NEON integer 8x8 IDCT: reads 64 coefficients from data and stores 8 rows of
+// 8 bytes, out_stride apart. should produce bit-identical results to the generic C version.
+static void stbi__idct_simd(stbi_uc* out, int out_stride, short data[64])
+{
+	int16x8_t row0, row1, row2, row3, row4, row5, row6, row7;
+
+	int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f));
+	int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f));
+	int16x4_t rot0_2 = vdup_n_s16(stbi__f2f(0.765366865f));
+	int16x4_t rot1_0 = vdup_n_s16(stbi__f2f(1.175875602f));
+	int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f));
+	int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f));
+	int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f));
+	int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f));
+	int16x4_t rot3_0 = vdup_n_s16(stbi__f2f(0.298631336f));
+	int16x4_t rot3_1 = vdup_n_s16(stbi__f2f(2.053119869f));
+	int16x4_t rot3_2 = vdup_n_s16(stbi__f2f(3.072711026f));
+	int16x4_t rot3_3 = vdup_n_s16(stbi__f2f(1.501321110f));
+
+	// widening multiply: out = inq * coeff (16-bit in, 32-bit out, as _l/_h halves)
+#define dct_long_mul(out, inq, coeff) \
+   int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff); \
+   int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff)
+
+	// widening multiply-accumulate: out = acc + inq * coeff
+#define dct_long_mac(out, acc, inq, coeff) \
+   int32x4_t out##_l = vmlal_s16(acc##_l, vget_low_s16(inq), coeff); \
+   int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff)
+
+	// out = in << 12 (in 16-bit, out 32-bit)
+#define dct_widen(out, inq) \
+   int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12); \
+   int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12)
+
+	// wide add
+#define dct_wadd(out, a, b) \
+   int32x4_t out##_l = vaddq_s32(a##_l, b##_l); \
+   int32x4_t out##_h = vaddq_s32(a##_h, b##_h)
+
+// wide sub
+#define dct_wsub(out, a, b) \
+   int32x4_t out##_l = vsubq_s32(a##_l, b##_l); \
+   int32x4_t out##_h = vsubq_s32(a##_h, b##_h)
+
+// butterfly a/b, then shift using "shiftop" by "s" and pack
+#define dct_bfly32o(out0,out1, a,b,shiftop,s) \
+   { \
+	  dct_wadd(sum, a, b); \
+	  dct_wsub(dif, a, b); \
+	  out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \
+	  out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \
+   }
+
+#define dct_pass(shiftop, shift) \
+   { \
+	  /* even part */ \
+	  int16x8_t sum26 = vaddq_s16(row2, row6); \
+	  dct_long_mul(p1e, sum26, rot0_0); \
+	  dct_long_mac(t2e, p1e, row6, rot0_1); \
+	  dct_long_mac(t3e, p1e, row2, rot0_2); \
+	  int16x8_t sum04 = vaddq_s16(row0, row4); \
+	  int16x8_t dif04 = vsubq_s16(row0, row4); \
+	  dct_widen(t0e, sum04); \
+	  dct_widen(t1e, dif04); \
+	  dct_wadd(x0, t0e, t3e); \
+	  dct_wsub(x3, t0e, t3e); \
+	  dct_wadd(x1, t1e, t2e); \
+	  dct_wsub(x2, t1e, t2e); \
+	  /* odd part */ \
+	  int16x8_t sum15 = vaddq_s16(row1, row5); \
+	  int16x8_t sum17 = vaddq_s16(row1, row7); \
+	  int16x8_t sum35 = vaddq_s16(row3, row5); \
+	  int16x8_t sum37 = vaddq_s16(row3, row7); \
+	  int16x8_t sumodd = vaddq_s16(sum17, sum35); \
+	  dct_long_mul(p5o, sumodd, rot1_0); \
+	  dct_long_mac(p1o, p5o, sum17, rot1_1); \
+	  dct_long_mac(p2o, p5o, sum35, rot1_2); \
+	  dct_long_mul(p3o, sum37, rot2_0); \
+	  dct_long_mul(p4o, sum15, rot2_1); \
+	  dct_wadd(sump13o, p1o, p3o); \
+	  dct_wadd(sump24o, p2o, p4o); \
+	  dct_wadd(sump23o, p2o, p3o); \
+	  dct_wadd(sump14o, p1o, p4o); \
+	  dct_long_mac(x4, sump13o, row7, rot3_0); \
+	  dct_long_mac(x5, sump24o, row5, rot3_1); \
+	  dct_long_mac(x6, sump23o, row3, rot3_2); \
+	  dct_long_mac(x7, sump14o, row1, rot3_3); \
+	  /* combine even/odd halves; the results overwrite row0..row7 */ \
+	  dct_bfly32o(row0,row7, x0,x7,shiftop,shift); \
+	  dct_bfly32o(row1,row6, x1,x6,shiftop,shift); \
+	  dct_bfly32o(row2,row5, x2,x5,shiftop,shift); \
+	  dct_bfly32o(row3,row4, x3,x4,shiftop,shift); \
+   }
+
+   // load
+	row0 = vld1q_s16(data + 0 * 8);
+	row1 = vld1q_s16(data + 1 * 8);
+	row2 = vld1q_s16(data + 2 * 8);
+	row3 = vld1q_s16(data + 3 * 8);
+	row4 = vld1q_s16(data + 4 * 8);
+	row5 = vld1q_s16(data + 5 * 8);
+	row6 = vld1q_s16(data + 6 * 8);
+	row7 = vld1q_s16(data + 7 * 8);
+
+	// add DC bias (1024 is added to lane 0 of row0 only, i.e. to data[0])
+	row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0));
+
+	// column pass
+	dct_pass(vrshrn_n_s32, 10);
+
+	// 16bit 8x8 transpose
+	{
+		// these three map to a single VTRN.16, VTRN.32, and VSWP, respectively.
+		// whether compilers actually get this is another story, sadly.
+#define dct_trn16(x, y) { int16x8x2_t t = vtrnq_s16(x, y); x = t.val[0]; y = t.val[1]; }
+#define dct_trn32(x, y) { int32x4x2_t t = vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); x = vreinterpretq_s16_s32(t.val[0]); y = vreinterpretq_s16_s32(t.val[1]); }
+#define dct_trn64(x, y) { int16x8_t x0 = x; int16x8_t y0 = y; x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0)); y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); }
+
+	  // pass 1
+		dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6
+		dct_trn16(row2, row3);
+		dct_trn16(row4, row5);
+		dct_trn16(row6, row7);
+
+		// pass 2
+		dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4
+		dct_trn32(row1, row3);
+		dct_trn32(row4, row6);
+		dct_trn32(row5, row7);
+
+		// pass 3
+		dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0
+		dct_trn64(row1, row5);
+		dct_trn64(row2, row6);
+		dct_trn64(row3, row7);
+
+#undef dct_trn16
+#undef dct_trn32
+#undef dct_trn64
+	}
+
+	// row pass
+	// vrshrn_n_s32 only supports shifts up to 16, we need
+	// 17. so do a non-rounding shift of 16 first then follow
+	// up with a rounding shift by 1.
+	dct_pass(vshrn_n_s32, 16);
+
+	{
+		// pack and round (the remaining rounding shift by 1, saturating to unsigned bytes)
+		uint8x8_t p0 = vqrshrun_n_s16(row0, 1);
+		uint8x8_t p1 = vqrshrun_n_s16(row1, 1);
+		uint8x8_t p2 = vqrshrun_n_s16(row2, 1);
+		uint8x8_t p3 = vqrshrun_n_s16(row3, 1);
+		uint8x8_t p4 = vqrshrun_n_s16(row4, 1);
+		uint8x8_t p5 = vqrshrun_n_s16(row5, 1);
+		uint8x8_t p6 = vqrshrun_n_s16(row6, 1);
+		uint8x8_t p7 = vqrshrun_n_s16(row7, 1);
+
+		// again, these can translate into one instruction, but often don't.
+#define dct_trn8_8(x, y) { uint8x8x2_t t = vtrn_u8(x, y); x = t.val[0]; y = t.val[1]; }
+#define dct_trn8_16(x, y) { uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); x = vreinterpret_u8_u16(t.val[0]); y = vreinterpret_u8_u16(t.val[1]); }
+#define dct_trn8_32(x, y) { uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); x = vreinterpret_u8_u32(t.val[0]); y = vreinterpret_u8_u32(t.val[1]); }
+
+	  // sadly can't use interleaved stores here since we only write
+	  // 8 bytes to each scan line!
+
+	  // 8x8 8-bit transpose pass 1
+		dct_trn8_8(p0, p1);
+		dct_trn8_8(p2, p3);
+		dct_trn8_8(p4, p5);
+		dct_trn8_8(p6, p7);
+
+		// pass 2
+		dct_trn8_16(p0, p2);
+		dct_trn8_16(p1, p3);
+		dct_trn8_16(p4, p6);
+		dct_trn8_16(p5, p7);
+
+		// pass 3
+		dct_trn8_32(p0, p4);
+		dct_trn8_32(p1, p5);
+		dct_trn8_32(p2, p6);
+		dct_trn8_32(p3, p7);
+
+		// store: one 8-byte row per scan line
+		vst1_u8(out, p0); out += out_stride;
+		vst1_u8(out, p1); out += out_stride;
+		vst1_u8(out, p2); out += out_stride;
+		vst1_u8(out, p3); out += out_stride;
+		vst1_u8(out, p4); out += out_stride;
+		vst1_u8(out, p5); out += out_stride;
+		vst1_u8(out, p6); out += out_stride;
+		vst1_u8(out, p7);
+
+#undef dct_trn8_8
+#undef dct_trn8_16
+#undef dct_trn8_32
+	}
+
+#undef dct_long_mul
+#undef dct_long_mac
+#undef dct_widen
+#undef dct_wadd
+#undef dct_wsub
+#undef dct_bfly32o
+#undef dct_pass
+}
+
+#endif // STBI_NEON
+
+#define STBI__MARKER_none  0xff
+// Returns the next JPEG marker code. A marker left pending by the entropy
+// decoder (j->marker) is handed back first and cleared; otherwise one is read
+// from the stream. Yields STBI__MARKER_none (0xff, never a valid marker) if absent.
+static stbi_uc stbi__get_marker(stbi__jpeg * j)
+{
+	stbi_uc code = j->marker;
+	if (code != STBI__MARKER_none) { j->marker = STBI__MARKER_none; return code; } // pending marker wins
+	if (stbi__get8(j->s) != 0xff) return STBI__MARKER_none; // a marker must start with 0xff
+	do {
+		code = stbi__get8(j->s); // consume repeated 0xff fill bytes
+	} while (code == 0xff);
+	return code;
+}
+
+// in each scan, we'll have scan_n components, and the order
+// of the components is specified by order[]
+#define STBI__RESTART(x)     ((x) >= 0xd0 && (x) <= 0xd7)
+
+// Run at the start of a scan and after each restart marker: empties the
+// entropy decoder's bit buffer and zeroes the DC prediction of all components.
+static void stbi__jpeg_reset(stbi__jpeg* j)
+{
+	int c;
+	for (c = 0; c < 4; ++c) j->img_comp[c].dc_pred = 0;
+	j->code_buffer = 0;
+	j->code_bits = 0;
+	j->nomore = 0;
+	j->eob_run = 0;
+	j->marker = STBI__MARKER_none;
+	// no restart_interval: count down from 0x7fffffff MCUs, plenty since we don't even allow 1<<30 pixels
+	j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff;
+}
+
+// Decodes the entropy-coded data of the current scan (uses z->scan_n / z->order).
+// Baseline blocks are decoded and IDCT'd straight into the component planes;
+// progressive blocks only update the stored coefficients. Returns 0 on a decode failure.
+static int stbi__parse_entropy_coded_data(stbi__jpeg* z)
+{
+	stbi__jpeg_reset(z);
+	if (!z->progressive) {
+		if (z->scan_n == 1) {
+			int i, j;
+			STBI_SIMD_ALIGN(short, data[64]);
+			int n = z->order[0];
+			// non-interleaved data: blocks are visited in plain scanline order, and their
+			// count depends only on this component's own size, not on interleaved MCU blocking
+			int w = (z->img_comp[n].x + 7) >> 3;
+			int h = (z->img_comp[n].y + 7) >> 3;
+			for (j = 0; j < h; ++j) {
+				for (i = 0; i < w; ++i) {
+					int ha = z->img_comp[n].ha;
+					if (!stbi__jpeg_decode_block(z, data, z->huff_dc + z->img_comp[n].hd, z->huff_ac + ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
+					z->idct_block_kernel(z->img_comp[n].data + z->img_comp[n].w2 * j * 8 + i * 8, z->img_comp[n].w2, data);
+					// every data block is an MCU, so countdown the restart interval
+					if (--z->todo <= 0) {
+						if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
+						// if it's NOT a restart, then just bail, so we get corrupt data
+						// rather than no data
+						if (!STBI__RESTART(z->marker)) return 1;
+						stbi__jpeg_reset(z);
+					}
+				}
+			}
+			return 1;
+		}
+		else { // interleaved
+			int i, j, k, x, y;
+			STBI_SIMD_ALIGN(short, data[64]);
+			for (j = 0; j < z->img_mcu_y; ++j) {
+				for (i = 0; i < z->img_mcu_x; ++i) {
+					// scan an interleaved mcu... process scan_n components in order
+					for (k = 0; k < z->scan_n; ++k) {
+						int n = z->order[k];
+						// scan out an mcu's worth of this component; that's just determined
+						// by the basic H and V specified for the component
+						for (y = 0; y < z->img_comp[n].v; ++y) {
+							for (x = 0; x < z->img_comp[n].h; ++x) {
+								int x2 = (i * z->img_comp[n].h + x) * 8; // pixel column of this block
+								int y2 = (j * z->img_comp[n].v + y) * 8; // pixel row of this block
+								int ha = z->img_comp[n].ha;
+								if (!stbi__jpeg_decode_block(z, data, z->huff_dc + z->img_comp[n].hd, z->huff_ac + ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
+								z->idct_block_kernel(z->img_comp[n].data + z->img_comp[n].w2 * y2 + x2, z->img_comp[n].w2, data);
+							}
+						}
+					}
+					// after all interleaved components, that's an interleaved MCU,
+					// so now count down the restart interval
+					if (--z->todo <= 0) {
+						if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
+						if (!STBI__RESTART(z->marker)) return 1; // not a restart: stop early but keep what was decoded
+						stbi__jpeg_reset(z);
+					}
+				}
+			}
+			return 1;
+		}
+	}
+	else {
+		if (z->scan_n == 1) {
+			int i, j;
+			int n = z->order[0];
+			// non-interleaved data: blocks are visited in plain scanline order, and their
+			// count depends only on this component's own size, not on interleaved MCU blocking
+			int w = (z->img_comp[n].x + 7) >> 3;
+			int h = (z->img_comp[n].y + 7) >> 3;
+			for (j = 0; j < h; ++j) {
+				for (i = 0; i < w; ++i) {
+					short* data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w);
+					if (z->spec_start == 0) { // a scan starting at coefficient 0 goes through the DC decoder
+						if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
+							return 0;
+					}
+					else {
+						int ha = z->img_comp[n].ha;
+						if (!stbi__jpeg_decode_block_prog_ac(z, data, &z->huff_ac[ha], z->fast_ac[ha]))
+							return 0;
+					}
+					// every data block is an MCU, so countdown the restart interval
+					if (--z->todo <= 0) {
+						if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
+						if (!STBI__RESTART(z->marker)) return 1;
+						stbi__jpeg_reset(z);
+					}
+				}
+			}
+			return 1;
+		}
+		else { // interleaved
+			int i, j, k, x, y;
+			for (j = 0; j < z->img_mcu_y; ++j) {
+				for (i = 0; i < z->img_mcu_x; ++i) {
+					// scan an interleaved mcu... process scan_n components in order
+					for (k = 0; k < z->scan_n; ++k) {
+						int n = z->order[k];
+						// scan out an mcu's worth of this component; that's just determined
+						// by the basic H and V specified for the component
+						for (y = 0; y < z->img_comp[n].v; ++y) {
+							for (x = 0; x < z->img_comp[n].h; ++x) {
+								int x2 = (i * z->img_comp[n].h + x);
+								int y2 = (j * z->img_comp[n].v + y);
+								short* data = z->img_comp[n].coeff + 64 * (x2 + y2 * z->img_comp[n].coeff_w);
+								// only the DC decoder is invoked on this interleaved progressive path
+								if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
+									return 0;
+							}
+						}
+					}
+					// after all interleaved components, that's an interleaved MCU,
+					// so now count down the restart interval
+					if (--z->todo <= 0) {
+						if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
+						if (!STBI__RESTART(z->marker)) return 1;
+						stbi__jpeg_reset(z);
+					}
+				}
+			}
+			return 1;
+		}
+	}
+}
+
+static void stbi__jpeg_dequantize(short* data, stbi__uint16* dequant)
+{
+	short* end = data + 64; // one 8x8 block: scale each coefficient by its table entry
+	for (; data != end; ++data, ++dequant)
+		*data *= *dequant;
+}
+
+static void stbi__jpeg_finish(stbi__jpeg* z)
+{
+	// progressive scans only accumulate coefficients; dequantize and idct them here
+	int bx, by, c;
+	if (!z->progressive) return; // nothing to do for non-progressive images
+	for (c = 0; c < z->s->img_n; ++c) {
+		int blocks_w = (z->img_comp[c].x + 7) >> 3;
+		int blocks_h = (z->img_comp[c].y + 7) >> 3;
+		for (by = 0; by < blocks_h; ++by) {
+			for (bx = 0; bx < blocks_w; ++bx) {
+				short* coeffs = z->img_comp[c].coeff + 64 * (bx + by * z->img_comp[c].coeff_w);
+				stbi_uc* dst = z->img_comp[c].data + z->img_comp[c].w2 * by * 8 + bx * 8;
+				stbi__jpeg_dequantize(coeffs, z->dequant[z->img_comp[c].tq]);
+				z->idct_block_kernel(dst, z->img_comp[c].w2, coeffs);
+			}
+		}
+	}
+}
+
+// Handles one header-level marker segment: DRI, DQT, DHT, APPn or COM.
+// Returns 1 when the segment was consumed and 0 when it is corrupt or unknown
+// (most failures also record a reason through stbi__err).
+static int stbi__process_marker(stbi__jpeg* z, int m)
+{
+	int L;
+	switch (m) {
+	case STBI__MARKER_none: // no marker found
+		return stbi__err("expected marker", "Corrupt JPEG");
+
+	case 0xDD: // DRI - specify restart interval
+		if (stbi__get16be(z->s) != 4) return stbi__err("bad DRI len", "Corrupt JPEG");
+		z->restart_interval = stbi__get16be(z->s);
+		return 1;
+
+	case 0xDB: // DQT - define quantization table
+		L = stbi__get16be(z->s) - 2;
+		while (L > 0) {
+			int q = stbi__get8(z->s);
+			int p = q >> 4, sixteen = (p != 0); // p: 0 selects 8-bit entries, 1 selects 16-bit entries
+			int t = q & 15, i;                  // t: destination table slot (0..3)
+			if (p != 0 && p != 1) return stbi__err("bad DQT type", "Corrupt JPEG");
+			if (t > 3) return stbi__err("bad DQT table", "Corrupt JPEG");
+
+			// entries are stored through the stbi__jpeg_dezigzag index table
+			for (i = 0; i < 64; ++i)
+				z->dequant[t][stbi__jpeg_dezigzag[i]] = (stbi__uint16)(sixteen ? stbi__get16be(z->s) : stbi__get8(z->s));
+			L -= (sixteen ? 129 : 65);
+		}
+		return L == 0; // the tables must account for the segment length exactly
+
+	case 0xC4: // DHT - define huffman table
+		L = stbi__get16be(z->s) - 2;
+		while (L > 0) {
+			stbi_uc* v;
+			int sizes[16], i, n = 0;
+			int q = stbi__get8(z->s);
+			int tc = q >> 4, th = q & 15; // tc: 0 = DC table, otherwise AC; th: table slot
+			if (tc > 1 || th > 3) return stbi__err("bad DHT header", "Corrupt JPEG");
+			for (i = 0; i < 16; ++i) {
+				sizes[i] = stbi__get8(z->s);
+				n += sizes[i];
+			}
+			// a Huffman table codes at most 256 distinct 8-bit symbols; a larger count is corrupt and
+			// would overrun values[] below (assumed to hold 256 entries; declared outside this chunk)
+			if (n > 256) return stbi__err("bad DHT header", "Corrupt JPEG");
+			L -= 17;
+			if (tc == 0) {
+				if (!stbi__build_huffman(z->huff_dc + th, sizes)) return 0;
+				v = z->huff_dc[th].values;
+			}
+			else {
+				if (!stbi__build_huffman(z->huff_ac + th, sizes)) return 0;
+				v = z->huff_ac[th].values;
+			}
+			for (i = 0; i < n; ++i)
+				v[i] = stbi__get8(z->s);
+			if (tc != 0)
+				stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th);
+			L -= n;
+		}
+		return L == 0;
+	}
+
+	// check for comment block or APP blocks
+	if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) {
+		L = stbi__get16be(z->s);
+		if (L < 2) return stbi__err(m == 0xFE ? "bad COM len" : "bad APP len", "Corrupt JPEG");
+		L -= 2;
+
+		if (m == 0xE0 && L >= 5) { // JFIF APP0 segment
+			static const unsigned char tag[5] = { 'J','F','I','F','\0' };
+			int ok = 1, i;
+			for (i = 0; i < 5; ++i)
+				if (stbi__get8(z->s) != tag[i])
+					ok = 0;
+			L -= 5;
+			if (ok)
+				z->jfif = 1;
+		}
+		else if (m == 0xEE && L >= 12) { // Adobe APP14 segment
+			static const unsigned char tag[6] = { 'A','d','o','b','e','\0' };
+			int ok = 1, i;
+			for (i = 0; i < 6; ++i)
+				if (stbi__get8(z->s) != tag[i])
+					ok = 0;
+			L -= 6;
+			if (ok) {
+				stbi__get8(z->s); // version
+				stbi__get16be(z->s); // flags0
+				stbi__get16be(z->s); // flags1
+				z->app14_color_transform = stbi__get8(z->s); // color transform
+				L -= 6;
+			}
+		}
+
+		// discard whatever remains of the segment
+		stbi__skip(z->s, L);
+		return 1;
+	}
+
+	return stbi__err("unknown marker", "Corrupt JPEG");
+}
+
+// Parses the SOS (start of scan) segment that follows the marker: which
+// components take part, their Huffman table selectors, and the spectral /
+// successive-approximation parameters. Returns 1 on success, 0 on corrupt data.
+static int stbi__process_scan_header(stbi__jpeg* z)
+{
+	int k, approx;
+	int seg_len = stbi__get16be(z->s);
+	z->scan_n = stbi__get8(z->s);
+	if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int)z->s->img_n) return stbi__err("bad SOS component count", "Corrupt JPEG");
+	if (seg_len != 6 + 2 * z->scan_n) return stbi__err("bad SOS len", "Corrupt JPEG");
+	for (k = 0; k < z->scan_n; ++k) {
+		int comp_id = stbi__get8(z->s);
+		int tables = stbi__get8(z->s); // high nibble: DC table, low nibble: AC table
+		int idx = 0;
+		while (idx < z->s->img_n && z->img_comp[idx].id != comp_id)
+			++idx;
+		if (idx == z->s->img_n) return 0; // no match
+		z->img_comp[idx].hd = tables >> 4;
+		if (z->img_comp[idx].hd > 3) return stbi__err("bad DC huff", "Corrupt JPEG");
+		z->img_comp[idx].ha = tables & 15;
+		if (z->img_comp[idx].ha > 3) return stbi__err("bad AC huff", "Corrupt JPEG");
+		z->order[k] = idx;
+	}
+
+	z->spec_start = stbi__get8(z->s);
+	z->spec_end = stbi__get8(z->s); // should be 63, but might be 0
+	approx = stbi__get8(z->s);
+	z->succ_high = (approx >> 4);
+	z->succ_low = (approx & 15);
+	if (!z->progressive) {
+		// non-progressive scans must start at 0 with no successive approximation
+		if (z->spec_start != 0) return stbi__err("bad SOS", "Corrupt JPEG");
+		if (z->succ_high != 0 || z->succ_low != 0) return stbi__err("bad SOS", "Corrupt JPEG");
+		z->spec_end = 63;
+		return 1;
+	}
+	if (z->spec_start > 63 || z->spec_end > 63 || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13)
+		return stbi__err("bad SOS", "Corrupt JPEG");
+	return 1;
+}
+
+static int stbi__free_jpeg_components(stbi__jpeg* z, int ncomp, int why)
+{
+	// frees whatever the first ncomp components own; `why` is passed straight
+	// through so a caller can clean up and return its status in one statement
+	int c = 0;
+	while (c < ncomp) {
+		if (z->img_comp[c].raw_data != NULL) {
+			STBI_FREE(z->img_comp[c].raw_data);
+			z->img_comp[c].data = NULL;
+			z->img_comp[c].raw_data = NULL;
+		}
+		if (z->img_comp[c].raw_coeff != NULL) {
+			STBI_FREE(z->img_comp[c].raw_coeff);
+			z->img_comp[c].coeff = 0;
+			z->img_comp[c].raw_coeff = 0;
+		}
+		if (z->img_comp[c].linebuf != NULL) { STBI_FREE(z->img_comp[c].linebuf); z->img_comp[c].linebuf = NULL; }
+		++c;
+	}
+	return why;
+}
+
+// Parses the SOF (frame header) segment: precision, image size and each component's id,
+// sampling factors and quant table. For STBI__SCAN_load it also allocates the component buffers.
+static int stbi__process_frame_header(stbi__jpeg* z, int scan)
+{
+	stbi__context* s = z->s;
+	int Lf, p, i, q, h_max = 1, v_max = 1, c;
+	Lf = stbi__get16be(s);         if (Lf < 11) return stbi__err("bad SOF len", "Corrupt JPEG"); // JPEG
+	p = stbi__get8(s);            if (p != 8) return stbi__err("only 8-bit", "JPEG format not supported: 8-bit only"); // JPEG baseline
+	s->img_y = stbi__get16be(s);   if (s->img_y == 0) return stbi__err("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG
+	s->img_x = stbi__get16be(s);   if (s->img_x == 0) return stbi__err("0 width", "Corrupt JPEG"); // JPEG requires
+	c = stbi__get8(s);
+	if (c != 3 && c != 1 && c != 4) return stbi__err("bad component count", "Corrupt JPEG");
+	s->img_n = c;
+	for (i = 0; i < c; ++i) {
+		z->img_comp[i].data = NULL;
+		z->img_comp[i].linebuf = NULL;
+	}
+
+	if (Lf != 8 + 3 * s->img_n) return stbi__err("bad SOF len", "Corrupt JPEG");
+
+	z->rgb = 0; // counts components whose ids spell 'R','G','B' in order
+	for (i = 0; i < s->img_n; ++i) {
+		static const unsigned char rgb[3] = { 'R', 'G', 'B' };
+		z->img_comp[i].id = stbi__get8(s);
+		if (s->img_n == 3 && z->img_comp[i].id == rgb[i])
+			++z->rgb;
+		q = stbi__get8(s); // high nibble: horizontal sampling factor, low nibble: vertical
+		z->img_comp[i].h = (q >> 4);  if (!z->img_comp[i].h || z->img_comp[i].h > 4) return stbi__err("bad H", "Corrupt JPEG");
+		z->img_comp[i].v = q & 15;    if (!z->img_comp[i].v || z->img_comp[i].v > 4) return stbi__err("bad V", "Corrupt JPEG");
+		z->img_comp[i].tq = stbi__get8(s);  if (z->img_comp[i].tq > 3) return stbi__err("bad TQ", "Corrupt JPEG");
+	}
+
+	if (scan != STBI__SCAN_load) return 1; // header-only scans stop before any allocation
+
+	if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0)) return stbi__err("too large", "Image too large to decode");
+
+	for (i = 0; i < s->img_n; ++i) {
+		if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h;
+		if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v;
+	}
+
+	// compute interleaved mcu info
+	z->img_h_max = h_max;
+	z->img_v_max = v_max;
+	z->img_mcu_w = h_max * 8;
+	z->img_mcu_h = v_max * 8;
+	// these sizes can't be more than 17 bits
+	z->img_mcu_x = (s->img_x + z->img_mcu_w - 1) / z->img_mcu_w;
+	z->img_mcu_y = (s->img_y + z->img_mcu_h - 1) / z->img_mcu_h;
+
+	for (i = 0; i < s->img_n; ++i) {
+		// number of effective pixels (e.g. for non-interleaved MCU)
+		z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max - 1) / h_max;
+		z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max - 1) / v_max;
+		// to simplify generation, allocate enough memory to decode the oversized data
+		// produced by interleaved MCUs and their big blocks (e.g. a 16x16 iMCU on an
+		// image of width 33); the extra data isn't discarded until colorspace conversion.
+		// img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked earlier)
+		// so these muls can't overflow with 32-bit ints (which we require)
+		z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8;
+		z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8;
+		z->img_comp[i].coeff = 0;
+		z->img_comp[i].raw_coeff = 0;
+		z->img_comp[i].linebuf = NULL;
+		z->img_comp[i].raw_data = stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15);
+		if (z->img_comp[i].raw_data == NULL)
+			return stbi__free_jpeg_components(z, i + 1, stbi__err("outofmem", "Out of memory"));
+		// align blocks for idct using mmx/sse
+		z->img_comp[i].data = (stbi_uc*)(((size_t)z->img_comp[i].raw_data + 15) & ~15);
+		if (z->progressive) {
+			// w2, h2 are multiples of 8 (see above)
+			z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8;
+			z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8;
+			z->img_comp[i].raw_coeff = stbi__malloc_mad3(z->img_comp[i].w2, z->img_comp[i].h2, sizeof(short), 15);
+			if (z->img_comp[i].raw_coeff == NULL)
+				return stbi__free_jpeg_components(z, i + 1, stbi__err("outofmem", "Out of memory"));
+			z->img_comp[i].coeff = (short*)(((size_t)z->img_comp[i].raw_coeff + 15) & ~15); // same 16-byte rounding as data
+		}
+	}
+
+	return 1;
+}
+
+// use comparisons since in some cases we handle more than one case (e.g. SOF)
+#define stbi__DNL(x)         ((x) == 0xdc)
+#define stbi__SOI(x)         ((x) == 0xd8)
+#define stbi__EOI(x)         ((x) == 0xd9)
+#define stbi__SOF(x)         ((x) == 0xc0 || (x) == 0xc1 || (x) == 0xc2)
+#define stbi__SOS(x)         ((x) == 0xda)
+
+#define stbi__SOF_progressive(x)   ((x) == 0xc2)
+
+static int stbi__decode_jpeg_header(stbi__jpeg* z, int scan)
+{
+	// checks for SOI, then consumes marker segments up to and including the frame header
+	int marker;
+	z->marker = STBI__MARKER_none; // initialize cached marker to empty
+	z->app14_color_transform = -1; // valid values are 0,1,2
+	z->jfif = 0;
+	marker = stbi__get_marker(z);
+	if (!stbi__SOI(marker)) return stbi__err("no SOI", "Corrupt JPEG");
+	if (scan == STBI__SCAN_type) return 1;
+	for (marker = stbi__get_marker(z); !stbi__SOF(marker); ) {
+		if (!stbi__process_marker(z, marker)) return 0;
+		// some files have extra padding after their blocks, so ok, we'll scan
+		for (;;) {
+			marker = stbi__get_marker(z);
+			if (marker != STBI__MARKER_none) break;
+			if (stbi__at_eof(z->s)) return stbi__err("no SOF", "Corrupt JPEG");
+		}
+	}
+	z->progressive = stbi__SOF_progressive(marker);
+	if (!stbi__process_frame_header(z, scan)) return 0;
+	return 1;
+}
+
+// decode image to YCbCr format: parses the header, then walks the marker
+// stream until EOI, decoding every scan (SOS), validating DNL segments and
+// handing all other markers to stbi__process_marker. Progressive images are
+// finished (dequantize + idct) at the very end. Returns 1 on success, 0 on failure.
+static int stbi__decode_jpeg_image(stbi__jpeg* j)
+{
+	int marker, c;
+	for (c = 0; c < 4; ++c) {
+		j->img_comp[c].raw_coeff = NULL;
+		j->img_comp[c].raw_data = NULL;
+	}
+	j->restart_interval = 0;
+	if (!stbi__decode_jpeg_header(j, STBI__SCAN_load)) return 0;
+	for (marker = stbi__get_marker(j); !stbi__EOI(marker); marker = stbi__get_marker(j)) {
+		if (stbi__DNL(marker)) {
+			int seg_len = stbi__get16be(j->s);
+			stbi__uint32 lines = stbi__get16be(j->s);
+			if (seg_len != 4) return stbi__err("bad DNL len", "Corrupt JPEG");
+			if (lines != j->s->img_y) return stbi__err("bad DNL height", "Corrupt JPEG");
+			continue;
+		}
+		if (!stbi__SOS(marker)) {
+			if (!stbi__process_marker(j, marker)) return 0;
+			continue;
+		}
+		if (!stbi__process_scan_header(j)) return 0;
+		if (!stbi__parse_entropy_coded_data(j)) return 0;
+		if (j->marker != STBI__MARKER_none) continue; // a marker is already pending after the scan
+		// handle 0s at the end of image data from IP Kamera 9060
+		while (!stbi__at_eof(j->s)) {
+			if (stbi__get8(j->s) == 255) {
+				j->marker = stbi__get8(j->s);
+				break;
+			}
+		}
+		// if we reach eof without hitting a marker, the stbi__get_marker() call in
+		// the loop header will fail and we'll eventually return 0
+	}
+	if (j->progressive)
+		stbi__jpeg_finish(j);
+	return 1;
+}
+
+// static jfif-centered resampling (across block boundaries)
+
+typedef stbi_uc* (*resample_row_func)(stbi_uc* out, stbi_uc* in0, stbi_uc* in1,
+	int w, int hs);
+
+#define stbi__div4(x) ((stbi_uc) ((x) >> 2))
+
+static stbi_uc* resample_row_1(stbi_uc* out, stbi_uc* in_near, stbi_uc* in_far, int w, int hs)
+{
+	// pass-through case: the nearer input row is handed back untouched, so
+	// neither the output buffer nor any other argument is consulted
+	STBI_NOTUSED(out); STBI_NOTUSED(in_far);
+	STBI_NOTUSED(w); STBI_NOTUSED(hs);
+	return in_near;
+}
+
+static stbi_uc* stbi__resample_row_v_2(stbi_uc* out, stbi_uc* in_near, stbi_uc* in_far, int w, int hs)
+{
+	// vertical 2x upsampling: every output sample is (3*near + far + 2) / 4
+	int col;
+	for (col = 0; col < w; ++col)
+		out[col] = stbi__div4(in_far[col] + 3 * in_near[col] + 2);
+	STBI_NOTUSED(hs);
+	return out;
+}
+
+static stbi_uc* stbi__resample_row_h_2(stbi_uc* out, stbi_uc* in_near, stbi_uc* in_far, int w, int hs)
+{
+	// horizontal 2x upsampling: two outputs per input sample, each a 3:1 blend
+	// of that sample with its left or right neighbour; the row ends are copied
+	stbi_uc* src = in_near;
+	int col;
+	STBI_NOTUSED(in_far);
+	STBI_NOTUSED(hs);
+
+	if (w == 1) {
+		// if only one sample, can't do any interpolation
+		out[0] = out[1] = src[0];
+		return out;
+	}
+
+	out[0] = src[0];
+	out[1] = stbi__div4(src[0] * 3 + src[1] + 2);
+	for (col = 1; col < w - 1; ++col) {
+		int center = 3 * src[col] + 2;
+		out[col * 2 + 0] = stbi__div4(center + src[col - 1]);
+		out[col * 2 + 1] = stbi__div4(center + src[col + 1]);
+	}
+	// for w >= 2, col now indexes the last input sample
+	out[col * 2 + 0] = stbi__div4(src[w - 2] * 3 + src[w - 1] + 2);
+	out[col * 2 + 1] = src[w - 1];
+	return out;
+}
+
+// shift right by 4 (divide by 16) and narrow to stbi_uc
+#define stbi__div16(x) ((stbi_uc) ((x) >> 4))
+
+// 2x upsample on both axes: each input column is first blended 3:1 between
+// the near and far rows, then neighbouring columns are blended 3:1 to give
+// two output samples per input sample.
+static stbi_uc* stbi__resample_row_hv_2(stbi_uc* out, stbi_uc* in_near, stbi_uc* in_far, int w, int hs)
+{
+	int col, prev, cur;
+
+	STBI_NOTUSED(hs);
+
+	if (w == 1) {
+		// only the vertical blend is possible with one column
+		out[0] = out[1] = stbi__div4(3 * in_near[0] + in_far[0] + 2);
+		return out;
+	}
+
+	// cur/prev hold the vertically blended (unscaled) column values
+	cur = 3 * in_near[0] + in_far[0];
+	out[0] = stbi__div4(cur + 2);
+	col = 1;
+	while (col < w) {
+		prev = cur;
+		cur = 3 * in_near[col] + in_far[col];
+		out[col * 2 - 1] = stbi__div16(3 * prev + cur + 8);
+		out[col * 2] = stbi__div16(3 * cur + prev + 8);
+		++col;
+	}
+	out[w * 2 - 1] = stbi__div4(cur + 2);
+
+	return out;
+}
+
+#if defined(STBI_SSE2) || defined(STBI_NEON)
+// SIMD variant of stbi__resample_row_hv_2. The vector loop handles groups of
+// 8 input pixels (SSE2 or NEON, chosen at compile time); the scalar code
+// after it finishes the remaining pixels with the same 3:1 weighting.
+// Writes to out and returns out; hs is unused.
+static stbi_uc* stbi__resample_row_hv_2_simd(stbi_uc* out, stbi_uc* in_near, stbi_uc* in_far, int w, int hs)
+{
+	// need to generate 2x2 samples for every one in input
+	int i = 0, t0, t1;
+
+	if (w == 1) {
+		out[0] = out[1] = stbi__div4(3 * in_near[0] + in_far[0] + 2);
+		return out;
+	}
+
+	t1 = 3 * in_near[0] + in_far[0];
+	// process groups of 8 pixels for as long as we can.
+	// note we can't handle the last pixel in a row in this loop
+	// because we need to handle the filter boundary conditions.
+	for (; i < ((w - 1) & ~7); i += 8) {
+#if defined(STBI_SSE2)
+		// load and perform the vertical filtering pass
+		// this uses 3*x + y = 4*x + (y - x)
+		__m128i zero = _mm_setzero_si128();
+		__m128i farb = _mm_loadl_epi64((__m128i*) (in_far + i));
+		__m128i nearb = _mm_loadl_epi64((__m128i*) (in_near + i));
+		__m128i farw = _mm_unpacklo_epi8(farb, zero);
+		__m128i nearw = _mm_unpacklo_epi8(nearb, zero);
+		__m128i diff = _mm_sub_epi16(farw, nearw);
+		__m128i nears = _mm_slli_epi16(nearw, 2);
+		__m128i curr = _mm_add_epi16(nears, diff); // current row
+
+		// horizontal filter works the same based on shifted vers of current
+		// row. "prev" is current row shifted right by 1 pixel; we need to
+		// insert the previous pixel value (from t1).
+		// "next" is current row shifted left by 1 pixel, with first pixel
+		// of next block of 8 pixels added in.
+		__m128i prv0 = _mm_slli_si128(curr, 2);
+		__m128i nxt0 = _mm_srli_si128(curr, 2);
+		__m128i prev = _mm_insert_epi16(prv0, t1, 0);
+		__m128i next = _mm_insert_epi16(nxt0, 3 * in_near[i + 8] + in_far[i + 8], 7);
+
+		// horizontal filter, polyphase implementation since it's convenient:
+		// even pixels = 3*cur + prev = cur*4 + (prev - cur)
+		// odd  pixels = 3*cur + next = cur*4 + (next - cur)
+		// note the shared term.
+		__m128i bias = _mm_set1_epi16(8);
+		__m128i curs = _mm_slli_epi16(curr, 2);
+		__m128i prvd = _mm_sub_epi16(prev, curr);
+		__m128i nxtd = _mm_sub_epi16(next, curr);
+		__m128i curb = _mm_add_epi16(curs, bias);
+		__m128i even = _mm_add_epi16(prvd, curb);
+		__m128i odd = _mm_add_epi16(nxtd, curb);
+
+		// interleave even and odd pixels, then undo scaling.
+		__m128i int0 = _mm_unpacklo_epi16(even, odd);
+		__m128i int1 = _mm_unpackhi_epi16(even, odd);
+		__m128i de0 = _mm_srli_epi16(int0, 4);
+		__m128i de1 = _mm_srli_epi16(int1, 4);
+
+		// pack and write output
+		__m128i outv = _mm_packus_epi16(de0, de1);
+		_mm_storeu_si128((__m128i*) (out + i * 2), outv);
+#elif defined(STBI_NEON)
+		// load and perform the vertical filtering pass
+		// this uses 3*x + y = 4*x + (y - x)
+		uint8x8_t farb = vld1_u8(in_far + i);
+		uint8x8_t nearb = vld1_u8(in_near + i);
+		int16x8_t diff = vreinterpretq_s16_u16(vsubl_u8(farb, nearb));
+		int16x8_t nears = vreinterpretq_s16_u16(vshll_n_u8(nearb, 2));
+		int16x8_t curr = vaddq_s16(nears, diff); // current row
+
+		// horizontal filter works the same based on shifted vers of current
+		// row. "prev" is current row shifted right by 1 pixel; we need to
+		// insert the previous pixel value (from t1).
+		// "next" is current row shifted left by 1 pixel, with first pixel
+		// of next block of 8 pixels added in.
+		int16x8_t prv0 = vextq_s16(curr, curr, 7);
+		int16x8_t nxt0 = vextq_s16(curr, curr, 1);
+		int16x8_t prev = vsetq_lane_s16(t1, prv0, 0);
+		int16x8_t next = vsetq_lane_s16(3 * in_near[i + 8] + in_far[i + 8], nxt0, 7);
+
+		// horizontal filter, polyphase implementation since it's convenient:
+		// even pixels = 3*cur + prev = cur*4 + (prev - cur)
+		// odd  pixels = 3*cur + next = cur*4 + (next - cur)
+		// note the shared term.
+		int16x8_t curs = vshlq_n_s16(curr, 2);
+		int16x8_t prvd = vsubq_s16(prev, curr);
+		int16x8_t nxtd = vsubq_s16(next, curr);
+		int16x8_t even = vaddq_s16(curs, prvd);
+		int16x8_t odd = vaddq_s16(curs, nxtd);
+
+		// undo scaling and round, then store with even/odd phases interleaved
+		uint8x8x2_t o;
+		o.val[0] = vqrshrun_n_s16(even, 4);
+		o.val[1] = vqrshrun_n_s16(odd, 4);
+		vst2_u8(out + i * 2, o);
+#endif
+
+		// "previous" value for next iter
+		t1 = 3 * in_near[i + 7] + in_far[i + 7];
+	}
+
+	// scalar tail: write the even output of pixel i here; the loop below
+	// then writes an odd/even pair for every following pixel
+	t0 = t1;
+	t1 = 3 * in_near[i] + in_far[i];
+	out[i * 2] = stbi__div16(3 * t1 + t0 + 8);
+
+	for (++i; i < w; ++i) {
+		t0 = t1;
+		t1 = 3 * in_near[i] + in_far[i];
+		out[i * 2 - 1] = stbi__div16(3 * t0 + t1 + 8);
+		out[i * 2] = stbi__div16(3 * t1 + t0 + 8);
+	}
+	// last output sample only gets the vertical blend
+	out[w * 2 - 1] = stbi__div4(t1 + 2);
+
+	STBI_NOTUSED(hs);
+
+	return out;
+}
+#endif
+
+// Fallback resampler: nearest-neighbour horizontal expansion that repeats
+// every sample of in_near hs times. in_far is not read. Returns out.
+static stbi_uc* stbi__resample_row_generic(stbi_uc* out, stbi_uc* in_near, stbi_uc* in_far, int w, int hs)
+{
+	stbi_uc* dst = out;
+	int src, rep;
+	STBI_NOTUSED(in_far);
+	for (src = 0; src < w; ++src) {
+		// dst always points at out[src * hs + rep]
+		for (rep = 0; rep < hs; ++rep)
+			*dst++ = in_near[src];
+	}
+	return out;
+}
+
+// Reduced-precision YCbCr-to-RGB conversion, written so that the scalar and
+// SIMD code paths produce the same results.
+#define stbi__float2fixed(x)  (((int) ((x) * 4096.0f + 0.5f)) << 8)
+
+// Converts count pixels from separate Y/Cb/Cr rows into interleaved output.
+// Each pixel writes four bytes (r, g, b, 255) and then advances out by step.
+static void stbi__YCbCr_to_RGB_row(stbi_uc* out, const stbi_uc* y, const stbi_uc* pcb, const stbi_uc* pcr, int count, int step)
+{
+	int px;
+	for (px = 0; px < count; ++px, out += step) {
+		// 20 fractional bits; the added half provides rounding
+		int luma = (y[px] << 20) + (1 << 19);
+		int cr = pcr[px] - 128;
+		int cb = pcb[px] - 128;
+		int r = luma + cr * stbi__float2fixed(1.40200f);
+		int g = luma + (cr * -stbi__float2fixed(0.71414f)) + ((cb * -stbi__float2fixed(0.34414f)) & 0xffff0000);
+		int b = luma + cb * stbi__float2fixed(1.77200f);
+
+		// drop the fractional bits
+		r >>= 20;
+		g >>= 20;
+		b >>= 20;
+
+		// clamp each channel to 0..255
+		if (r < 0) r = 0; else if (r > 255) r = 255;
+		if (g < 0) g = 0; else if (g > 255) g = 255;
+		if (b < 0) b = 0; else if (b > 255) b = 255;
+
+		out[0] = (stbi_uc)r;
+		out[1] = (stbi_uc)g;
+		out[2] = (stbi_uc)b;
+		out[3] = 255;
+	}
+}
+
+#if defined(STBI_SSE2) || defined(STBI_NEON)
+// SIMD variant of the YCbCr-to-RGB row conversion. When step == 4 the vector
+// loops convert 8 pixels per iteration; whatever is left (and every pixel
+// when step != 4) goes through the scalar loop at the end, which uses the
+// same arithmetic as stbi__YCbCr_to_RGB_row.
+static void stbi__YCbCr_to_RGB_simd(stbi_uc* out, stbi_uc const* y, stbi_uc const* pcb, stbi_uc const* pcr, int count, int step)
+{
+	int i = 0;
+
+#ifdef STBI_SSE2
+	// step == 3 is pretty ugly on the final interleave, and i'm not convinced
+	// it's useful in practice (you wouldn't use it for textures, for example).
+	// so just accelerate step == 4 case.
+	if (step == 4) {
+		// this is a fairly straightforward implementation and not super-optimized.
+		__m128i signflip = _mm_set1_epi8(-0x80);
+		__m128i cr_const0 = _mm_set1_epi16((short)(1.40200f * 4096.0f + 0.5f));
+		__m128i cr_const1 = _mm_set1_epi16(-(short)(0.71414f * 4096.0f + 0.5f));
+		__m128i cb_const0 = _mm_set1_epi16(-(short)(0.34414f * 4096.0f + 0.5f));
+		__m128i cb_const1 = _mm_set1_epi16((short)(1.77200f * 4096.0f + 0.5f));
+		__m128i y_bias = _mm_set1_epi8((char)(unsigned char)128);
+		__m128i xw = _mm_set1_epi16(255); // alpha channel
+
+		for (; i + 7 < count; i += 8) {
+			// load
+			__m128i y_bytes = _mm_loadl_epi64((__m128i*) (y + i));
+			__m128i cr_bytes = _mm_loadl_epi64((__m128i*) (pcr + i));
+			__m128i cb_bytes = _mm_loadl_epi64((__m128i*) (pcb + i));
+			__m128i cr_biased = _mm_xor_si128(cr_bytes, signflip); // -128
+			__m128i cb_biased = _mm_xor_si128(cb_bytes, signflip); // -128
+
+			// unpack to short (and left-shift cr, cb by 8)
+			__m128i yw = _mm_unpacklo_epi8(y_bias, y_bytes);
+			__m128i crw = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_biased);
+			__m128i cbw = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_biased);
+
+			// color transform
+			__m128i yws = _mm_srli_epi16(yw, 4);
+			__m128i cr0 = _mm_mulhi_epi16(cr_const0, crw);
+			__m128i cb0 = _mm_mulhi_epi16(cb_const0, cbw);
+			__m128i cb1 = _mm_mulhi_epi16(cbw, cb_const1);
+			__m128i cr1 = _mm_mulhi_epi16(crw, cr_const1);
+			__m128i rws = _mm_add_epi16(cr0, yws);
+			__m128i gwt = _mm_add_epi16(cb0, yws);
+			__m128i bws = _mm_add_epi16(yws, cb1);
+			__m128i gws = _mm_add_epi16(gwt, cr1);
+
+			// descale
+			__m128i rw = _mm_srai_epi16(rws, 4);
+			__m128i bw = _mm_srai_epi16(bws, 4);
+			__m128i gw = _mm_srai_epi16(gws, 4);
+
+			// back to byte, set up for transpose
+			__m128i brb = _mm_packus_epi16(rw, bw);
+			__m128i gxb = _mm_packus_epi16(gw, xw);
+
+			// transpose to interleave channels
+			__m128i t0 = _mm_unpacklo_epi8(brb, gxb);
+			__m128i t1 = _mm_unpackhi_epi8(brb, gxb);
+			__m128i o0 = _mm_unpacklo_epi16(t0, t1);
+			__m128i o1 = _mm_unpackhi_epi16(t0, t1);
+
+			// store
+			_mm_storeu_si128((__m128i*) (out + 0), o0);
+			_mm_storeu_si128((__m128i*) (out + 16), o1);
+			out += 32;
+		}
+	}
+#endif
+
+#ifdef STBI_NEON
+	// in this version, step=3 support would be easy to add. but is there demand?
+	if (step == 4) {
+		// this is a fairly straightforward implementation and not super-optimized.
+		uint8x8_t signflip = vdup_n_u8(0x80);
+		int16x8_t cr_const0 = vdupq_n_s16((short)(1.40200f * 4096.0f + 0.5f));
+		int16x8_t cr_const1 = vdupq_n_s16(-(short)(0.71414f * 4096.0f + 0.5f));
+		int16x8_t cb_const0 = vdupq_n_s16(-(short)(0.34414f * 4096.0f + 0.5f));
+		int16x8_t cb_const1 = vdupq_n_s16((short)(1.77200f * 4096.0f + 0.5f));
+
+		for (; i + 7 < count; i += 8) {
+			// load
+			uint8x8_t y_bytes = vld1_u8(y + i);
+			uint8x8_t cr_bytes = vld1_u8(pcr + i);
+			uint8x8_t cb_bytes = vld1_u8(pcb + i);
+			int8x8_t cr_biased = vreinterpret_s8_u8(vsub_u8(cr_bytes, signflip));
+			int8x8_t cb_biased = vreinterpret_s8_u8(vsub_u8(cb_bytes, signflip));
+
+			// expand to s16
+			int16x8_t yws = vreinterpretq_s16_u16(vshll_n_u8(y_bytes, 4));
+			int16x8_t crw = vshll_n_s8(cr_biased, 7);
+			int16x8_t cbw = vshll_n_s8(cb_biased, 7);
+
+			// color transform
+			int16x8_t cr0 = vqdmulhq_s16(crw, cr_const0);
+			int16x8_t cb0 = vqdmulhq_s16(cbw, cb_const0);
+			int16x8_t cr1 = vqdmulhq_s16(crw, cr_const1);
+			int16x8_t cb1 = vqdmulhq_s16(cbw, cb_const1);
+			int16x8_t rws = vaddq_s16(yws, cr0);
+			int16x8_t gws = vaddq_s16(vaddq_s16(yws, cb0), cr1);
+			int16x8_t bws = vaddq_s16(yws, cb1);
+
+			// undo scaling, round, convert to byte
+			uint8x8x4_t o;
+			o.val[0] = vqrshrun_n_s16(rws, 4);
+			o.val[1] = vqrshrun_n_s16(gws, 4);
+			o.val[2] = vqrshrun_n_s16(bws, 4);
+			o.val[3] = vdup_n_u8(255);
+
+			// store, interleaving r/g/b/a
+			vst4_u8(out, o);
+			out += 8 * 4;
+		}
+	}
+#endif
+
+	// scalar loop for the pixels the vector loops did not consume
+	for (; i < count; ++i) {
+		int y_fixed = (y[i] << 20) + (1 << 19); // rounding
+		int r, g, b;
+		int cr = pcr[i] - 128;
+		int cb = pcb[i] - 128;
+		r = y_fixed + cr * stbi__float2fixed(1.40200f);
+		g = y_fixed + cr * -stbi__float2fixed(0.71414f) + ((cb * -stbi__float2fixed(0.34414f)) & 0xffff0000);
+		b = y_fixed + cb * stbi__float2fixed(1.77200f);
+		r >>= 20;
+		g >>= 20;
+		b >>= 20;
+		// clamp to 0..255
+		if ((unsigned)r > 255) { if (r < 0) r = 0; else r = 255; }
+		if ((unsigned)g > 255) { if (g < 0) g = 0; else g = 255; }
+		if ((unsigned)b > 255) { if (b < 0) b = 0; else b = 255; }
+		out[0] = (stbi_uc)r;
+		out[1] = (stbi_uc)g;
+		out[2] = (stbi_uc)b;
+		out[3] = 255;
+		out += step;
+	}
+}
+#endif
+
+// Installs the kernel function pointers on j: the portable C versions
+// first, then the SIMD versions when they are compiled in (and, for SSE2,
+// only when stbi__sse2_available() says so at run time).
+static void stbi__setup_jpeg(stbi__jpeg* j)
+{
+	// portable defaults
+	j->resample_row_hv_2_kernel = stbi__resample_row_hv_2;
+	j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_row;
+	j->idct_block_kernel = stbi__idct_block;
+
+#ifdef STBI_SSE2
+	if (stbi__sse2_available()) {
+		j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
+		j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
+		j->idct_block_kernel = stbi__idct_simd;
+	}
+#endif
+
+#ifdef STBI_NEON
+	j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
+	j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
+	j->idct_block_kernel = stbi__idct_simd;
+#endif
+}
+
+// Releases the temporary per-component buffers for all img_n components of
+// the image attached to j.
+static void stbi__cleanup_jpeg(stbi__jpeg* j)
+{
+	int ncomp = j->s->img_n;
+	stbi__free_jpeg_components(j, ncomp, 0);
+}
+
+// Per-component upsampling state used by load_jpeg_image while it walks the
+// output rows.
+typedef struct
+{
+	resample_row_func resample; // row resampler chosen from hs/vs
+	stbi_uc* line0, * line1;    // the two input rows currently being blended
+	int hs, vs;   // expansion factor in each axis
+	int w_lores; // horizontal pixels pre-expansion
+	int ystep;   // how far through vertical expansion we are
+	int ypos;    // which pre-expansion row we're on
+} stbi__resample;
+
+// Rounded (x * y) / 255 for two 0..255 values, computed with shifts and
+// adds instead of a division.
+static stbi_uc stbi__blinn_8x8(stbi_uc x, stbi_uc y)
+{
+	unsigned int scaled = x * y + 128;
+	scaled += scaled >> 8;
+	return (stbi_uc)(scaled >> 8);
+}
+
+// Decodes the JPEG attached to z and returns the pixels as an interleaved
+// 8-bit buffer.
+//   req_comp  0 to keep the natural channel count (3 when the image has at
+//             least 3 components, otherwise 1), or 1..4 to force a count
+//   out_x/out_y receive the image width and height
+//   comp      if non-NULL, receives 3 or 1 depending on the source image
+// Returns the output buffer obtained from stbi__malloc_mad3, or NULL on
+// error. Temporary component buffers are released before returning.
+static stbi_uc* load_jpeg_image(stbi__jpeg* z, int* out_x, int* out_y, int* comp, int req_comp)
+{
+	int n, decode_n, is_rgb;
+	z->s->img_n = 0; // make stbi__cleanup_jpeg safe
+
+	// validate req_comp
+	if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");
+
+	// load a jpeg image from whichever source, but leave in YCbCr format
+	if (!stbi__decode_jpeg_image(z)) { stbi__cleanup_jpeg(z); return NULL; }
+
+	// determine actual number of components to generate
+	n = req_comp ? req_comp : z->s->img_n >= 3 ? 3 : 1;
+
+	// three components are treated as plain RGB (no colour transform) when
+	// z->rgb == 3, or when app14_color_transform is 0 and jfif is not set
+	is_rgb = z->s->img_n == 3 && (z->rgb == 3 || (z->app14_color_transform == 0 && !z->jfif));
+
+	// grey output from a YCbCr image only needs the first component
+	if (z->s->img_n == 3 && n < 3 && !is_rgb)
+		decode_n = 1;
+	else
+		decode_n = z->s->img_n;
+
+	// resample and color-convert
+	{
+		int k;
+		unsigned int i, j;
+		stbi_uc* output;
+		stbi_uc* coutput[4];
+
+		stbi__resample res_comp[4];
+
+		for (k = 0; k < decode_n; ++k) {
+			stbi__resample* r = &res_comp[k];
+
+			// allocate line buffer big enough for upsampling off the edges
+			// with upsample factor of 4
+			z->img_comp[k].linebuf = (stbi_uc*)stbi__malloc(z->s->img_x + 3);
+			if (!z->img_comp[k].linebuf) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }
+
+			r->hs = z->img_h_max / z->img_comp[k].h;
+			r->vs = z->img_v_max / z->img_comp[k].v;
+			r->ystep = r->vs >> 1;
+			r->w_lores = (z->s->img_x + r->hs - 1) / r->hs;
+			r->ypos = 0;
+			r->line0 = r->line1 = z->img_comp[k].data;
+
+			// pick the resampler matching this component's expansion factors
+			if (r->hs == 1 && r->vs == 1) r->resample = resample_row_1;
+			else if (r->hs == 1 && r->vs == 2) r->resample = stbi__resample_row_v_2;
+			else if (r->hs == 2 && r->vs == 1) r->resample = stbi__resample_row_h_2;
+			else if (r->hs == 2 && r->vs == 2) r->resample = z->resample_row_hv_2_kernel;
+			else                               r->resample = stbi__resample_row_generic;
+		}
+
+		// can't error after this so, this is safe
+		output = (stbi_uc*)stbi__malloc_mad3(n, z->s->img_x, z->s->img_y, 1);
+		if (!output) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }
+
+		// now go ahead and resample
+		for (j = 0; j < z->s->img_y; ++j) {
+			stbi_uc* out = output + n * z->s->img_x * j;
+			for (k = 0; k < decode_n; ++k) {
+				stbi__resample* r = &res_comp[k];
+				// in the second half of a vertical step line1 is passed as the near row
+				int y_bot = r->ystep >= (r->vs >> 1);
+				coutput[k] = r->resample(z->img_comp[k].linebuf,
+					y_bot ? r->line1 : r->line0,
+					y_bot ? r->line0 : r->line1,
+					r->w_lores, r->hs);
+				// advance to the next source row once vs output rows were produced
+				if (++r->ystep >= r->vs) {
+					r->ystep = 0;
+					r->line0 = r->line1;
+					if (++r->ypos < z->img_comp[k].y)
+						r->line1 += z->img_comp[k].w2;
+				}
+			}
+			if (n >= 3) {
+				stbi_uc* y = coutput[0];
+				if (z->s->img_n == 3) {
+					if (is_rgb) {
+						for (i = 0; i < z->s->img_x; ++i) {
+							out[0] = y[i];
+							out[1] = coutput[1][i];
+							out[2] = coutput[2][i];
+							out[3] = 255;
+							out += n;
+						}
+					}
+					else {
+						z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
+					}
+				}
+				else if (z->s->img_n == 4) {
+					if (z->app14_color_transform == 0) { // CMYK
+						for (i = 0; i < z->s->img_x; ++i) {
+							stbi_uc m = coutput[3][i];
+							out[0] = stbi__blinn_8x8(coutput[0][i], m);
+							out[1] = stbi__blinn_8x8(coutput[1][i], m);
+							out[2] = stbi__blinn_8x8(coutput[2][i], m);
+							out[3] = 255;
+							out += n;
+						}
+					}
+					else if (z->app14_color_transform == 2) { // YCCK
+						z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
+						for (i = 0; i < z->s->img_x; ++i) {
+							stbi_uc m = coutput[3][i];
+							out[0] = stbi__blinn_8x8(255 - out[0], m);
+							out[1] = stbi__blinn_8x8(255 - out[1], m);
+							out[2] = stbi__blinn_8x8(255 - out[2], m);
+							out += n;
+						}
+					}
+					else { // YCbCr + alpha?  Ignore the fourth channel for now
+						z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
+					}
+				}
+				else
+					// single-component source: replicate grey into r, g and b
+					for (i = 0; i < z->s->img_x; ++i) {
+						out[0] = out[1] = out[2] = y[i];
+						out[3] = 255; // not used if n==3
+						out += n;
+					}
+			}
+			else {
+				// n is 1 or 2 here: grey, optionally followed by an alpha of 255
+				if (is_rgb) {
+					if (n == 1)
+						for (i = 0; i < z->s->img_x; ++i)
+							* out++ = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
+					else {
+						for (i = 0; i < z->s->img_x; ++i, out += 2) {
+							out[0] = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
+							out[1] = 255;
+						}
+					}
+				}
+				else if (z->s->img_n == 4 && z->app14_color_transform == 0) {
+					for (i = 0; i < z->s->img_x; ++i) {
+						stbi_uc m = coutput[3][i];
+						stbi_uc r = stbi__blinn_8x8(coutput[0][i], m);
+						stbi_uc g = stbi__blinn_8x8(coutput[1][i], m);
+						stbi_uc b = stbi__blinn_8x8(coutput[2][i], m);
+						out[0] = stbi__compute_y(r, g, b);
+						out[1] = 255;
+						out += n;
+					}
+				}
+				else if (z->s->img_n == 4 && z->app14_color_transform == 2) {
+					for (i = 0; i < z->s->img_x; ++i) {
+						out[0] = stbi__blinn_8x8(255 - coutput[0][i], coutput[3][i]);
+						out[1] = 255;
+						out += n;
+					}
+				}
+				else {
+					stbi_uc* y = coutput[0];
+					if (n == 1)
+						for (i = 0; i < z->s->img_x; ++i) out[i] = y[i];
+					else
+						for (i = 0; i < z->s->img_x; ++i)* out++ = y[i], * out++ = 255;
+				}
+			}
+		}
+		stbi__cleanup_jpeg(z);
+		*out_x = z->s->img_x;
+		*out_y = z->s->img_y;
+		if (comp)* comp = z->s->img_n >= 3 ? 3 : 1; // report original components, not output
+		return output;
+	}
+}
+
+// Loader entry point for JPEG: allocates the decoder state, installs the
+// kernels, decodes through load_jpeg_image and frees the state again.
+// Returns the pixel buffer from load_jpeg_image, or NULL on failure
+// (including failure to allocate the decoder state). ri is unused.
+static void* stbi__jpeg_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri)
+{
+	unsigned char* result;
+	stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg));
+	STBI_NOTUSED(ri);
+	// the allocation can fail; don't dereference a NULL decoder state
+	if (!j) return stbi__errpuc("outofmem", "Out of memory");
+	j->s = s;
+	stbi__setup_jpeg(j);
+	result = load_jpeg_image(j, x, y, comp, req_comp);
+	STBI_FREE(j);
+	return result;
+}
+
+// Probes whether the stream looks like a JPEG by running the header decoder
+// in STBI__SCAN_type mode, then rewinds the stream. Returns the header
+// decoder's result, or 0 if the decoder state cannot be allocated.
+static int stbi__jpeg_test(stbi__context* s)
+{
+	int r;
+	stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg));
+	// the allocation can fail; nothing has been read yet, so no rewind is needed
+	if (!j) return stbi__err("outofmem", "Out of memory");
+	j->s = s;
+	stbi__setup_jpeg(j);
+	r = stbi__decode_jpeg_header(j, STBI__SCAN_type);
+	stbi__rewind(s);
+	STBI_FREE(j);
+	return r;
+}
+
+// Decodes the JPEG header (STBI__SCAN_header) and reports width, height and
+// component count through whichever of x, y, comp are non-NULL.
+// Returns 1 on success; on failure rewinds the stream and returns 0.
+static int stbi__jpeg_info_raw(stbi__jpeg* j, int* x, int* y, int* comp)
+{
+	int header_ok = stbi__decode_jpeg_header(j, STBI__SCAN_header);
+	if (!header_ok) {
+		stbi__rewind(j->s);
+		return 0;
+	}
+	if (x != NULL)
+		*x = j->s->img_x;
+	if (y != NULL)
+		*y = j->s->img_y;
+	if (comp != NULL) {
+		// three or more source components are reported as 3, fewer as 1
+		if (j->s->img_n >= 3)
+			*comp = 3;
+		else
+			*comp = 1;
+	}
+	return 1;
+}
+
+// Reports JPEG dimensions/components for stream s via stbi__jpeg_info_raw,
+// using a temporary decoder state. Returns that function's result, or 0 if
+// the decoder state cannot be allocated.
+static int stbi__jpeg_info(stbi__context* s, int* x, int* y, int* comp)
+{
+	int result;
+	stbi__jpeg* j = (stbi__jpeg*)(stbi__malloc(sizeof(stbi__jpeg)));
+	// the allocation can fail; don't dereference a NULL decoder state
+	if (!j) return stbi__err("outofmem", "Out of memory");
+	j->s = s;
+	result = stbi__jpeg_info_raw(j, x, y, comp);
+	STBI_FREE(j);
+	return result;
+}
+#endif
+
+// public domain zlib decode    v0.2  Sean Barrett 2006-11-18
+//    simple implementation
+//      - all input must be provided in an upfront buffer
+//      - all output is written to a single output buffer (can malloc/realloc)
+//    performance
+//      - fast huffman
+
+#ifndef STBI_NO_ZLIB
+
+// fast-way is faster to check than jpeg huffman, but slow way is slower
+// STBI__ZFAST_BITS is the number of low bits of the bit buffer used to
+// index the `fast` lookup table of a stbi__zhuffman.
+#define STBI__ZFAST_BITS  9 // accelerate all cases in default tables
+// mask selecting the low STBI__ZFAST_BITS bits
+#define STBI__ZFAST_MASK  ((1 << STBI__ZFAST_BITS) - 1)
+
+// zlib-style huffman encoding
+// (jpegs packs from left, zlib from right, so can't share code)
+// Filled in by stbi__zbuild_huffman from a list of code lengths.
+typedef struct
+{
+	stbi__uint16 fast[1 << STBI__ZFAST_BITS]; // (length << 9) | symbol for codes of at most STBI__ZFAST_BITS bits, 0 if none
+	stbi__uint16 firstcode[16];   // first code of each length
+	int maxcode[17];              // end of each length's code range, preshifted to 16 bits; [16] is a sentinel
+	stbi__uint16 firstsymbol[16]; // index into size[]/value[] of the first code of each length
+	stbi_uc  size[288];           // code length per entry, in code order
+	stbi__uint16 value[288];      // symbol per entry, in code order
+} stbi__zhuffman;
+
+// Reverses the order of the low 16 bits of n; higher bits are discarded.
+stbi_inline static int stbi__bitreverse16(int n)
+{
+	int r = n;
+	r = ((r & 0x5555) << 1) | ((r & 0xAAAA) >> 1); // swap neighbouring bits
+	r = ((r & 0x3333) << 2) | ((r & 0xCCCC) >> 2); // swap bit pairs
+	r = ((r & 0x0F0F) << 4) | ((r & 0xF0F0) >> 4); // swap nibbles
+	r = ((r & 0x00FF) << 8) | ((r & 0xFF00) >> 8); // swap bytes
+	return r;
+}
+
+// Reverses the low `bits` bits of v (bits must be at most 16).
+stbi_inline static int stbi__bit_reverse(int v, int bits)
+{
+	int reversed16;
+	STBI_ASSERT(bits <= 16);
+	// reverse all 16 bits, then drop the (16 - bits) positions that are not
+	// part of the value, e.g. for 11 bits shift away 5
+	reversed16 = stbi__bitreverse16(v);
+	return reversed16 >> (16 - bits);
+}
+
+// Builds the decoding tables of z from num code lengths in sizelist
+// (sizelist[i] is the code length of symbol i, 0 meaning "unused").
+// Returns 1 on success, or 0 (via stbi__err) when the lengths cannot form
+// a valid set of codes.
+static int stbi__zbuild_huffman(stbi__zhuffman* z, const stbi_uc* sizelist, int num)
+{
+	int i, k = 0;
+	int code, next_code[16], sizes[17];
+
+	// DEFLATE spec for generating codes
+	memset(sizes, 0, sizeof(sizes));
+	memset(z->fast, 0, sizeof(z->fast));
+	// count how many symbols use each code length
+	for (i = 0; i < num; ++i)
+		++sizes[sizelist[i]];
+	sizes[0] = 0;
+	// a length of i bits can hold at most 2^i codes
+	for (i = 1; i < 16; ++i)
+		if (sizes[i] > (1 << i))
+			return stbi__err("bad sizes", "Corrupt PNG");
+	code = 0;
+	// assign the first code and first table index for each length
+	for (i = 1; i < 16; ++i) {
+		next_code[i] = code;
+		z->firstcode[i] = (stbi__uint16)code;
+		z->firstsymbol[i] = (stbi__uint16)k;
+		code = (code + sizes[i]);
+		if (sizes[i])
+			if (code - 1 >= (1 << i)) return stbi__err("bad codelengths", "Corrupt PNG");
+		z->maxcode[i] = code << (16 - i); // preshift for inner loop
+		code <<= 1;
+		k += sizes[i];
+	}
+	z->maxcode[16] = 0x10000; // sentinel
+	// hand out codes to symbols in order and fill the lookup tables
+	for (i = 0; i < num; ++i) {
+		int s = sizelist[i];
+		if (s) {
+			int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s];
+			stbi__uint16 fastv = (stbi__uint16)((s << 9) | i);
+			z->size[c] = (stbi_uc)s;
+			z->value[c] = (stbi__uint16)i;
+			if (s <= STBI__ZFAST_BITS) {
+				// short code: fill every fast-table slot whose low s bits
+				// equal the bit-reversed code
+				int j = stbi__bit_reverse(next_code[s], s);
+				while (j < (1 << STBI__ZFAST_BITS)) {
+					z->fast[j] = fastv;
+					j += (1 << s);
+				}
+			}
+			++next_code[s];
+		}
+	}
+	return 1;
+}
+
+// zlib-from-memory implementation for PNG reading
+//    because PNG allows splitting the zlib stream arbitrarily,
+//    and it's annoying structurally to have PNG call ZLIB call PNG,
+//    we require PNG read all the IDATs and combine them into a single
+//    memory buffer
+
+// Decoder state: input cursor, bit buffer, output buffer and the two
+// huffman tables of the block currently being decoded.
+typedef struct
+{
+	stbi_uc* zbuffer, * zbuffer_end; // next input byte / one past the last input byte
+	int num_bits;                    // number of valid bits in code_buffer
+	stbi__uint32 code_buffer;        // bit buffer, consumed from the low end
+
+	char* zout;        // next output position
+	char* zout_start;  // start of the output buffer
+	char* zout_end;    // one past the end of the output buffer
+	int   z_expandable; // nonzero if stbi__zexpand may grow the output buffer
+
+	stbi__zhuffman z_length, z_distance; // literal/length and distance tables
+} stbi__zbuf;
+
+// Returns the next input byte and advances the cursor; once the input is
+// exhausted it returns 0 and leaves the cursor alone.
+stbi_inline static stbi_uc stbi__zget8(stbi__zbuf* z)
+{
+	return (z->zbuffer < z->zbuffer_end) ? *z->zbuffer++ : 0;
+}
+
+// Tops up the bit buffer one input byte at a time; always reads at least
+// one byte and stops once more than 24 bits are buffered.
+static void stbi__fill_bits(stbi__zbuf* z)
+{
+	for (;;) {
+		// no stray bits may sit above the valid part of the buffer
+		STBI_ASSERT(z->code_buffer < (1U << z->num_bits));
+		z->code_buffer |= (unsigned int)stbi__zget8(z) << z->num_bits;
+		z->num_bits += 8;
+		if (z->num_bits > 24)
+			break;
+	}
+}
+
+// Removes the next n bits from the bit buffer (refilling it first when it
+// holds fewer than n) and returns them as an unsigned value.
+stbi_inline static unsigned int stbi__zreceive(stbi__zbuf* z, int n)
+{
+	unsigned int bits;
+	if (n > z->num_bits)
+		stbi__fill_bits(z);
+	bits = z->code_buffer & ((1 << n) - 1);
+	z->num_bits -= n;
+	z->code_buffer >>= n;
+	return bits;
+}
+
+// Decodes one symbol when the fast table had no entry for the buffered
+// bits. Consumes the code's bits and returns the symbol, or returns -1
+// (consuming nothing) when the bits do not match a valid code.
+static int stbi__zhuffman_decode_slowpath(stbi__zbuf* a, stbi__zhuffman* z)
+{
+	int b, s, k;
+	// not resolved by fast table, so compute it the slow way
+	// use jpeg approach, which requires MSbits at top
+	k = stbi__bit_reverse(a->code_buffer, 16);
+	// find the code length; the maxcode[16] sentinel bounds the search
+	for (s = STBI__ZFAST_BITS + 1; ; ++s)
+		if (k < z->maxcode[s])
+			break;
+	if (s >= 16) return -1; // invalid code!
+	// code size is s, so:
+	b = (k >> (16 - s)) - z->firstcode[s] + z->firstsymbol[s];
+	// With corrupt or incomplete code lengths the index can fall outside
+	// the tables or land on an entry of a different length. Report that as
+	// a decode failure instead of relying on an assert.
+	if (b < 0 || b >= (int)sizeof(z->size)) return -1;
+	if (z->size[b] != s) return -1;
+	a->code_buffer >>= s;
+	a->num_bits -= s;
+	return z->value[b];
+}
+
+// Decodes the next symbol from the bit buffer with table z. Short codes are
+// resolved through the fast table; everything else goes to the slow path,
+// whose result (possibly -1) is returned unchanged.
+stbi_inline static int stbi__zhuffman_decode(stbi__zbuf* a, stbi__zhuffman* z)
+{
+	int entry, len;
+	if (a->num_bits < 16) stbi__fill_bits(a);
+	entry = z->fast[a->code_buffer & STBI__ZFAST_MASK];
+	if (!entry)
+		return stbi__zhuffman_decode_slowpath(a, z);
+	// fast entries pack the code length above bit 9 and the symbol below
+	len = entry >> 9;
+	a->code_buffer >>= len;
+	a->num_bits -= len;
+	return entry & 511;
+}
+
+// Grows the output buffer so that at least n more bytes fit after zout.
+// The capacity is doubled until it is large enough and the buffer is
+// reallocated; zout, zout_start and zout_end are updated to the new block.
+// Returns 1 on success. Returns 0 (via stbi__err) when the buffer is not
+// expandable, when the required size does not fit in an unsigned int, or
+// when reallocation fails.
+static int stbi__zexpand(stbi__zbuf* z, char* zout, int n)  // need to make room for n bytes
+{
+	char* q;
+	unsigned int cur, limit, old_limit;
+	const unsigned int max_limit = ~0u; // largest size representable here
+	z->zout = zout;
+	if (!z->z_expandable) return stbi__err("output buffer limit", "Corrupt PNG");
+	cur = (unsigned int)(z->zout - z->zout_start);
+	limit = old_limit = (unsigned int)(z->zout_end - z->zout_start);
+	// cur + n must not wrap around
+	if (max_limit - cur < (unsigned int)n) return stbi__err("outofmem", "Out of memory");
+	// doubling a zero capacity would never terminate
+	if (limit == 0) limit = 1;
+	while (cur + n > limit) {
+		// refuse to double past the representable range
+		if (limit > max_limit / 2) return stbi__err("outofmem", "Out of memory");
+		limit *= 2;
+	}
+	q = (char*)STBI_REALLOC_SIZED(z->zout_start, old_limit, limit);
+	STBI_NOTUSED(old_limit);
+	if (q == NULL) return stbi__err("outofmem", "Out of memory");
+	z->zout_start = q;
+	z->zout = q + cur;
+	z->zout_end = q + limit;
+	return 1;
+}
+
+// Base match length for each length symbol, indexed by (symbol - 257).
+static const int stbi__zlength_base[31] = {
+   3,4,5,6,7,8,9,10,11,13,
+   15,17,19,23,27,31,35,43,51,59,
+   67,83,99,115,131,163,195,227,258,0,0 };
+
+// Number of extra bits to read and add to the base length, same indexing.
+static const int stbi__zlength_extra[31] =
+{ 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 };
+
+// Base match distance for each distance symbol.
+static const int stbi__zdist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,
+257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0 };
+
+// Number of extra bits to read and add to the base distance; only 30
+// initializers are given, so the last two entries are zero-initialized.
+static const int stbi__zdist_extra[32] =
+{ 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13 };
+
+// Decodes one huffman-compressed block using a->z_length and a->z_distance,
+// appending literals and back-reference copies to the output buffer (grown
+// through stbi__zexpand when needed).
+// Returns 1 when the end-of-block symbol (256) is reached, with a->zout
+// updated. Returns 0 on an invalid code, an invalid distance, or failure to
+// grow the output.
+static int stbi__parse_huffman_block(stbi__zbuf* a)
+{
+	char* zout = a->zout;
+	for (;;) {
+		int z = stbi__zhuffman_decode(a, &a->z_length);
+		if (z < 256) {
+			if (z < 0) return stbi__err("bad huffman code", "Corrupt PNG"); // error in huffman codes
+			if (zout >= a->zout_end) {
+				if (!stbi__zexpand(a, zout, 1)) return 0;
+				zout = a->zout;
+			}
+			*zout++ = (char)z;
+		}
+		else {
+			stbi_uc* p;
+			int len, dist;
+			if (z == 256) {
+				a->zout = zout;
+				return 1;
+			}
+			// Length symbols 286 and 287 take part in code construction but
+			// must never appear in the data (DEFLATE spec); their table
+			// entries are 0, so reject them instead of decoding a bogus match.
+			if (z >= 286) return stbi__err("bad huffman code", "Corrupt PNG");
+			z -= 257;
+			len = stbi__zlength_base[z];
+			if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]);
+			z = stbi__zhuffman_decode(a, &a->z_distance);
+			// Distance symbols 30 and 31 are likewise invalid. Their base is
+			// 0, which would give dist == 0 and copy from output that has
+			// not been written yet.
+			if (z < 0 || z >= 30) return stbi__err("bad huffman code", "Corrupt PNG");
+			dist = stbi__zdist_base[z];
+			if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]);
+			if (zout - a->zout_start < dist) return stbi__err("bad dist", "Corrupt PNG");
+			// compare sizes rather than forming a pointer past the buffer
+			if (len > a->zout_end - zout) {
+				if (!stbi__zexpand(a, zout, len)) return 0;
+				zout = a->zout;
+			}
+			p = (stbi_uc*)(zout - dist);
+			if (dist == 1) { // run of one byte; common in images.
+				stbi_uc v = *p;
+				if (len) { do *zout++ = v; while (--len); }
+			}
+			else {
+				// byte-by-byte so overlapping source and destination work
+				if (len) { do *zout++ = *p++; while (--len); }
+			}
+		}
+	}
+}
+
+// Reads the header of a dynamic-huffman block: the counts, the code-length
+// code, and then the run-length coded code lengths, from which it builds
+// a->z_length and a->z_distance.
+// Returns 1 on success, 0 (error set via stbi__err or by
+// stbi__zbuild_huffman) on corrupt data.
+static int stbi__compute_huffman_codes(stbi__zbuf* a)
+{
+	// order in which the code-length code lengths are stored in the stream
+	static const stbi_uc length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 };
+	stbi__zhuffman z_codelength;
+	stbi_uc lencodes[286 + 32 + 137];//padding for maximum single op
+	stbi_uc codelength_sizes[19];
+	int i, n;
+
+	int hlit = stbi__zreceive(a, 5) + 257;  // number of literal/length codes
+	int hdist = stbi__zreceive(a, 5) + 1;   // number of distance codes
+	int hclen = stbi__zreceive(a, 4) + 4;   // number of code-length codes
+	int ntot = hlit + hdist;
+
+	memset(codelength_sizes, 0, sizeof(codelength_sizes));
+	for (i = 0; i < hclen; ++i) {
+		int s = stbi__zreceive(a, 3);
+		codelength_sizes[length_dezigzag[i]] = (stbi_uc)s;
+	}
+	if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0;
+
+	// decode ntot code lengths; symbols 16..18 are repeat instructions
+	n = 0;
+	while (n < ntot) {
+		int c = stbi__zhuffman_decode(a, &z_codelength);
+		if (c < 0 || c >= 19) return stbi__err("bad codelengths", "Corrupt PNG");
+		if (c < 16)
+			lencodes[n++] = (stbi_uc)c;
+		else {
+			stbi_uc fill = 0;
+			if (c == 16) {
+				// repeat the previous length 3..6 times
+				c = stbi__zreceive(a, 2) + 3;
+				if (n == 0) return stbi__err("bad codelengths", "Corrupt PNG");
+				fill = lencodes[n - 1];
+			}
+			else if (c == 17)
+				// 3..10 zeros
+				c = stbi__zreceive(a, 3) + 3;
+			else {
+				// 11..138 zeros
+				STBI_ASSERT(c == 18);
+				c = stbi__zreceive(a, 7) + 11;
+			}
+			// a run must not extend past the announced number of lengths
+			if (ntot - n < c) return stbi__err("bad codelengths", "Corrupt PNG");
+			memset(lencodes + n, fill, c);
+			n += c;
+		}
+	}
+	if (n != ntot) return stbi__err("bad codelengths", "Corrupt PNG");
+	if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit)) return 0;
+	if (!stbi__zbuild_huffman(&a->z_distance, lencodes + hlit, hdist)) return 0;
+	return 1;
+}
+
+// Copies one stored (uncompressed) block to the output. The 4-byte header
+// holds len and nlen, little-endian, and nlen must equal len ^ 0xffff.
+// Returns 1 on success, 0 on a bad header, truncated input, or failure to
+// grow the output buffer.
+static int stbi__parse_uncompressed_block(stbi__zbuf* a)
+{
+	stbi_uc hdr[4];
+	int len, nlen;
+	int filled = 0;
+	int stray = a->num_bits & 7;
+	if (stray != 0)
+		stbi__zreceive(a, stray); // discard
+	// whole bytes still sitting in the bit buffer are part of the header
+	for (; a->num_bits > 0; a->num_bits -= 8) {
+		hdr[filled++] = (stbi_uc)(a->code_buffer & 255); // suppress MSVC run-time check
+		a->code_buffer >>= 8;
+	}
+	STBI_ASSERT(a->num_bits == 0);
+	// the rest of the header comes straight from the input
+	for (; filled < 4; ++filled)
+		hdr[filled] = stbi__zget8(a);
+	len = hdr[0] + hdr[1] * 256;
+	nlen = hdr[2] + hdr[3] * 256;
+	if ((len ^ 0xffff) != nlen) return stbi__err("zlib corrupt", "Corrupt PNG");
+	if (a->zbuffer + len > a->zbuffer_end) return stbi__err("read past buffer", "Corrupt PNG");
+	if (a->zout + len > a->zout_end && !stbi__zexpand(a, a->zout, len))
+		return 0;
+	memcpy(a->zout, a->zbuffer, len);
+	a->zout += len;
+	a->zbuffer += len;
+	return 1;
+}
+
+// Reads and validates the two-byte zlib header (CMF, FLG).
+// Returns 1 if it is acceptable, otherwise 0 with an error set.
+static int stbi__parse_zlib_header(stbi__zbuf* a)
+{
+	int cmf = stbi__zget8(a);
+	int flg = stbi__zget8(a);
+	int method = cmf & 15;
+	int check = cmf * 256 + flg;
+	/* the high nibble of cmf (cinfo) gives the window size: 1 << (8 + cinfo),
+	   but it is not needed because output is fully buffered */
+	if (check % 31 != 0) return stbi__err("bad zlib header", "Corrupt PNG"); // zlib spec
+	if ((flg & 32) != 0) return stbi__err("no preset dict", "Corrupt PNG"); // preset dictionary not allowed in png
+	if (method != 8) return stbi__err("bad compression", "Corrupt PNG"); // DEFLATE required for png
+	return 1;
+}
+
+// Code lengths of the 288 literal/length symbols used for blocks with fixed
+// code lengths (block type 1 in stbi__parse_zlib).
+static const stbi_uc stbi__zdefault_length[288] =
+{
+   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8
+};
+// Code lengths of the 32 distance symbols for the same block type.
+static const stbi_uc stbi__zdefault_distance[32] =
+{
+   5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5
+};
+/*
+Init algorithm:
+{
+   int i;   // use <= to match clearly with spec
+   for (i=0; i <= 143; ++i)     stbi__zdefault_length[i]   = 8;
+   for (   ; i <= 255; ++i)     stbi__zdefault_length[i]   = 9;
+   for (   ; i <= 279; ++i)     stbi__zdefault_length[i]   = 7;
+   for (   ; i <= 287; ++i)     stbi__zdefault_length[i]   = 8;
+
+   for (i=0; i <=  31; ++i)     stbi__zdefault_distance[i] = 5;
+}
+*/
+
+// Decodes a complete stream: optionally the zlib header, then blocks until
+// one is flagged as final. Returns 1 on success and 0 on any failure.
+static int stbi__parse_zlib(stbi__zbuf* a, int parse_header)
+{
+	int final, type;
+	if (parse_header && !stbi__parse_zlib_header(a))
+		return 0;
+	a->code_buffer = 0;
+	a->num_bits = 0;
+	do {
+		final = stbi__zreceive(a, 1);
+		type = stbi__zreceive(a, 2);
+		switch (type) {
+		case 0:
+			// stored block
+			if (!stbi__parse_uncompressed_block(a)) return 0;
+			break;
+		case 3:
+			// not a valid block type
+			return 0;
+		default:
+			if (type == 1) {
+				// use fixed code lengths
+				if (!stbi__zbuild_huffman(&a->z_length, stbi__zdefault_length, 288)) return 0;
+				if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance, 32)) return 0;
+			}
+			else {
+				// code lengths are transmitted in the block itself
+				if (!stbi__compute_huffman_codes(a)) return 0;
+			}
+			if (!stbi__parse_huffman_block(a)) return 0;
+			break;
+		}
+	} while (!final);
+	return 1;
+}
+
+// Point the decoder's output at obuf[0..olen) and run stbi__parse_zlib on it.
+static int stbi__do_zlib(stbi__zbuf* a, char* obuf, int olen, int exp, int parse_header)
+{
+	// 'exp' is stored as z_expandable; output starts empty at the beginning of obuf
+	a->z_expandable = exp;
+	a->zout_end = obuf + olen;
+	a->zout = a->zout_start = obuf;
+	return stbi__parse_zlib(a, parse_header);
+}
+
+// Inflate a zlib stream (header parsed) into a malloc'd buffer of initial_size bytes (expandable); NULL on failure.
+STBIDEF char* stbi_zlib_decode_malloc_guesssize(const char* buffer, int len, int initial_size, int* outlen)
+{
+	stbi__zbuf zb;
+	char* dst = (char*)stbi__malloc(initial_size);
+	if (!dst) return NULL;
+	zb.zbuffer = (stbi_uc*)buffer;
+	zb.zbuffer_end = (stbi_uc*)buffer + len;
+	if (!stbi__do_zlib(&zb, dst, initial_size, 1, 1)) {
+		STBI_FREE(zb.zout_start); // release the decoder's current output buffer
+		return NULL;
+	}
+	if (outlen != NULL)
+		*outlen = (int)(zb.zout - zb.zout_start);
+	return zb.zout_start;
+}
+
+STBIDEF char* stbi_zlib_decode_malloc(char const* buffer, int len, int* outlen) // forwards to stbi_zlib_decode_malloc_guesssize with an initial output size of 16384 bytes
+{
+	return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen);
+}
+
+// Inflate into a malloc'd, expandable buffer; parse_header says whether a zlib header is parsed first. NULL on failure.
+STBIDEF char* stbi_zlib_decode_malloc_guesssize_headerflag(const char* buffer, int len, int initial_size, int* outlen, int parse_header)
+{
+	stbi__zbuf zb;
+	char* dst = (char*)stbi__malloc(initial_size);
+	if (!dst) return NULL;
+	zb.zbuffer = (stbi_uc*)buffer;
+	zb.zbuffer_end = (stbi_uc*)buffer + len;
+	if (!stbi__do_zlib(&zb, dst, initial_size, 1, parse_header)) {
+		STBI_FREE(zb.zout_start); // release the decoder's current output buffer
+		return NULL;
+	}
+	if (outlen != NULL)
+		*outlen = (int)(zb.zout - zb.zout_start);
+	return zb.zout_start;
+}
+
+// Inflate a zlib stream (header parsed) into the caller's fixed buffer obuffer[0..olen).
+// Returns the number of bytes produced, or -1 when decoding fails.
+STBIDEF int stbi_zlib_decode_buffer(char* obuffer, int olen, char const* ibuffer, int ilen)
+{
+	stbi__zbuf zb;
+	zb.zbuffer_end = (stbi_uc*)ibuffer + ilen;
+	zb.zbuffer = (stbi_uc*)ibuffer;
+	if (!stbi__do_zlib(&zb, obuffer, olen, 0, 1)) return -1;
+	return (int)(zb.zout - zb.zout_start);
+}
+
+// Inflate data without parsing a zlib header into a malloc'd buffer that starts at 16384 bytes; NULL on failure.
+STBIDEF char* stbi_zlib_decode_noheader_malloc(char const* buffer, int len, int* outlen)
+{
+	stbi__zbuf zb;
+	char* dst = (char*)stbi__malloc(16384);
+	if (!dst) return NULL;
+	zb.zbuffer = (stbi_uc*)buffer;
+	zb.zbuffer_end = (stbi_uc*)buffer + len;
+	if (!stbi__do_zlib(&zb, dst, 16384, 1, 0)) {
+		STBI_FREE(zb.zout_start); // release the decoder's current output buffer
+		return NULL;
+	}
+	if (outlen != NULL)
+		*outlen = (int)(zb.zout - zb.zout_start);
+	return zb.zout_start;
+}
+
+// Inflate data without parsing a zlib header into the caller's fixed buffer obuffer[0..olen).
+// Returns the number of bytes produced, or -1 when decoding fails.
+STBIDEF int stbi_zlib_decode_noheader_buffer(char* obuffer, int olen, const char* ibuffer, int ilen)
+{
+	stbi__zbuf zb;
+	zb.zbuffer_end = (stbi_uc*)ibuffer + ilen;
+	zb.zbuffer = (stbi_uc*)ibuffer;
+	if (!stbi__do_zlib(&zb, obuffer, olen, 0, 0)) return -1;
+	return (int)(zb.zout - zb.zout_start);
+}
+#endif
+
+// public domain "baseline" PNG decoder   v0.10  Sean Barrett 2006-11-18
+//    simple implementation
+//      - only 8-bit samples
+//      - no CRC checking
+//      - allocates lots of intermediate memory
+//        - avoids problem of streaming data between subsystems
+//        - avoids explicit window management
+//    performance
+//      - uses stb_zlib, a PD zlib implementation with fast huffman decoding
+
+#ifndef STBI_NO_PNG
+typedef struct // header of one PNG chunk, as returned by stbi__get_chunk_header
+{
+	stbi__uint32 length; // first big-endian 32-bit value of the chunk header
+	stbi__uint32 type;   // second big-endian 32-bit value; compared against STBI__PNG_TYPE(...) tags
+} stbi__pngchunk;
+
+// Read one chunk header from the stream: the 32-bit length is read first, then the 32-bit type.
+static stbi__pngchunk stbi__get_chunk_header(stbi__context* s)
+{
+	stbi__pngchunk hdr;
+	hdr.length = stbi__get32be(s); hdr.type = stbi__get32be(s); // keep this read order
+	return hdr;
+}
+
+static int stbi__check_png_header(stbi__context* s) // reads up to 8 bytes and compares them with the PNG signature; stops at the first mismatch
+{
+	static const stbi_uc expected[8] = { 137,80,78,71,13,10,26,10 };
+	int pos = 0;
+	while (pos < 8)
+		if (stbi__get8(s) != expected[pos++]) return stbi__err("bad png sig", "Not a PNG");
+	return 1;
+}
+
+typedef struct // per-decode PNG state passed between the stbi__png_* routines and stbi__parse_png_file
+{
+	stbi__context* s; // input stream and image info (img_x, img_y, img_n, img_out_n)
+	stbi_uc* idata, * expanded, * out; // idata: accumulated IDAT bytes; expanded: zlib-inflated data; out: decoded pixels
+	int depth; // bit depth read from the IHDR chunk
+} stbi__png;
+
+
+enum { // scanline filter codes; 0..4 come from the filter byte at the start of each row
+	STBI__F_none = 0,
+	STBI__F_sub = 1,
+	STBI__F_up = 2,
+	STBI__F_avg = 3,
+	STBI__F_paeth = 4,
+	// synthetic filters used for first scanline to avoid needing a dummy row of 0s
+	STBI__F_avg_first,
+	STBI__F_paeth_first
+};
+
+static stbi_uc first_row_filter[5] = // indexed by filter code 0..4: the filter actually applied on row 0, where no previous row is sampled
+{
+   STBI__F_none,
+   STBI__F_sub,
+   STBI__F_none, // 'up' on the first row is replaced by 'none'
+   STBI__F_avg_first,
+   STBI__F_paeth_first
+};
+
+// Paeth predictor: returns whichever of a, b, c is closest to a + b - c (ties go to a, then b).
+static int stbi__paeth(int a, int b, int c)
+{
+	const int estimate = a + b - c;
+	const int dist_a = abs(estimate - a), dist_b = abs(estimate - b), dist_c = abs(estimate - c);
+	if (dist_a > dist_b || dist_a > dist_c) {
+		return (dist_b <= dist_c) ? b : c;
+	}
+	return a;
+}
+
+static const stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 }; // indexed by bit depth (1, 2, 4, 8): multiplier that scales a sample of that depth to the 0..255 range
+
+// create the png data from post-deflated data: un-filters 'raw' (y rows, each a filter byte plus packed samples, x pixels wide) into a freshly allocated a->out with out_n components per pixel; returns 1 on success, 0 via stbi__err on failure (a->out may stay allocated for the caller to free)
+static int stbi__create_png_image_raw(stbi__png* a, stbi_uc* raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color)
+{
+	int bytes = (depth == 16 ? 2 : 1);
+	stbi__context* s = a->s;
+	stbi__uint32 i, j, stride = x * out_n * bytes; // stride: bytes per output row
+	stbi__uint32 img_len, img_width_bytes;
+	int k;
+	int img_n = s->img_n; // copy it into a local for later
+
+	int output_bytes = out_n * bytes; // bytes per output pixel
+	int filter_bytes = img_n * bytes; // bytes per input pixel (forced to 1 below for depth < 8)
+	int width = x;
+
+	STBI_ASSERT(out_n == s->img_n || out_n == s->img_n + 1);
+	a->out = (stbi_uc*)stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into
+	if (!a->out) return stbi__err("outofmem", "Out of memory");
+
+	if (!stbi__mad3sizes_valid(img_n, x, depth, 7)) return stbi__err("too large", "Corrupt PNG");
+	img_width_bytes = (((img_n * x * depth) + 7) >> 3); // packed bytes per input row, excluding the filter byte
+	img_len = (img_width_bytes + 1) * y;
+
+	// we used to check for exact match between raw_len and img_len on non-interlaced PNGs,
+	// but issue #276 reported a PNG in the wild that had extra data at the end (all zeros),
+	// so just check for raw_len < img_len always.
+	if (raw_len < img_len) return stbi__err("not enough pixels", "Corrupt PNG");
+
+	for (j = 0; j < y; ++j) {
+		stbi_uc* cur = a->out + stride * j;
+		stbi_uc* prior;
+		int filter = *raw++;
+
+		if (filter > 4)
+			return stbi__err("invalid filter", "Corrupt PNG");
+
+		if (depth < 8) {
+			if (img_width_bytes > x) return stbi__err("invalid width", "Corrupt PNG"); // was an assert only; a bad depth/colour-type pair must fail in release builds too
+			cur += x * out_n - img_width_bytes; // store output to the rightmost img_len bytes, so we can decode in place
+			filter_bytes = 1;
+			width = img_width_bytes;
+		}
+		prior = cur - stride; // bugfix: need to compute this after 'cur +=' computation above
+
+		// if first row, use special filter that doesn't sample previous row
+		if (j == 0) filter = first_row_filter[filter];
+
+		// handle first byte explicitly
+		for (k = 0; k < filter_bytes; ++k) {
+			switch (filter) {
+			case STBI__F_none: cur[k] = raw[k]; break;
+			case STBI__F_sub: cur[k] = raw[k]; break;
+			case STBI__F_up: cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break;
+			case STBI__F_avg: cur[k] = STBI__BYTECAST(raw[k] + (prior[k] >> 1)); break;
+			case STBI__F_paeth: cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0, prior[k], 0)); break;
+			case STBI__F_avg_first: cur[k] = raw[k]; break;
+			case STBI__F_paeth_first: cur[k] = raw[k]; break;
+			}
+		}
+
+		if (depth == 8) {
+			if (img_n != out_n)
+				cur[img_n] = 255; // first pixel
+			raw += img_n;
+			cur += out_n;
+			prior += out_n;
+		}
+		else if (depth == 16) {
+			if (img_n != out_n) {
+				cur[filter_bytes] = 255; // first pixel top byte
+				cur[filter_bytes + 1] = 255; // first pixel bottom byte
+			}
+			raw += filter_bytes;
+			cur += output_bytes;
+			prior += output_bytes;
+		}
+		else {
+			raw += 1;
+			cur += 1;
+			prior += 1;
+		}
+
+		// this is a little gross, so that we don't switch per-pixel or per-component
+		if (depth < 8 || img_n == out_n) {
+			int nk = (width - 1) * filter_bytes;
+#define STBI__CASE(f) \
+			 case f:     \
+				for (k=0; k < nk; ++k)
+			switch (filter) {
+				// "none" filter turns into a memcpy here; make that explicit.
+			case STBI__F_none:         memcpy(cur, raw, nk); break;
+				STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k - filter_bytes]); } break;
+				STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break;
+				STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k - filter_bytes]) >> 1)); } break;
+				STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k - filter_bytes], prior[k], prior[k - filter_bytes])); } break;
+				STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k - filter_bytes] >> 1)); } break;
+				STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k - filter_bytes], 0, 0)); } break;
+			}
+#undef STBI__CASE
+			raw += nk;
+		}
+		else {
+			STBI_ASSERT(img_n + 1 == out_n);
+#define STBI__CASE(f) \
+			 case f:     \
+				for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \
+				   for (k=0; k < filter_bytes; ++k)
+			switch (filter) {
+				STBI__CASE(STBI__F_none) { cur[k] = raw[k]; } break;
+				STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k - output_bytes]); } break;
+				STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break;
+				STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k - output_bytes]) >> 1)); } break;
+				STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k - output_bytes], prior[k], prior[k - output_bytes])); } break;
+				STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k - output_bytes] >> 1)); } break;
+				STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k - output_bytes], 0, 0)); } break;
+			}
+#undef STBI__CASE
+
+			// the loop above sets the high byte of the pixels' alpha, but for
+			// 16 bit png files we also need the low byte set. we'll do that here.
+			if (depth == 16) {
+				cur = a->out + stride * j; // start at the beginning of the row again
+				for (i = 0; i < x; ++i, cur += output_bytes) {
+					cur[filter_bytes + 1] = 255;
+				}
+			}
+		}
+	}
+
+	// we make a separate pass to expand bits to pixels; for performance,
+	// this could run two scanlines behind the above code, so it won't
+	// interfere with filtering but will still be in the cache.
+	if (depth < 8) {
+		for (j = 0; j < y; ++j) {
+			stbi_uc* cur = a->out + stride * j;
+			stbi_uc* in = a->out + stride * j + x * out_n - img_width_bytes;
+			// unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the common 8-bit path optimal at minimal cost for 1/2/4-bit
+			// png guarantees byte alignment, if width is not multiple of 8/4/2 we'll decode dummy trailing data that will be skipped in the later loop
+			stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range
+
+			// note that the final byte might overshoot and write more data than desired.
+			// we can allocate enough data that this never writes out of memory, but it
+			// could also overwrite the next scanline. can it overwrite non-empty data
+			// on the next scanline? yes, consider 1-pixel-wide scanlines with 1-bit-per-pixel.
+			// so we need to explicitly clamp the final ones
+
+			if (depth == 4) {
+				for (k = x * img_n; k >= 2; k -= 2, ++in) {
+					*cur++ = scale * ((*in >> 4));
+					*cur++ = scale * ((*in) & 0x0f);
+				}
+				if (k > 0)* cur++ = scale * ((*in >> 4));
+			}
+			else if (depth == 2) {
+				for (k = x * img_n; k >= 4; k -= 4, ++in) {
+					*cur++ = scale * ((*in >> 6));
+					*cur++ = scale * ((*in >> 4) & 0x03);
+					*cur++ = scale * ((*in >> 2) & 0x03);
+					*cur++ = scale * ((*in) & 0x03);
+				}
+				if (k > 0)* cur++ = scale * ((*in >> 6));
+				if (k > 1)* cur++ = scale * ((*in >> 4) & 0x03);
+				if (k > 2)* cur++ = scale * ((*in >> 2) & 0x03);
+			}
+			else if (depth == 1) {
+				for (k = x * img_n; k >= 8; k -= 8, ++in) {
+					*cur++ = scale * ((*in >> 7));
+					*cur++ = scale * ((*in >> 6) & 0x01);
+					*cur++ = scale * ((*in >> 5) & 0x01);
+					*cur++ = scale * ((*in >> 4) & 0x01);
+					*cur++ = scale * ((*in >> 3) & 0x01);
+					*cur++ = scale * ((*in >> 2) & 0x01);
+					*cur++ = scale * ((*in >> 1) & 0x01);
+					*cur++ = scale * ((*in) & 0x01);
+				}
+				if (k > 0)* cur++ = scale * ((*in >> 7));
+				if (k > 1)* cur++ = scale * ((*in >> 6) & 0x01);
+				if (k > 2)* cur++ = scale * ((*in >> 5) & 0x01);
+				if (k > 3)* cur++ = scale * ((*in >> 4) & 0x01);
+				if (k > 4)* cur++ = scale * ((*in >> 3) & 0x01);
+				if (k > 5)* cur++ = scale * ((*in >> 2) & 0x01);
+				if (k > 6)* cur++ = scale * ((*in >> 1) & 0x01);
+			}
+			if (img_n != out_n) {
+				int q;
+				// insert alpha = 255
+				cur = a->out + stride * j;
+				if (img_n == 1) {
+					for (q = x - 1; q >= 0; --q) { // walk backwards so the in-place widening never overwrites unread samples
+						cur[q * 2 + 1] = 255;
+						cur[q * 2 + 0] = cur[q];
+					}
+				}
+				else {
+					STBI_ASSERT(img_n == 3);
+					for (q = x - 1; q >= 0; --q) {
+						cur[q * 4 + 3] = 255;
+						cur[q * 4 + 2] = cur[q * 3 + 2];
+						cur[q * 4 + 1] = cur[q * 3 + 1];
+						cur[q * 4 + 0] = cur[q * 3 + 0];
+					}
+				}
+			}
+		}
+	}
+	else if (depth == 16) {
+		// force the image data from big-endian to platform-native.
+		// this is done in a separate pass due to the decoding relying
+		// on the data being untouched, but could probably be done
+		// per-line during decode if care is taken.
+		stbi_uc* cur = a->out;
+		stbi__uint16* cur16 = (stbi__uint16*)cur;
+
+		for (i = 0; i < x * y * out_n; ++i, cur16++, cur += 2) {
+			*cur16 = (cur[0] << 8) | cur[1];
+		}
+	}
+
+	return 1;
+}
+
+// Build a->out from inflated PNG data. Non-interlaced images go straight to stbi__create_png_image_raw; interlaced ones decode 7 sub-images and scatter their pixels into one full-size buffer. Returns 1 on success, 0 on failure.
+static int stbi__create_png_image(stbi__png* a, stbi_uc* image_data, stbi__uint32 image_data_len, int out_n, int depth, int color, int interlaced)
+{
+	int bytes = (depth == 16 ? 2 : 1);
+	int out_bytes = out_n * bytes;
+	stbi_uc* final;
+	int p;
+	if (!interlaced)
+		return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color);
+	// de-interlacing
+	final = (stbi_uc*)stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0);
+	if (!final) return stbi__err("outofmem", "Out of memory"); // the memcpy below must never see a NULL destination
+	for (p = 0; p < 7; ++p) {
+		int xorig[] = { 0,4,0,2,0,1,0 };
+		int yorig[] = { 0,0,4,0,2,0,1 };
+		int xspc[] = { 8,8,4,4,2,2,1 };
+		int yspc[] = { 8,8,8,4,4,2,2 };
+		int i, j, x, y;
+		// pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1
+		x = (a->s->img_x - xorig[p] + xspc[p] - 1) / xspc[p];
+		y = (a->s->img_y - yorig[p] + yspc[p] - 1) / yspc[p];
+		if (x && y) {
+			stbi__uint32 img_len = ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y; // input bytes consumed by this pass
+			if (!stbi__create_png_image_raw(a, image_data, image_data_len, out_n, x, y, depth, color)) {
+				STBI_FREE(final);
+				return 0;
+			}
+			for (j = 0; j < y; ++j) {
+				for (i = 0; i < x; ++i) {
+					int out_y = j * yspc[p] + yorig[p];
+					int out_x = i * xspc[p] + xorig[p];
+					memcpy(final + out_y * a->s->img_x * out_bytes + out_x * out_bytes,
+						a->out + (j * x + i) * out_bytes, out_bytes);
+				}
+			}
+			STBI_FREE(a->out);
+			image_data += img_len;
+			image_data_len -= img_len;
+		}
+	}
+	a->out = final;
+	return 1;
+}
+
+// Colour-key transparency for 8-bit output: alpha becomes 0 where a pixel equals tc.
+// z->out must hold img_x*img_y pixels of out_n (2 or 4) channels; always returns 1.
+static int stbi__compute_transparency(stbi__png* z, stbi_uc tc[3], int out_n)
+{
+	stbi__context* s = z->s;
+	stbi__uint32 i, pixel_count = s->img_x * s->img_y;
+	stbi_uc* px = z->out;
+
+	// relies on the output already carrying 255 as its alpha value
+	STBI_ASSERT(out_n == 2 || out_n == 4);
+
+	if (out_n != 2) {
+		// colour + alpha: only pixels matching all three key channels are touched
+		for (i = 0; i < pixel_count; ++i, px += 4) {
+			if (px[0] != tc[0] || px[1] != tc[1] || px[2] != tc[2]) continue;
+			px[3] = 0;
+		}
+		return 1;
+	}
+
+	// grey + alpha: alpha is rewritten for every pixel
+	for (i = 0; i < pixel_count; ++i, px += 2)
+		px[1] = (px[0] != tc[0]) ? 255 : 0;
+	return 1;
+}
+
+// Colour-key transparency for 16-bit output: alpha becomes 0 where a pixel equals tc.
+// z->out is read as img_x*img_y pixels of out_n (2 or 4) 16-bit channels; always returns 1.
+static int stbi__compute_transparency16(stbi__png* z, stbi__uint16 tc[3], int out_n)
+{
+	stbi__context* s = z->s;
+	stbi__uint32 i, pixel_count = s->img_x * s->img_y;
+	stbi__uint16* px = (stbi__uint16*)z->out;
+
+	// relies on the output already carrying 65535 as its alpha value
+	STBI_ASSERT(out_n == 2 || out_n == 4);
+
+	if (out_n != 2) {
+		// colour + alpha: only pixels matching all three key channels are touched
+		for (i = 0; i < pixel_count; ++i, px += 4) {
+			if (px[0] != tc[0] || px[1] != tc[1] || px[2] != tc[2]) continue;
+			px[3] = 0;
+		}
+		return 1;
+	}
+
+	// grey + alpha: alpha is rewritten for every pixel
+	for (i = 0; i < pixel_count; ++i, px += 2)
+		px[1] = (px[0] != tc[0]) ? 65535 : 0;
+	return 1;
+}
+
+// Replace a->out (one palette index per pixel) with a newly allocated buffer holding pal_img_n
+// bytes per pixel taken from 'palette' (4 bytes per entry). Returns 1, or 0 via stbi__err on OOM.
+static int stbi__expand_png_palette(stbi__png* a, stbi_uc* palette, int len, int pal_img_n)
+{
+	stbi__uint32 i, pixel_count = a->s->img_x * a->s->img_y;
+	stbi_uc* indices = a->out;
+	stbi_uc* expanded;
+	stbi_uc* dst;
+
+	STBI_NOTUSED(len);
+	expanded = (stbi_uc*)stbi__malloc_mad2(pixel_count, pal_img_n, 0);
+	if (!expanded) return stbi__err("outofmem", "Out of memory");
+
+	// from here until the old buffer is freed below, an early exit would leak 'expanded'
+	dst = expanded;
+	if (pal_img_n != 3) {
+		// every value other than 3 takes the 4-byte path, which also copies the entry's alpha
+		for (i = 0; i < pixel_count; ++i, dst += 4) {
+			const stbi_uc* entry = palette + indices[i] * 4;
+			dst[0] = entry[0];
+			dst[1] = entry[1];
+			dst[2] = entry[2];
+			dst[3] = entry[3];
+		}
+	}
+	else {
+		for (i = 0; i < pixel_count; ++i, dst += 3) {
+			const stbi_uc* entry = palette + indices[i] * 4;
+			dst[0] = entry[0];
+			dst[1] = entry[1];
+			dst[2] = entry[2];
+		}
+	}
+	STBI_FREE(a->out);
+	a->out = expanded;
+	return 1;
+}
+
+static int stbi__unpremultiply_on_load = 0; // set by stbi_set_unpremultiply_on_load; read by stbi__de_iphone for 4-channel output
+static int stbi__de_iphone_flag = 0; // set by stbi_convert_iphone_png_to_rgb; gates the stbi__de_iphone call in stbi__parse_png_file
+
+STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply) // stores the flag in the file-level stbi__unpremultiply_on_load (affects later loads, not thread-local)
+{
+	stbi__unpremultiply_on_load = flag_true_if_should_unpremultiply;
+}
+
+STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert) // stores the flag in the file-level stbi__de_iphone_flag (affects later loads, not thread-local)
+{
+	stbi__de_iphone_flag = flag_true_if_should_convert;
+}
+
+// Convert CgBI ("iPhone") pixel data in z->out in place: swap channels 0 and 2 of each pixel
+// (bgr -> rgb); with 4 channels and stbi__unpremultiply_on_load set, also divide colour by alpha.
+static void stbi__de_iphone(stbi__png* z)
+{
+	stbi__context* s = z->s;
+	stbi__uint32 i, pixel_count = s->img_x * s->img_y;
+	stbi_uc* px = z->out;
+
+	if (s->img_out_n == 3) {
+		// three channels: plain swap
+		for (i = 0; i < pixel_count; ++i, px += 3) {
+			stbi_uc first = px[0];
+			px[0] = px[2];
+			px[2] = first;
+		}
+		return;
+	}
+
+	STBI_ASSERT(s->img_out_n == 4);
+	if (!stbi__unpremultiply_on_load) {
+		// four channels, alpha left alone: plain swap
+		for (i = 0; i < pixel_count; ++i, px += 4) {
+			stbi_uc first = px[0];
+			px[0] = px[2];
+			px[2] = first;
+		}
+		return;
+	}
+	// four channels: swap and unpremultiply
+	for (i = 0; i < pixel_count; ++i, px += 4) {
+		stbi_uc alpha = px[3];
+		stbi_uc first = px[0];
+		if (alpha == 0) {
+			// nothing to divide by: swap only
+			px[0] = px[2];
+			px[2] = first;
+		}
+		else {
+			stbi_uc half = alpha / 2; // added before dividing so the quotient rounds to nearest
+			px[0] = (px[2] * 255 + half) / alpha;
+			px[1] = (px[1] * 255 + half) / alpha;
+			px[2] = (first * 255 + half) / alpha;
+		}
+	}
+}
+
+#define STBI__PNG_TYPE(a,b,c,d)  (((unsigned) (a) << 24) + ((unsigned) (b) << 16) + ((unsigned) (c) << 8) + (unsigned) (d)) // packs four chunk-name characters into one 32-bit tag, first character in the top byte
+
+static int stbi__parse_png_file(stbi__png* z, int scan, int req_comp) // walks the PNG chunk list; 'scan' selects how far to go (STBI__SCAN_type / STBI__SCAN_header / STBI__SCAN_load); returns 1 on success, 0 on failure
+{
+	stbi_uc palette[1024], pal_img_n = 0; // palette: 256 entries x 4 bytes; pal_img_n: 0 = not paletted, else 3 or 4
+	stbi_uc has_trans = 0, tc[3]; // colour key from a tRNS chunk on a non-paletted image (8-bit form)
+	stbi__uint16 tc16[3]; // same colour key, kept as-is for 16-bit images
+	stbi__uint32 ioff = 0, idata_limit = 0, i, pal_len = 0; // ioff: bytes of IDAT data stored so far; idata_limit: capacity of z->idata
+	int first = 1, k, interlace = 0, color = 0, is_iphone = 0;
+	stbi__context* s = z->s;
+
+	z->expanded = NULL;
+	z->idata = NULL;
+	z->out = NULL;
+
+	if (!stbi__check_png_header(s)) return 0;
+
+	if (scan == STBI__SCAN_type) return 1; // caller only wanted the signature check
+
+	for (;;) {
+		stbi__pngchunk c = stbi__get_chunk_header(s);
+		switch (c.type) {
+		case STBI__PNG_TYPE('C', 'g', 'B', 'I'):
+			is_iphone = 1;
+			stbi__skip(s, c.length);
+			break;
+		case STBI__PNG_TYPE('I', 'H', 'D', 'R'): {
+			int comp, filter;
+			if (!first) return stbi__err("multiple IHDR", "Corrupt PNG");
+			first = 0;
+			if (c.length != 13) return stbi__err("bad IHDR len", "Corrupt PNG");
+			s->img_x = stbi__get32be(s); if (s->img_x > (1 << 24)) return stbi__err("too large", "Very large image (corrupt?)");
+			s->img_y = stbi__get32be(s); if (s->img_y > (1 << 24)) return stbi__err("too large", "Very large image (corrupt?)");
+			z->depth = stbi__get8(s);  if (z->depth != 1 && z->depth != 2 && z->depth != 4 && z->depth != 8 && z->depth != 16)  return stbi__err("1/2/4/8/16-bit only", "PNG not supported: 1/2/4/8/16-bit only");
+			color = stbi__get8(s);  if (color > 6)         return stbi__err("bad ctype", "Corrupt PNG");
+			if (color == 3 && z->depth == 16)                  return stbi__err("bad ctype", "Corrupt PNG");
+			if (color == 3) pal_img_n = 3; else if (color & 1) return stbi__err("bad ctype", "Corrupt PNG");
+			comp = stbi__get8(s);  if (comp) return stbi__err("bad comp method", "Corrupt PNG");
+			filter = stbi__get8(s);  if (filter) return stbi__err("bad filter method", "Corrupt PNG");
+			interlace = stbi__get8(s); if (interlace > 1) return stbi__err("bad interlace method", "Corrupt PNG");
+			if (!s->img_x || !s->img_y) return stbi__err("0-pixel image", "Corrupt PNG");
+			if (!pal_img_n) {
+				s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0); // colour bit 2 -> rgb vs grey, bit 4 -> alpha channel
+				if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode");
+				if (scan == STBI__SCAN_header) return 1;
+			}
+			else {
+				// if paletted, then pal_n is our final components, and
+				// img_n is # components to decompress/filter.
+				s->img_n = 1;
+				if ((1 << 30) / s->img_x / 4 < s->img_y) return stbi__err("too large", "Corrupt PNG");
+				// if SCAN_header, have to scan to see if we have a tRNS
+			}
+			break;
+		}
+
+		case STBI__PNG_TYPE('P', 'L', 'T', 'E'): {
+			if (first) return stbi__err("first not IHDR", "Corrupt PNG");
+			if (c.length > 256 * 3) return stbi__err("invalid PLTE", "Corrupt PNG");
+			pal_len = c.length / 3;
+			if (pal_len * 3 != c.length) return stbi__err("invalid PLTE", "Corrupt PNG");
+			for (i = 0; i < pal_len; ++i) { // stored as 4 bytes per entry, alpha preset to 255
+				palette[i * 4 + 0] = stbi__get8(s);
+				palette[i * 4 + 1] = stbi__get8(s);
+				palette[i * 4 + 2] = stbi__get8(s);
+				palette[i * 4 + 3] = 255;
+			}
+			break;
+		}
+
+		case STBI__PNG_TYPE('t', 'R', 'N', 'S'): {
+			if (first) return stbi__err("first not IHDR", "Corrupt PNG");
+			if (z->idata) return stbi__err("tRNS after IDAT", "Corrupt PNG");
+			if (pal_img_n) {
+				if (scan == STBI__SCAN_header) { s->img_n = 4; return 1; }
+				if (pal_len == 0) return stbi__err("tRNS before PLTE", "Corrupt PNG");
+				if (c.length > pal_len) return stbi__err("bad tRNS len", "Corrupt PNG");
+				pal_img_n = 4;
+				for (i = 0; i < c.length; ++i) // overwrite the alpha byte of the first c.length palette entries
+					palette[i * 4 + 3] = stbi__get8(s);
+			}
+			else {
+				if (!(s->img_n & 1)) return stbi__err("tRNS with alpha", "Corrupt PNG");
+				if (c.length != (stbi__uint32)s->img_n * 2) return stbi__err("bad tRNS len", "Corrupt PNG");
+				has_trans = 1;
+				if (z->depth == 16) {
+					for (k = 0; k < s->img_n; ++k) tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is
+				}
+				else {
+					for (k = 0; k < s->img_n; ++k) tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger
+				}
+			}
+			break;
+		}
+
+		case STBI__PNG_TYPE('I', 'D', 'A', 'T'): {
+			if (first) return stbi__err("first not IHDR", "Corrupt PNG");
+			if (pal_img_n && !pal_len) return stbi__err("no PLTE", "Corrupt PNG");
+			if (scan == STBI__SCAN_header) { s->img_n = pal_img_n; return 1; }
+			if ((int)(ioff + c.length) < (int)ioff) return 0; // total IDAT size would overflow
+			if (ioff + c.length > idata_limit) {
+				stbi__uint32 idata_limit_old = idata_limit;
+				stbi_uc* p;
+				if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096;
+				while (ioff + c.length > idata_limit)
+					idata_limit *= 2;
+				STBI_NOTUSED(idata_limit_old);
+				p = (stbi_uc*)STBI_REALLOC_SIZED(z->idata, idata_limit_old, idata_limit); if (p == NULL) return stbi__err("outofmem", "Out of memory");
+				z->idata = p;
+			}
+			if (!stbi__getn(s, z->idata + ioff, c.length)) return stbi__err("outofdata", "Corrupt PNG");
+			ioff += c.length;
+			break;
+		}
+
+		case STBI__PNG_TYPE('I', 'E', 'N', 'D'): {
+			stbi__uint32 raw_len, bpl;
+			if (first) return stbi__err("first not IHDR", "Corrupt PNG");
+			if (scan != STBI__SCAN_load) return 1;
+			if (z->idata == NULL) return stbi__err("no IDAT", "Corrupt PNG");
+			// initial guess for decoded data size to avoid unnecessary reallocs
+			bpl = (s->img_x * z->depth + 7) / 8; // bytes per line, per component
+			raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */;
+			z->expanded = (stbi_uc*)stbi_zlib_decode_malloc_guesssize_headerflag((char*)z->idata, ioff, raw_len, (int*)& raw_len, !is_iphone);
+			if (z->expanded == NULL) return 0; // zlib should set error
+			STBI_FREE(z->idata); z->idata = NULL;
+			if ((req_comp == s->img_n + 1 && req_comp != 3 && !pal_img_n) || has_trans)
+				s->img_out_n = s->img_n + 1;
+			else
+				s->img_out_n = s->img_n;
+			if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n, z->depth, color, interlace)) return 0;
+			if (has_trans) {
+				if (z->depth == 16) {
+					if (!stbi__compute_transparency16(z, tc16, s->img_out_n)) return 0;
+				}
+				else {
+					if (!stbi__compute_transparency(z, tc, s->img_out_n)) return 0;
+				}
+			}
+			if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2)
+				stbi__de_iphone(z);
+			if (pal_img_n) {
+				// pal_img_n == 3 or 4
+				s->img_n = pal_img_n; // record the actual colors we had
+				s->img_out_n = pal_img_n;
+				if (req_comp >= 3) s->img_out_n = req_comp;
+				if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n))
+					return 0;
+			}
+			else if (has_trans) {
+				// non-paletted image with tRNS -> source image has (constant) alpha
+				++s->img_n;
+			}
+			STBI_FREE(z->expanded); z->expanded = NULL;
+			return 1;
+		}
+
+		default:
+			// if critical, fail
+			if (first) return stbi__err("first not IHDR", "Corrupt PNG");
+			if ((c.type & (1 << 29)) == 0) { // bit 29 clear = first name character is upper case = critical chunk
+#ifndef STBI_NO_FAILURE_STRINGS
+				// not threadsafe
+				static char invalid_chunk[] = "XXXX PNG chunk not known";
+				invalid_chunk[0] = STBI__BYTECAST(c.type >> 24);
+				invalid_chunk[1] = STBI__BYTECAST(c.type >> 16);
+				invalid_chunk[2] = STBI__BYTECAST(c.type >> 8);
+				invalid_chunk[3] = STBI__BYTECAST(c.type >> 0);
+#endif
+				return stbi__err(invalid_chunk, "PNG not supported: unknown PNG chunk type");
+			}
+			stbi__skip(s, c.length);
+			break;
+		}
+		// end of PNG chunk, read and skip CRC
+		stbi__get32be(s);
+	}
+}
+
+// Decode a whole PNG via stbi__parse_png_file and return the pixel buffer (converted to req_comp
+// channels when req_comp is non-zero). On success *x and *y are set, and *n when n is non-NULL.
+static void* stbi__do_png(stbi__png* p, int* x, int* y, int* n, int req_comp, stbi__result_info* ri)
+{
+	void* pixels = NULL;
+	if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");
+	if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) {
+		stbi__context* s = p->s;
+		ri->bits_per_channel = (p->depth < 8) ? 8 : p->depth; // depths below 8 are reported as 8
+		pixels = p->out; // detach the buffer so the cleanup below does not free it
+		p->out = NULL;
+		if (req_comp && req_comp != s->img_out_n) {
+			if (ri->bits_per_channel != 8)
+				pixels = stbi__convert_format16((stbi__uint16*)pixels, s->img_out_n, req_comp, s->img_x, s->img_y);
+			else
+				pixels = stbi__convert_format((unsigned char*)pixels, s->img_out_n, req_comp, s->img_x, s->img_y);
+			s->img_out_n = req_comp;
+			if (!pixels) return pixels;
+		}
+		*x = s->img_x;
+		*y = s->img_y;
+		if (n != NULL) *n = s->img_n;
+	}
+	STBI_FREE(p->out);      p->out = NULL;
+	STBI_FREE(p->expanded); p->expanded = NULL;
+	STBI_FREE(p->idata);    p->idata = NULL;
+
+	return pixels;
+}
+
+static void* stbi__png_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri) // binds s to a local stbi__png and runs stbi__do_png on it
+{
+	stbi__png decoder;
+	decoder.s = s;
+	return stbi__do_png(&decoder, x, y, comp, req_comp, ri);
+}
+
+// Probe: result of the PNG signature check; the stream is rewound whether or not it matched.
+static int stbi__png_test(stbi__context* s)
+{
+	const int is_png = stbi__check_png_header(s);
+	stbi__rewind(s);
+	return is_png;
+}
+
+static int stbi__png_info_raw(stbi__png* p, int* x, int* y, int* comp) // header-only parse; fills whichever of x, y, comp are non-NULL; returns 1, or 0 after rewinding
+{
+	if (stbi__parse_png_file(p, STBI__SCAN_header, 0)) {
+		if (x != NULL) *x = p->s->img_x;
+		if (y != NULL) *y = p->s->img_y;
+		if (comp != NULL) *comp = p->s->img_n;
+		return 1;
+	}
+	stbi__rewind(p->s);
+	return 0;
+}
+
+static int stbi__png_info(stbi__context* s, int* x, int* y, int* comp) // binds s to a local stbi__png and runs stbi__png_info_raw on it
+{
+	stbi__png decoder;
+	decoder.s = s;
+	return stbi__png_info_raw(&decoder, x, y, comp);
+}
+
+// Returns 1 when the PNG header parses and declares a depth of 16, otherwise 0.
+static int stbi__png_is16(stbi__context* s)
+{
+	stbi__png png;
+	png.s = s;
+	if (stbi__png_info_raw(&png, NULL, NULL, NULL)) {
+		if (png.depth == 16) return 1;
+		// parsed fine but not 16-bit: rewind before answering 0
+		stbi__rewind(png.s);
+	}
+	return 0;
+}
+#endif
+
+// Microsoft/Windows BMP image
+
+#ifndef STBI_NO_BMP
+// Returns 1 when the stream starts with "BM" and the header-size field is one of
+// 12, 40, 56, 108 or 124; otherwise 0. Consumes input without rewinding.
+static int stbi__bmp_test_raw(stbi__context* s)
+{
+	int header_size;
+	if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M') return 0;
+	stbi__get32le(s); // discard filesize
+	stbi__get16le(s); // discard reserved
+	stbi__get16le(s); // discard reserved
+	stbi__get32le(s); // discard data offset
+	header_size = stbi__get32le(s);
+	return (header_size == 12 || header_size == 40 || header_size == 56 ||
+		header_size == 108 || header_size == 124);
+}
+
+static int stbi__bmp_test(stbi__context* s) // runs stbi__bmp_test_raw, then rewinds the stream whatever the outcome
+{
+	const int looks_like_bmp = stbi__bmp_test_raw(s);
+	stbi__rewind(s);
+	return looks_like_bmp;
+}
+
+
+// returns the index (0..31) of the highest set bit of z,
+// or -1 when z is 0
+static int stbi__high_bit(unsigned int z)
+{
+	int bit = 0, step;
+	if (!z) return -1;
+	// binary search: whenever anything remains at or above bit 'step', shift it down and count it
+	for (step = 16; step >= 1; step >>= 1) {
+		if ((z >> step) != 0) { bit += step; z >>= step; }
+	}
+	return bit;
+}
+
+// returns the number of set bits in a
+static int stbi__bitcount(unsigned int a)
+{
+	int set_bits = 0;
+	// each step clears the lowest set bit, so the loop body runs once per set bit
+	for (; a != 0; a &= a - 1)
+		++set_bits;
+	return set_bits;
+}
+
+// extract an arbitrarily-aligned N-bit value (N=bits) from v, make it 8 bits long and
+// fractionally extend it to the full 0..255 range. 'shift' is applied first (negative means
+// shift left) and must leave the field below 256. Shifting is done on an unsigned copy so a
+// field that reaches bit 31 is not sign-extended by '>>' and '<<' never acts on a negative int.
+static int stbi__shiftsigned(int v, int shift, int bits)
+{
+	static const unsigned int mul_table[9] = {
+	   0,
+	   0xff/*0b11111111*/, 0x55/*0b01010101*/, 0x49/*0b01001001*/, 0x11/*0b00010001*/,
+	   0x21/*0b00100001*/, 0x41/*0b01000001*/, 0x81/*0b10000001*/, 0x01/*0b00000001*/,
+	};
+	static const unsigned int shift_table[9] = { 0, 0,0,1,0,2,4,6,0 };
+	unsigned int uv = (unsigned int)v;
+	STBI_ASSERT(bits >= 0 && bits <= 8); // checked before 'bits' is used as a shift count and table index
+	if (shift < 0)
+		uv <<= -shift;
+	else
+		uv >>= shift;
+	STBI_ASSERT(uv < 256);
+	uv >>= (8 - bits); // keep only the top 'bits' bits of the 8-bit field
+	return (int)(uv * mul_table[bits]) >> shift_table[bits]; // replicate the pattern to fill 8 bits
+}
+
+typedef struct // BMP header fields filled in by stbi__bmp_parse_header
+{
+	int bpp, offset, hsz; // bits per pixel; offset field from the file header; size of the info header (12/40/56/108/124)
+	unsigned int mr, mg, mb, ma, all_a; // red/green/blue/alpha bit masks; all_a is preset by the caller and zeroed for uncompressed 32-bpp headers
+} stbi__bmp_data;
+
+static void* stbi__bmp_parse_header(stbi__context* s, stbi__bmp_data* info) // reads the BMP file + info headers into s->img_x/img_y and *info; returns (void*)1 on success, the stbi__errpuc result on failure
+{
+	int hsz;
+	if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M') return stbi__errpuc("not BMP", "Corrupt BMP");
+	stbi__get32le(s); // discard filesize
+	stbi__get16le(s); // discard reserved
+	stbi__get16le(s); // discard reserved
+	info->offset = stbi__get32le(s);
+	info->hsz = hsz = stbi__get32le(s);
+	info->mr = info->mg = info->mb = info->ma = 0;
+
+	if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108 && hsz != 124) return stbi__errpuc("unknown BMP", "BMP type not supported: unknown");
+	if (hsz == 12) { // the 12-byte header stores width/height as 16-bit values
+		s->img_x = stbi__get16le(s);
+		s->img_y = stbi__get16le(s);
+	}
+	else {
+		s->img_x = stbi__get32le(s);
+		s->img_y = stbi__get32le(s);
+	}
+	if (stbi__get16le(s) != 1) return stbi__errpuc("bad BMP", "bad BMP");
+	info->bpp = stbi__get16le(s);
+	if (hsz != 12) {
+		int compress = stbi__get32le(s);
+		if (compress == 1 || compress == 2) return stbi__errpuc("BMP RLE", "BMP type not supported: RLE");
+		stbi__get32le(s); // discard sizeof
+		stbi__get32le(s); // discard hres
+		stbi__get32le(s); // discard vres
+		stbi__get32le(s); // discard colorsused
+		stbi__get32le(s); // discard max important
+		if (hsz == 40 || hsz == 56) {
+			if (hsz == 56) { // four extra 32-bit fields are skipped for the 56-byte header
+				stbi__get32le(s);
+				stbi__get32le(s);
+				stbi__get32le(s);
+				stbi__get32le(s);
+			}
+			if (info->bpp == 16 || info->bpp == 32) {
+				if (compress == 0) { // no masks in the file: use default channel masks
+					if (info->bpp == 32) {
+						info->mr = 0xffu << 16;
+						info->mg = 0xffu << 8;
+						info->mb = 0xffu << 0;
+						info->ma = 0xffu << 24;
+						info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0
+					}
+					else {
+						info->mr = 31u << 10;
+						info->mg = 31u << 5;
+						info->mb = 31u << 0;
+					}
+				}
+				else if (compress == 3) { // masks follow in the file
+					info->mr = stbi__get32le(s);
+					info->mg = stbi__get32le(s);
+					info->mb = stbi__get32le(s);
+					// not documented, but generated by photoshop and handled by mspaint
+					if (info->mr == info->mg && info->mg == info->mb) {
+						// ?!?!?
+						return stbi__errpuc("bad BMP", "bad BMP");
+					}
+				}
+				else
+					return stbi__errpuc("bad BMP", "bad BMP");
+			}
+		}
+		else {
+			int i;
+			if (hsz != 108 && hsz != 124)
+				return stbi__errpuc("bad BMP", "bad BMP");
+			info->mr = stbi__get32le(s); // 108/124-byte headers always carry the four channel masks
+			info->mg = stbi__get32le(s);
+			info->mb = stbi__get32le(s);
+			info->ma = stbi__get32le(s);
+			stbi__get32le(s); // discard color space
+			for (i = 0; i < 12; ++i)
+				stbi__get32le(s); // discard color space parameters
+			if (hsz == 124) {
+				stbi__get32le(s); // discard rendering intent
+				stbi__get32le(s); // discard offset of profile data
+				stbi__get32le(s); // discard size of profile data
+				stbi__get32le(s); // discard reserved
+			}
+		}
+	}
+	return (void*)1;
+}
+
+
+// Decodes an uncompressed BMP from `s` into a newly allocated 8-bit-per-channel buffer.
+//
+//   x, y      receive the image width and height
+//   comp      if non-NULL, receives the component count found in the file (3, or 4 when an
+//             alpha mask is present)
+//   req_comp  0 to keep the file's component count, otherwise the number of components wanted;
+//             3 and 4 are decoded directly, anything else is produced by stbi__convert_format
+//   ri        unused
+//
+// Returns the pixel buffer, or NULL after recording a failure reason via stbi__errpuc.
+// Rows are flipped after decoding when the stored height is positive.
+static void* stbi__bmp_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri)
+{
+	stbi_uc* out;
+	unsigned int mr = 0, mg = 0, mb = 0, ma = 0, all_a;
+	stbi_uc pal[256][4];
+	int psize = 0, i, j, width;
+	int flip_vertically, pad, target;
+	stbi__bmp_data info;
+	STBI_NOTUSED(ri);
+
+	info.all_a = 255;
+	if (stbi__bmp_parse_header(s, &info) == NULL)
+		return NULL; // error code already set
+
+	flip_vertically = ((int)s->img_y) > 0;
+	s->img_y = abs((int)s->img_y);
+
+	mr = info.mr;
+	mg = info.mg;
+	mb = info.mb;
+	ma = info.ma;
+	all_a = info.all_a;
+
+	// palette entry count is derived from the gap between the headers and the pixel data
+	if (info.hsz == 12) {
+		if (info.bpp < 24)
+			psize = (info.offset - 14 - 24) / 3;
+	}
+	else {
+		if (info.bpp < 16)
+			psize = (info.offset - 14 - info.hsz) >> 2;
+	}
+
+	s->img_n = ma ? 4 : 3;
+	if (req_comp && req_comp >= 3) // we can directly decode 3 or 4
+		target = req_comp;
+	else
+		target = s->img_n; // if they want monochrome, we'll post-convert
+
+	 // sanity-check size
+	if (!stbi__mad3sizes_valid(target, s->img_x, s->img_y, 0))
+		return stbi__errpuc("too large", "Corrupt BMP");
+
+	out = (stbi_uc*)stbi__malloc_mad3(target, s->img_x, s->img_y, 0);
+	if (!out) return stbi__errpuc("outofmem", "Out of memory");
+	if (info.bpp < 16) {
+		// paletted image: read the palette, then expand indices to RGB(A)
+		int z = 0;
+		if (psize == 0 || psize > 256) { STBI_FREE(out); return stbi__errpuc("invalid", "Corrupt BMP"); }
+		for (i = 0; i < psize; ++i) {
+			pal[i][2] = stbi__get8(s);
+			pal[i][1] = stbi__get8(s);
+			pal[i][0] = stbi__get8(s);
+			if (info.hsz != 12) stbi__get8(s);
+			pal[i][3] = 255;
+		}
+		stbi__skip(s, info.offset - 14 - info.hsz - psize * (info.hsz == 12 ? 3 : 4));
+		if (info.bpp == 1) width = (s->img_x + 7) >> 3;
+		else if (info.bpp == 4) width = (s->img_x + 1) >> 1;
+		else if (info.bpp == 8) width = s->img_x;
+		else { STBI_FREE(out); return stbi__errpuc("bad bpp", "Corrupt BMP"); }
+		pad = (-width) & 3; // rows are padded to a multiple of 4 bytes
+		if (info.bpp == 1) {
+			for (j = 0; j < (int)s->img_y; ++j) {
+				int bit_offset = 7, v = stbi__get8(s);
+				for (i = 0; i < (int)s->img_x; ++i) {
+					int color = (v >> bit_offset) & 0x1;
+					out[z++] = pal[color][0];
+					out[z++] = pal[color][1];
+					out[z++] = pal[color][2];
+					// the buffer holds `target` bytes per pixel, so RGBA output needs its alpha byte
+					if (target == 4) out[z++] = 255;
+					// stop before fetching another source byte once the row is complete; otherwise a
+					// width that is a multiple of 8 consumes one byte too many and shifts later rows
+					if (i + 1 == (int)s->img_x) break;
+					if ((--bit_offset) < 0) {
+						bit_offset = 7;
+						v = stbi__get8(s);
+					}
+				}
+				stbi__skip(s, pad);
+			}
+		}
+		else {
+			for (j = 0; j < (int)s->img_y; ++j) {
+				for (i = 0; i < (int)s->img_x; i += 2) {
+					int v = stbi__get8(s), v2 = 0;
+					if (info.bpp == 4) {
+						v2 = v & 15;
+						v >>= 4;
+					}
+					out[z++] = pal[v][0];
+					out[z++] = pal[v][1];
+					out[z++] = pal[v][2];
+					if (target == 4) out[z++] = 255;
+					if (i + 1 == (int)s->img_x) break;
+					v = (info.bpp == 8) ? stbi__get8(s) : v2;
+					out[z++] = pal[v][0];
+					out[z++] = pal[v][1];
+					out[z++] = pal[v][2];
+					if (target == 4) out[z++] = 255;
+				}
+				stbi__skip(s, pad);
+			}
+		}
+	}
+	else {
+		int rshift = 0, gshift = 0, bshift = 0, ashift = 0, rcount = 0, gcount = 0, bcount = 0, acount = 0;
+		int z = 0;
+		int easy = 0;
+		stbi__skip(s, info.offset - 14 - info.hsz);
+		if (info.bpp == 24) width = 3 * s->img_x;
+		else if (info.bpp == 16) width = 2 * s->img_x;
+		else /* bpp = 32 and pad = 0 */ width = 0;
+		pad = (-width) & 3;
+		// easy == 1: plain 24-bit BGR; easy == 2: 32-bit BGRA with the standard byte masks
+		if (info.bpp == 24) {
+			easy = 1;
+		}
+		else if (info.bpp == 32) {
+			if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000)
+				easy = 2;
+		}
+		if (!easy) {
+			if (!mr || !mg || !mb) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); }
+			// right shift amt to put high bit in position #7
+			rshift = stbi__high_bit(mr) - 7; rcount = stbi__bitcount(mr);
+			gshift = stbi__high_bit(mg) - 7; gcount = stbi__bitcount(mg);
+			bshift = stbi__high_bit(mb) - 7; bcount = stbi__bitcount(mb);
+			ashift = stbi__high_bit(ma) - 7; acount = stbi__bitcount(ma);
+		}
+		for (j = 0; j < (int)s->img_y; ++j) {
+			if (easy) {
+				for (i = 0; i < (int)s->img_x; ++i) {
+					unsigned char a;
+					out[z + 2] = stbi__get8(s);
+					out[z + 1] = stbi__get8(s);
+					out[z + 0] = stbi__get8(s);
+					z += 3;
+					a = (easy == 2 ? stbi__get8(s) : 255);
+					all_a |= a;
+					if (target == 4) out[z++] = a;
+				}
+			}
+			else {
+				int bpp = info.bpp;
+				for (i = 0; i < (int)s->img_x; ++i) {
+					stbi__uint32 v = (bpp == 16 ? (stbi__uint32)stbi__get16le(s) : stbi__get32le(s));
+					unsigned int a;
+					out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mr, rshift, rcount));
+					out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount));
+					out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount));
+					a = (ma ? stbi__shiftsigned(v & ma, ashift, acount) : 255);
+					all_a |= a;
+					if (target == 4) out[z++] = STBI__BYTECAST(a);
+				}
+			}
+			stbi__skip(s, pad);
+		}
+	}
+
+	// if alpha channel is all 0s, replace with all 255s
+	if (target == 4 && all_a == 0)
+		for (i = 4 * s->img_x * s->img_y - 1; i >= 0; i -= 4)
+			out[i] = 255;
+
+	if (flip_vertically) {
+		stbi_uc t;
+		for (j = 0; j < (int)s->img_y >> 1; ++j) {
+			stbi_uc* p1 = out + j * s->img_x * target;
+			stbi_uc* p2 = out + (s->img_y - 1 - j) * s->img_x * target;
+			for (i = 0; i < (int)s->img_x * target; ++i) {
+				t = p1[i], p1[i] = p2[i], p2[i] = t;
+			}
+		}
+	}
+
+	if (req_comp && req_comp != target) {
+		out = stbi__convert_format(out, target, req_comp, s->img_x, s->img_y);
+		if (out == NULL) return out; // stbi__convert_format frees input on failure
+	}
+
+	*x = s->img_x;
+	*y = s->img_y;
+	if (comp)* comp = s->img_n;
+	return out;
+}
+#endif
+
+// Targa Truevision - TGA
+// by Jonathan Dummer
+#ifndef STBI_NO_TGA
+// returns STBI_rgb or whatever, 0 on error
+// Maps a TGA bit depth to an output component count (STBI_grey, STBI_grey_alpha, STBI_rgb, or
+// bits/8 for 24/32-bit data). Returns 0 for any other depth.
+// When is_rgb16 is non-NULL it is cleared first and set to 1 only for 15/16-bit RGB data.
+static int stbi__tga_get_comp(int bits_per_pixel, int is_grey, int* is_rgb16)
+{
+	if (is_rgb16) *is_rgb16 = 0;
+
+	if (bits_per_pixel == 8)
+		return STBI_grey;
+
+	// 16 bits means grey+alpha for greyscale images, packed RGB otherwise
+	if (bits_per_pixel == 16 && is_grey)
+		return STBI_grey_alpha;
+
+	if (bits_per_pixel == 15 || bits_per_pixel == 16) {
+		if (is_rgb16) *is_rgb16 = 1;
+		return STBI_rgb;
+	}
+
+	if (bits_per_pixel == 24 || bits_per_pixel == 32)
+		return bits_per_pixel / 8;
+
+	return 0;
+}
+
+// Reads a TGA header and reports width, height and component count through x, y and comp
+// (each may be NULL). Returns 1 on success; on any rejected header the stream is rewound
+// and 0 is returned.
+static int stbi__tga_info(stbi__context* s, int* x, int* y, int* comp)
+{
+	int width, height, components, image_type, pixel_bits, colormap_bits;
+	int entry_bits, colormap_type;
+
+	stbi__get8(s);                      // discard Offset
+	colormap_type = stbi__get8(s);
+	if (colormap_type > 1) goto reject; // only RGB or indexed allowed
+
+	image_type = stbi__get8(s);
+	if (colormap_type == 1) {
+		// colormapped (paletted) image: only types 1 and 9 are valid
+		if (image_type != 1 && image_type != 9) goto reject;
+		stbi__skip(s, 4);               // skip index of first colormap entry and number of entries
+		entry_bits = stbi__get8(s);     // bits per palette color entry
+		if (!(entry_bits == 8 || entry_bits == 15 || entry_bits == 16 || entry_bits == 24 || entry_bits == 32))
+			goto reject;
+		stbi__skip(s, 4);               // skip image x and y origin
+		colormap_bits = entry_bits;
+	}
+	else {
+		// "normal" image w/o colormap - only RGB or grey allowed, +/- RLE
+		if (!(image_type == 2 || image_type == 3 || image_type == 10 || image_type == 11))
+			goto reject;
+		stbi__skip(s, 9);               // skip colormap specification and image x/y origin
+		colormap_bits = 0;
+	}
+
+	width = stbi__get16le(s);
+	if (width < 1) goto reject;
+	height = stbi__get16le(s);
+	if (height < 1) goto reject;
+
+	pixel_bits = stbi__get8(s);
+	stbi__get8(s);                      // ignore alpha bits
+
+	if (colormap_bits != 0) {
+		// with a colormap, pixel_bits is the size of each index; only 8 or 16 is accepted
+		if (pixel_bits != 8 && pixel_bits != 16) goto reject;
+		components = stbi__tga_get_comp(colormap_bits, 0, NULL);
+	}
+	else {
+		components = stbi__tga_get_comp(pixel_bits, (image_type == 3) || (image_type == 11), NULL);
+	}
+	if (!components) goto reject;
+
+	if (x) *x = width;
+	if (y) *y = height;
+	if (comp) *comp = components;
+	return 1;                           // seems to have passed everything
+
+reject:
+	stbi__rewind(s);
+	return 0;
+}
+
+// Checks whether the stream plausibly starts with a supported TGA header.
+// Always rewinds the stream before returning; returns 1 if every check passed, else 0.
+static int stbi__tga_test(stbi__context* s)
+{
+	int ok = 0;
+	int value, colormap_type;
+
+	// single-pass block: `break` abandons the checks and leaves ok == 0
+	do {
+		stbi__get8(s);                    // discard Offset
+		colormap_type = stbi__get8(s);
+		if (colormap_type > 1) break;     // only RGB or indexed allowed
+
+		value = stbi__get8(s);            // image type
+		if (colormap_type == 1) {
+			// colormapped (paletted) image demands image type 1 or 9
+			if (value != 1 && value != 9) break;
+			stbi__skip(s, 4);             // skip index of first colormap entry and number of entries
+			value = stbi__get8(s);        // bits per palette color entry
+			if (!(value == 8 || value == 15 || value == 16 || value == 24 || value == 32)) break;
+			stbi__skip(s, 4);             // skip image x and y origin
+		}
+		else {
+			// "normal" image w/o colormap: only RGB or grey allowed, +/- RLE
+			if (!(value == 2 || value == 3 || value == 10 || value == 11)) break;
+			stbi__skip(s, 9);             // skip colormap specification and image x/y origin
+		}
+
+		if (stbi__get16le(s) < 1) break;  // width
+		if (stbi__get16le(s) < 1) break;  // height
+
+		value = stbi__get8(s);            // bits per pixel
+		// for colormapped images, bpp is the size of an index
+		if (colormap_type == 1 && value != 8 && value != 16) break;
+		if (!(value == 8 || value == 15 || value == 16 || value == 24 || value == 32)) break;
+
+		ok = 1;
+	} while (0);
+
+	stbi__rewind(s);
+	return ok;
+}
+
+// read 16bit value and convert to 24bit RGB
+// Reads one little-endian 16-bit pixel and writes it to out[0..2] as 8-bit R, G, B.
+static void stbi__tga_read_rgb16(stbi__context* s, stbi_uc* out)
+{
+	const stbi__uint16 channel_mask = 31;
+	stbi__uint16 packed = (stbi__uint16)stbi__get16le(s);
+	int k, shift;
+
+	// three 5-bit channels, red in the highest bits; writing them in R, G, B order means
+	// no swap is needed later
+	for (k = 0, shift = 10; k < 3; ++k, shift -= 5) {
+		int level = (packed >> shift) & channel_mask;
+		out[k] = (stbi_uc)((level * 255) / 31);
+	}
+
+	// The top bit is deliberately ignored: treating it as alpha made 16-bit test images
+	// completely translucent, so all 15 and 16-bit TGAs are handled as RGB with no alpha.
+}
+
+// Decodes a TGA image (raw or RLE, true-color, greyscale or paletted) from `s`.
+//
+//   x, y      receive the image width and height
+//   comp      if non-NULL, receives the component count of the file data
+//   req_comp  0 to keep the file's component count, otherwise the count to convert to
+//   ri        unused
+//
+// Returns a newly allocated pixel buffer, or NULL after recording a failure reason via
+// stbi__errpuc. Paletted images must carry at least one palette entry.
+static void* stbi__tga_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri)
+{
+	//   read in the TGA header stuff
+	int tga_offset = stbi__get8(s);
+	int tga_indexed = stbi__get8(s);
+	int tga_image_type = stbi__get8(s);
+	int tga_is_RLE = 0;
+	int tga_palette_start = stbi__get16le(s);
+	int tga_palette_len = stbi__get16le(s);
+	int tga_palette_bits = stbi__get8(s);
+	int tga_x_origin = stbi__get16le(s);
+	int tga_y_origin = stbi__get16le(s);
+	int tga_width = stbi__get16le(s);
+	int tga_height = stbi__get16le(s);
+	int tga_bits_per_pixel = stbi__get8(s);
+	int tga_comp, tga_rgb16 = 0;
+	int tga_inverted = stbi__get8(s);
+	// int tga_alpha_bits = tga_inverted & 15; // the 4 lowest bits - unused (useless?)
+	//   image data
+	unsigned char* tga_data;
+	unsigned char* tga_palette = NULL;
+	int i, j;
+	unsigned char raw_data[4] = { 0 };
+	int RLE_count = 0;
+	int RLE_repeating = 0;
+	int read_next_pixel = 1;
+	STBI_NOTUSED(ri);
+
+	//   do a tiny bit of preprocessing
+	if (tga_image_type >= 8)
+	{
+		tga_image_type -= 8;
+		tga_is_RLE = 1;
+	}
+	// bit 5 of the descriptor byte set means no vertical flip is needed
+	tga_inverted = 1 - ((tga_inverted >> 5) & 1);
+
+	//   If I'm paletted, then I'll use the number of bits from the palette
+	if (tga_indexed) tga_comp = stbi__tga_get_comp(tga_palette_bits, 0, &tga_rgb16);
+	else tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3), &tga_rgb16);
+
+	if (!tga_comp) // shouldn't really happen, stbi__tga_test() should have ensured basic consistency
+		return stbi__errpuc("bad format", "Can't find out TGA pixelformat");
+
+	//   tga info
+	*x = tga_width;
+	*y = tga_height;
+	if (comp)* comp = tga_comp;
+
+	if (!stbi__mad3sizes_valid(tga_width, tga_height, tga_comp, 0))
+		return stbi__errpuc("too large", "Corrupt TGA");
+
+	tga_data = (unsigned char*)stbi__malloc_mad3(tga_width, tga_height, tga_comp, 0);
+	if (!tga_data) return stbi__errpuc("outofmem", "Out of memory");
+
+	// skip to the data's starting position (offset usually = 0)
+	stbi__skip(s, tga_offset);
+
+	if (!tga_indexed && !tga_is_RLE && !tga_rgb16) {
+		// fast path: raw rows can be read straight into their final (possibly flipped) position
+		for (i = 0; i < tga_height; ++i) {
+			int row = tga_inverted ? tga_height - i - 1 : i;
+			stbi_uc* tga_row = tga_data + row * tga_width * tga_comp;
+			stbi__getn(s, tga_row, tga_width * tga_comp);
+		}
+	}
+	else {
+		//   do I need to load a palette?
+		if (tga_indexed)
+		{
+			// An empty palette cannot be indexed: out-of-range indices fall back to entry 0,
+			// which would read past a zero-sized allocation.
+			if (tga_palette_len == 0) {
+				STBI_FREE(tga_data);
+				return stbi__errpuc("bad palette", "Corrupt TGA");
+			}
+			//   any data to skip? (offset usually = 0)
+			stbi__skip(s, tga_palette_start);
+			//   load the palette
+			tga_palette = (unsigned char*)stbi__malloc_mad2(tga_palette_len, tga_comp, 0);
+			if (!tga_palette) {
+				STBI_FREE(tga_data);
+				return stbi__errpuc("outofmem", "Out of memory");
+			}
+			if (tga_rgb16) {
+				stbi_uc* pal_entry = tga_palette;
+				STBI_ASSERT(tga_comp == STBI_rgb);
+				for (i = 0; i < tga_palette_len; ++i) {
+					stbi__tga_read_rgb16(s, pal_entry);
+					pal_entry += tga_comp;
+				}
+			}
+			else if (!stbi__getn(s, tga_palette, tga_palette_len * tga_comp)) {
+				STBI_FREE(tga_data);
+				STBI_FREE(tga_palette);
+				return stbi__errpuc("bad palette", "Corrupt TGA");
+			}
+		}
+		//   load the data
+		for (i = 0; i < tga_width * tga_height; ++i)
+		{
+			//   if I'm in RLE mode, do I need to get a RLE packet header?
+			if (tga_is_RLE)
+			{
+				if (RLE_count == 0)
+				{
+					//   yep, get the next byte as a RLE command
+					int RLE_cmd = stbi__get8(s);
+					RLE_count = 1 + (RLE_cmd & 127);
+					RLE_repeating = RLE_cmd >> 7;
+					read_next_pixel = 1;
+				}
+				else if (!RLE_repeating)
+				{
+					read_next_pixel = 1;
+				}
+			}
+			else
+			{
+				read_next_pixel = 1;
+			}
+			//   OK, if I need to read a pixel, do it now
+			if (read_next_pixel)
+			{
+				//   load however much data we did have
+				if (tga_indexed)
+				{
+					// read in index, then perform the lookup
+					int pal_idx = (tga_bits_per_pixel == 8) ? stbi__get8(s) : stbi__get16le(s);
+					if (pal_idx >= tga_palette_len) {
+						// invalid index
+						pal_idx = 0;
+					}
+					pal_idx *= tga_comp;
+					for (j = 0; j < tga_comp; ++j) {
+						raw_data[j] = tga_palette[pal_idx + j];
+					}
+				}
+				else if (tga_rgb16) {
+					STBI_ASSERT(tga_comp == STBI_rgb);
+					stbi__tga_read_rgb16(s, raw_data);
+				}
+				else {
+					//   read in the data raw
+					for (j = 0; j < tga_comp; ++j) {
+						raw_data[j] = stbi__get8(s);
+					}
+				}
+				//   clear the reading flag for the next pixel
+				read_next_pixel = 0;
+			} // end of reading a pixel
+
+			// copy data
+			for (j = 0; j < tga_comp; ++j)
+				tga_data[i * tga_comp + j] = raw_data[j];
+
+			//   in case we're in RLE mode, keep counting down
+			--RLE_count;
+		}
+		//   do I need to invert the image?
+		if (tga_inverted)
+		{
+			for (j = 0; j * 2 < tga_height; ++j)
+			{
+				int index1 = j * tga_width * tga_comp;
+				int index2 = (tga_height - 1 - j) * tga_width * tga_comp;
+				for (i = tga_width * tga_comp; i > 0; --i)
+				{
+					unsigned char temp = tga_data[index1];
+					tga_data[index1] = tga_data[index2];
+					tga_data[index2] = temp;
+					++index1;
+					++index2;
+				}
+			}
+		}
+		//   clear my palette, if I had one
+		if (tga_palette != NULL)
+		{
+			STBI_FREE(tga_palette);
+		}
+	}
+
+	// swap RGB - if the source data was RGB16, it already is in the right order
+	if (tga_comp >= 3 && !tga_rgb16)
+	{
+		unsigned char* tga_pixel = tga_data;
+		for (i = 0; i < tga_width * tga_height; ++i)
+		{
+			unsigned char temp = tga_pixel[0];
+			tga_pixel[0] = tga_pixel[2];
+			tga_pixel[2] = temp;
+			tga_pixel += tga_comp;
+		}
+	}
+
+	// convert to target component count
+	if (req_comp && req_comp != tga_comp)
+		tga_data = stbi__convert_format(tga_data, tga_comp, req_comp, tga_width, tga_height);
+
+	//   the things I do to get rid of an error message, and yet keep
+	//   Microsoft's C compilers happy... [8^(
+	tga_palette_start = tga_palette_len = tga_palette_bits =
+		tga_x_origin = tga_y_origin = 0;
+	//   OK, done
+	return tga_data;
+}
+#endif
+
+// *************************************************************************************************
+// Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB
+
+#ifndef STBI_NO_PSD
+// Returns nonzero if the stream begins with the PSD signature "8BPS"; always rewinds.
+static int stbi__psd_test(stbi__context* s)
+{
+	stbi__uint32 signature = stbi__get32be(s);
+	stbi__rewind(s);
+	return signature == 0x38425053;
+}
+
+// Unpacks one RLE-compressed channel of pixelCount samples into p, writing every 4th byte
+// (the destination is interleaved with a stride of 4).
+// Returns 1 on success, 0 if a run would overflow the remaining pixel count.
+static int stbi__psd_decode_rle(stbi__context* s, stbi_uc* p, int pixelCount)
+{
+	int remaining = pixelCount;
+
+	while (remaining > 0) {
+		int code = stbi__get8(s);
+		int run, k;
+
+		if (code == 128)
+			continue; // No-op.
+
+		if (code < 128) {
+			// Copy the next code+1 bytes literally.
+			run = code + 1;
+			if (run > remaining) return 0; // corrupt data
+			for (k = 0; k < run; ++k, p += 4)
+				*p = stbi__get8(s);
+		}
+		else {
+			// Replicate the next source byte 257-code times
+			// (i.e. -n+1 with the code read as a negative 8-bit int n).
+			stbi_uc fill;
+			run = 257 - code;
+			if (run > remaining) return 0; // corrupt data
+			fill = stbi__get8(s);
+			for (k = 0; k < run; ++k, p += 4)
+				*p = fill;
+		}
+		remaining -= run;
+	}
+
+	return 1;
+}
+
+// Decodes the composite image of an RGB-mode PSD file from `s`.
+//
+//   x, y      receive the image width and height
+//   comp      if non-NULL, always receives 4
+//   req_comp  0 or 4 to get RGBA, otherwise the component count to convert to
+//   ri        bits_per_channel is set to 16 when 16-bit output is produced
+//   bpc       requested output bits per channel; 16-bit output is only produced for
+//             uncompressed 16-bit files when bpc == 16
+//
+// Returns a newly allocated buffer, or NULL after recording a failure reason via stbi__errpuc.
+static void* stbi__psd_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri, int bpc)
+{
+	int pixelCount;
+	int channelCount, compression;
+	int channel, i;
+	int bitdepth;
+	int w, h;
+	stbi_uc* out;
+	// NOTE(review): ri is in fact read and written below, so this marker is redundant.
+	STBI_NOTUSED(ri);
+
+	// Check identifier
+	if (stbi__get32be(s) != 0x38425053)   // "8BPS"
+		return stbi__errpuc("not PSD", "Corrupt PSD image");
+
+	// Check file type version.
+	if (stbi__get16be(s) != 1)
+		return stbi__errpuc("wrong version", "Unsupported version of PSD image");
+
+	// Skip 6 reserved bytes.
+	stbi__skip(s, 6);
+
+	// Read the number of channels (R, G, B, A, etc).
+	channelCount = stbi__get16be(s);
+	if (channelCount < 0 || channelCount > 16)
+		return stbi__errpuc("wrong channel count", "Unsupported number of channels in PSD image");
+
+	// Read the rows and columns of the image.
+	h = stbi__get32be(s);
+	w = stbi__get32be(s);
+
+	// Make sure the depth is 8 or 16 bits.
+	bitdepth = stbi__get16be(s);
+	if (bitdepth != 8 && bitdepth != 16)
+		return stbi__errpuc("unsupported bit depth", "PSD bit depth is not 8 or 16 bit");
+
+	// Make sure the color mode is RGB.
+	// Valid options are:
+	//   0: Bitmap
+	//   1: Grayscale
+	//   2: Indexed color
+	//   3: RGB color
+	//   4: CMYK color
+	//   7: Multichannel
+	//   8: Duotone
+	//   9: Lab color
+	if (stbi__get16be(s) != 3)
+		return stbi__errpuc("wrong color format", "PSD is not in RGB color format");
+
+	// Skip the Mode Data.  (It's the palette for indexed color; other info for other modes.)
+	stbi__skip(s, stbi__get32be(s));
+
+	// Skip the image resources.  (resolution, pen tool paths, etc)
+	stbi__skip(s, stbi__get32be(s));
+
+	// Skip the reserved data.
+	stbi__skip(s, stbi__get32be(s));
+
+	// Find out if the data is compressed.
+	// Known values:
+	//   0: no compression
+	//   1: RLE compressed
+	compression = stbi__get16be(s);
+	if (compression > 1)
+		return stbi__errpuc("bad compression", "PSD has an unknown compression format");
+
+	// Check size
+	if (!stbi__mad3sizes_valid(4, w, h, 0))
+		return stbi__errpuc("too large", "Corrupt PSD");
+
+	// Create the destination image: 8 bytes per pixel for 16-bit RGBA output, else 4.
+
+	if (!compression && bitdepth == 16 && bpc == 16) {
+		out = (stbi_uc*)stbi__malloc_mad3(8, w, h, 0);
+		ri->bits_per_channel = 16;
+	}
+	else
+		out = (stbi_uc*)stbi__malloc(4 * w * h);
+
+	if (!out) return stbi__errpuc("outofmem", "Out of memory");
+	pixelCount = w * h;
+
+	// Initialize the data to zero.
+	//memset( out, 0, pixelCount * 4 );
+
+	// Finally, the image data.
+	if (compression) {
+		// RLE as used by .PSD and .TIFF
+		// Loop until you get the number of unpacked bytes you are expecting:
+		//     Read the next source byte into n.
+		//     If n is between 0 and 127 inclusive, copy the next n+1 bytes literally.
+		//     Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times.
+		//     Else if n is 128, noop.
+		// Endloop
+
+		// The RLE-compressed data is preceded by a 2-byte data count for each row in the data,
+		// which we're going to just skip.
+		stbi__skip(s, h * channelCount * 2);
+
+		// Read the RLE data by channel; only the first four channels are kept.
+		for (channel = 0; channel < 4; channel++) {
+			stbi_uc* p;
+
+			p = out + channel;
+			if (channel >= channelCount) {
+				// Fill this channel with default data (opaque alpha, zero color).
+				for (i = 0; i < pixelCount; i++, p += 4)
+					* p = (channel == 3 ? 255 : 0);
+			}
+			else {
+				// Read the RLE data.
+				if (!stbi__psd_decode_rle(s, p, pixelCount)) {
+					STBI_FREE(out);
+					return stbi__errpuc("corrupt", "bad RLE data");
+				}
+			}
+		}
+
+	}
+	else {
+		// We're at the raw image data.  It's each channel in order (Red, Green, Blue, Alpha, ...)
+		// where each channel consists of an 8-bit (or 16-bit) value for each pixel in the image.
+
+		// Read the data by channel.
+		for (channel = 0; channel < 4; channel++) {
+			if (channel >= channelCount) {
+				// Fill this channel with default data.
+				if (bitdepth == 16 && bpc == 16) {
+					stbi__uint16* q = ((stbi__uint16*)out) + channel;
+					stbi__uint16 val = channel == 3 ? 65535 : 0;
+					for (i = 0; i < pixelCount; i++, q += 4)
+						* q = val;
+				}
+				else {
+					stbi_uc* p = out + channel;
+					stbi_uc val = channel == 3 ? 255 : 0;
+					for (i = 0; i < pixelCount; i++, p += 4)
+						* p = val;
+				}
+			}
+			else {
+				if (ri->bits_per_channel == 16) {    // output bpc
+					stbi__uint16* q = ((stbi__uint16*)out) + channel;
+					for (i = 0; i < pixelCount; i++, q += 4)
+						* q = (stbi__uint16)stbi__get16be(s);
+				}
+				else {
+					stbi_uc* p = out + channel;
+					if (bitdepth == 16) {  // input bpc
+						// 16-bit input, 8-bit output: keep the high byte of each sample
+						for (i = 0; i < pixelCount; i++, p += 4)
+							* p = (stbi_uc)(stbi__get16be(s) >> 8);
+					}
+					else {
+						for (i = 0; i < pixelCount; i++, p += 4)
+							* p = stbi__get8(s);
+					}
+				}
+			}
+		}
+	}
+
+	// remove weird white matte from PSD
+	// (only partially transparent pixels are adjusted; alpha 0 and full alpha are left alone)
+	if (channelCount >= 4) {
+		if (ri->bits_per_channel == 16) {
+			for (i = 0; i < w * h; ++i) {
+				stbi__uint16* pixel = (stbi__uint16*)out + 4 * i;
+				if (pixel[3] != 0 && pixel[3] != 65535) {
+					float a = pixel[3] / 65535.0f;
+					float ra = 1.0f / a;
+					float inv_a = 65535.0f * (1 - ra);
+					pixel[0] = (stbi__uint16)(pixel[0] * ra + inv_a);
+					pixel[1] = (stbi__uint16)(pixel[1] * ra + inv_a);
+					pixel[2] = (stbi__uint16)(pixel[2] * ra + inv_a);
+				}
+			}
+		}
+		else {
+			for (i = 0; i < w * h; ++i) {
+				unsigned char* pixel = out + 4 * i;
+				if (pixel[3] != 0 && pixel[3] != 255) {
+					float a = pixel[3] / 255.0f;
+					float ra = 1.0f / a;
+					float inv_a = 255.0f * (1 - ra);
+					pixel[0] = (unsigned char)(pixel[0] * ra + inv_a);
+					pixel[1] = (unsigned char)(pixel[1] * ra + inv_a);
+					pixel[2] = (unsigned char)(pixel[2] * ra + inv_a);
+				}
+			}
+		}
+	}
+
+	// convert to desired output format
+	if (req_comp && req_comp != 4) {
+		if (ri->bits_per_channel == 16)
+			out = (stbi_uc*)stbi__convert_format16((stbi__uint16*)out, 4, req_comp, w, h);
+		else
+			out = stbi__convert_format(out, 4, req_comp, w, h);
+		if (out == NULL) return out; // stbi__convert_format frees input on failure
+	}
+
+	if (comp)* comp = 4;
+	*y = h;
+	*x = w;
+
+	return out;
+}
+#endif
+
+// *************************************************************************************************
+// Softimage PIC loader
+// by Tom Seddon
+//
+// See http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format
+// See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/
+
+#ifndef STBI_NO_PIC
+// Reads up to four bytes from the stream and compares them with str[0..3].
+// Returns 1 if all four match; returns 0 at the first mismatch without reading further.
+static int stbi__pic_is4(stbi__context* s, const char* str)
+{
+	const char* cursor = str;
+	const char* stop = str + 4;
+
+	while (cursor != stop) {
+		if (stbi__get8(s) != (stbi_uc)*cursor)
+			return 0;
+		++cursor;
+	}
+
+	return 1;
+}
+
+// Checks the two PIC signatures: a 4-byte magic number at the start and the tag "PICT"
+// after 84 further bytes. Returns 1 if both match, else 0. Does not rewind the stream.
+static int stbi__pic_test_core(stbi__context* s)
+{
+	int skipped = 0;
+
+	if (!stbi__pic_is4(s, "\x53\x80\xF6\x34"))
+		return 0;
+
+	// step over the 84 bytes between the magic number and the "PICT" tag
+	while (skipped < 84) {
+		stbi__get8(s);
+		++skipped;
+	}
+
+	return stbi__pic_is4(s, "PICT") ? 1 : 0;
+}
+
+// One channel-packet descriptor from a PIC header.
+typedef struct
+{
+	stbi_uc size;    // bits per channel; the loader rejects anything other than 8
+	stbi_uc type;    // encoding of the packet data: 0 = uncompressed, 1 = pure RLE, 2 = mixed RLE
+	stbi_uc channel; // bitmask of channels carried, tested from 0x80 downwards; 0x10 marks alpha
+} stbi__pic_packet;
+
+// Reads one byte from the stream for each channel selected in `channel` (bit 0x80 maps to
+// dest[0], 0x40 to dest[1], and so on for four slots); unselected slots are left untouched.
+// Returns dest, or NULL with an error recorded if the stream ends first.
+static stbi_uc* stbi__readval(stbi__context* s, int channel, stbi_uc* dest)
+{
+	int slot;
+
+	for (slot = 0; slot < 4; ++slot) {
+		if (!(channel & (0x80 >> slot)))
+			continue;
+		if (stbi__at_eof(s))
+			return stbi__errpuc("bad file", "PIC file too short");
+		dest[slot] = stbi__get8(s);
+	}
+
+	return dest;
+}
+
+// Copies src[i] to dest[i] for each of the four slots whose bit is set in `channel`
+// (bit 0x80 selects slot 0, 0x40 slot 1, 0x20 slot 2, 0x10 slot 3).
+static void stbi__copyval(int channel, stbi_uc* dest, const stbi_uc* src)
+{
+	int slot;
+
+	for (slot = 0; slot < 4; ++slot) {
+		if (channel & (0x80 >> slot))
+			dest[slot] = src[slot];
+	}
+}
+
+// Reads the PIC packet table and then decodes `height` scanlines of `width` pixels into
+// `result`, which is addressed as 4 bytes per pixel.
+//
+//   comp    receives 4 if any packet carries the 0x10 (alpha) channel bit, otherwise 3
+//   result  caller-provided destination buffer; channels not carried by any packet are
+//           left as the caller initialised them
+//
+// Returns `result` on success, or NULL (0) after recording a failure reason.
+static stbi_uc* stbi__pic_load_core(stbi__context* s, int width, int height, int* comp, stbi_uc* result)
+{
+	int act_comp = 0, num_packets = 0, y, chained;
+	stbi__pic_packet packets[10];
+
+	// this will (should...) cater for even some bizarre stuff like having data
+	 // for the same channel in multiple packets.
+	do {
+		stbi__pic_packet* packet;
+
+		// refuse to read more descriptors than the fixed-size table can hold
+		if (num_packets == sizeof(packets) / sizeof(packets[0]))
+			return stbi__errpuc("bad format", "too many packets");
+
+		packet = &packets[num_packets++];
+
+		chained = stbi__get8(s); // nonzero means another packet descriptor follows
+		packet->size = stbi__get8(s);
+		packet->type = stbi__get8(s);
+		packet->channel = stbi__get8(s);
+
+		// accumulate the union of all channel masks
+		act_comp |= packet->channel;
+
+		if (stbi__at_eof(s))          return stbi__errpuc("bad file", "file too short (reading packets)");
+		if (packet->size != 8)  return stbi__errpuc("bad format", "packet isn't 8bpp");
+	} while (chained);
+
+	*comp = (act_comp & 0x10 ? 4 : 3); // has alpha channel?
+
+	// each scanline stores the data of every packet in turn
+	for (y = 0; y < height; ++y) {
+		int packet_idx;
+
+		for (packet_idx = 0; packet_idx < num_packets; ++packet_idx) {
+			stbi__pic_packet* packet = &packets[packet_idx];
+			stbi_uc* dest = result + y * width * 4;
+
+			switch (packet->type) {
+			default:
+				return stbi__errpuc("bad format", "packet has bad compression type");
+
+			case 0: {//uncompressed
+				int x;
+
+				for (x = 0; x < width; ++x, dest += 4)
+					if (!stbi__readval(s, packet->channel, dest))
+						return 0;
+				break;
+			}
+
+			case 1://Pure RLE
+			{
+				int left = width, i;
+
+				// each run is a count byte followed by one value to repeat
+				while (left > 0) {
+					stbi_uc count, value[4];
+
+					count = stbi__get8(s);
+					if (stbi__at_eof(s))   return stbi__errpuc("bad file", "file too short (pure read count)");
+
+					// a run longer than the rest of the scanline is clamped rather than rejected
+					if (count > left)
+						count = (stbi_uc)left;
+
+					if (!stbi__readval(s, packet->channel, value))  return 0;
+
+					for (i = 0; i < count; ++i, dest += 4)
+						stbi__copyval(packet->channel, dest, value);
+					left -= count;
+				}
+			}
+			break;
+
+			case 2: {//Mixed RLE
+				int left = width;
+				while (left > 0) {
+					int count = stbi__get8(s), i;
+					if (stbi__at_eof(s))  return stbi__errpuc("bad file", "file too short (mixed read count)");
+
+					if (count >= 128) { // Repeated
+						stbi_uc value[4];
+
+						// 128 introduces an explicit 16-bit run length; 129..255 encode 2..128
+						if (count == 128)
+							count = stbi__get16be(s);
+						else
+							count -= 127;
+						if (count > left)
+							return stbi__errpuc("bad file", "scanline overrun");
+
+						if (!stbi__readval(s, packet->channel, value))
+							return 0;
+
+						for (i = 0; i < count; ++i, dest += 4)
+							stbi__copyval(packet->channel, dest, value);
+					}
+					else { // Raw
+						// 0..127 means count+1 literal pixels follow
+						++count;
+						if (count > left) return stbi__errpuc("bad file", "scanline overrun");
+
+						for (i = 0; i < count; ++i, dest += 4)
+							if (!stbi__readval(s, packet->channel, dest))
+								return 0;
+					}
+					left -= count;
+				}
+				break;
+			}
+			}
+		}
+	}
+
+	return result;
+}
+
+// Decodes a Softimage PIC image from `s`.
+//
+//   px, py    receive the image width and height on success
+//   comp      if non-NULL, receives 3 or 4 depending on whether the file carries alpha
+//   req_comp  0 to use the file's component count, otherwise the count to convert to
+//   ri        unused
+//
+// Returns a newly allocated buffer, or NULL after recording a failure reason.
+static void* stbi__pic_load(stbi__context* s, int* px, int* py, int* comp, int req_comp, stbi__result_info* ri)
+{
+	stbi_uc* result;
+	int i, x, y, internal_comp;
+	STBI_NOTUSED(ri);
+
+	if (!comp) comp = &internal_comp;
+
+	// skip the first 92 header bytes (includes the signatures checked by stbi__pic_test_core)
+	for (i = 0; i < 92; ++i)
+		stbi__get8(s);
+
+	x = stbi__get16be(s);
+	y = stbi__get16be(s);
+	if (stbi__at_eof(s))  return stbi__errpuc("bad file", "file too short (pic header)");
+	if (!stbi__mad3sizes_valid(x, y, 4, 0)) return stbi__errpuc("too large", "PIC image too large to decode");
+
+	stbi__get32be(s); //skip `ratio'
+	stbi__get16be(s); //skip `fields'
+	stbi__get16be(s); //skip `pad'
+
+	// intermediate buffer is RGBA
+	result = (stbi_uc*)stbi__malloc_mad3(x, y, 4, 0);
+	// the allocation can fail; it must not reach memset as NULL
+	if (!result) return stbi__errpuc("outofmem", "Out of memory");
+	// pre-fill with 0xff so channels absent from the file (e.g. alpha) come out fully set
+	memset(result, 0xff, x * y * 4);
+
+	if (!stbi__pic_load_core(s, x, y, comp, result)) {
+		// Stop here: *comp may never have been written, and the format conversion below
+		// must not be handed a NULL buffer.
+		STBI_FREE(result);
+		return 0;
+	}
+	*px = x;
+	*py = y;
+	if (req_comp == 0) req_comp = *comp;
+	result = stbi__convert_format(result, 4, req_comp, x, y);
+
+	return result;
+}
+
+// Returns nonzero if the stream carries both PIC signatures; always rewinds the stream.
+static int stbi__pic_test(stbi__context* s)
+{
+	const int is_pic = stbi__pic_test_core(s);
+
+	stbi__rewind(s);
+	return is_pic;
+}
+#endif
+
+// *************************************************************************************************
+// GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb
+
+#ifndef STBI_NO_GIF
+// One entry of the GIF LZW code table.
+typedef struct
+{
+	stbi__int16 prefix; // code to emit before this one; a negative value ends the chain
+	stbi_uc first;      // presumably the first byte of the string this code expands to - TODO confirm in the raster decoder
+	stbi_uc suffix;     // color-table index of the pixel this entry itself emits
+} stbi__gif_lzw;
+
+// Decoder state for a GIF stream: header fields, color tables, the LZW code table and the
+// output cursor used while emitting pixels.
+typedef struct
+{
+	int w, h;                     // canvas size read from the GIF header
+	stbi_uc* out;                 // output buffer (always 4 components)
+	stbi_uc* background;          // The current "background" as far as a gif is concerned
+	stbi_uc* history;             // one byte per pixel, set to 1 when that pixel is written
+	int flags, bgindex, ratio, transparent, eflags; // flags/bgindex/ratio come from the header; transparent starts at -1
+	stbi_uc  pal[256][4];         // global color table, filled when header flag 0x80 is set
+	stbi_uc lpal[256][4];         // presumably the per-image (local) color table - TODO confirm
+	stbi__gif_lzw codes[8192];    // LZW code table
+	stbi_uc* color_table;         // table currently used to look up decoded pixels
+	int parse, step;              // remaining interlace passes and the cur_y increment per row
+	int lflags;
+	int start_x, start_y;         // cursor reset values (byte offsets into out)
+	int max_x, max_y;             // exclusive cursor limits (byte offsets into out)
+	int cur_x, cur_y;             // current cursor; the output index is cur_x + cur_y
+	int line_size;                // used to derive `step` for interlace passes
+	int delay;
+} stbi__gif;
+
+// Checks for a "GIF87a" or "GIF89a" signature, reading bytes only until the first mismatch.
+// Returns 1 on a match, else 0. Does not rewind the stream.
+static int stbi__gif_test_raw(stbi__context* s)
+{
+	static const char prefix[] = "GIF8";
+	int k, version;
+
+	for (k = 0; k < 4; ++k) {
+		if (stbi__get8(s) != prefix[k]) return 0;
+	}
+
+	version = stbi__get8(s);
+	if (!(version == '9' || version == '7')) return 0;
+
+	return stbi__get8(s) == 'a' ? 1 : 0;
+}
+
+// Returns nonzero if the stream starts with a GIF signature; always rewinds the stream.
+static int stbi__gif_test(stbi__context* s)
+{
+	const int is_gif = stbi__gif_test_raw(s);
+
+	stbi__rewind(s);
+	return is_gif;
+}
+
+// Reads num_entries 3-byte color entries from the stream into pal. The three bytes are
+// stored in reverse order in slots 2, 1, 0; slot 3 is 255, or 0 for the entry whose index
+// equals transp.
+static void stbi__gif_parse_colortable(stbi__context* s, stbi_uc pal[256][4], int num_entries, int transp)
+{
+	int entry;
+
+	for (entry = 0; entry < num_entries; ++entry) {
+		stbi_uc* slot = pal[entry];
+
+		slot[2] = stbi__get8(s);
+		slot[1] = stbi__get8(s);
+		slot[0] = stbi__get8(s);
+		slot[3] = (entry == transp) ? 0 : 255;
+	}
+}
+
+// Parses the GIF signature and logical screen descriptor into g.
+//
+//   comp     if non-NULL, set to 4
+//   is_info  nonzero to stop after the descriptor without reading the global color table
+//
+// Returns 1 on success, or the result of stbi__err if the signature is wrong.
+static int stbi__gif_header(stbi__context* s, stbi__gif* g, int* comp, int is_info)
+{
+	stbi_uc version;
+	int has_prefix;
+
+	// evaluation stops at the first mismatching byte, so no further bytes are consumed
+	has_prefix = stbi__get8(s) == 'G' && stbi__get8(s) == 'I' && stbi__get8(s) == 'F' && stbi__get8(s) == '8';
+	if (!has_prefix)
+		return stbi__err("not GIF", "Corrupt GIF");
+
+	version = stbi__get8(s);
+	if (!(version == '7' || version == '9'))
+		return stbi__err("not GIF", "Corrupt GIF");
+	if (stbi__get8(s) != 'a')
+		return stbi__err("not GIF", "Corrupt GIF");
+
+	stbi__g_failure_reason = "";
+
+	g->w = stbi__get16le(s);
+	g->h = stbi__get16le(s);
+	g->flags = stbi__get8(s);
+	g->bgindex = stbi__get8(s);
+	g->ratio = stbi__get8(s);
+	g->transparent = -1;
+
+	// can't actually tell whether it's 3 or 4 until we parse the comments
+	if (comp) *comp = 4;
+
+	// flag 0x80 announces a global color table of 2 << (flags & 7) entries
+	if (!is_info && (g->flags & 0x80))
+		stbi__gif_parse_colortable(s, g->pal, 2 << (g->flags & 7), -1);
+
+	return 1;
+}
+
+// Reads only the GIF header to report the image dimensions and component count.
+//   x, y: if non-NULL, receive the canvas width / height
+//   comp: forwarded to stbi__gif_header
+// Returns 1 on success. Returns 0 if the header is not a valid GIF (the stream is
+// rewound in that case) or if the temporary decoder state cannot be allocated.
+static int stbi__gif_info_raw(stbi__context* s, int* x, int* y, int* comp)
+{
+	// stbi__gif is large, so it lives on the heap rather than the stack
+	stbi__gif* g = (stbi__gif*)stbi__malloc(sizeof(stbi__gif));
+	if (g == NULL) return stbi__err("outofmem", "Out of memory");
+	if (!stbi__gif_header(s, g, comp, 1)) {
+		STBI_FREE(g);
+		stbi__rewind(s);
+		return 0;
+	}
+	if (x) *x = g->w;
+	if (y) *y = g->h;
+	STBI_FREE(g);
+	return 1;
+}
+
+// Emits the pixel string for one LZW code into g->out.
+// The code table links each entry to its prefix, i.e. back to front, so the
+// prefix chain is expanded first by recursion and this entry's own suffix pixel
+// is written afterwards. Advances the write cursor and handles row wrap and
+// interlace passes.
+static void stbi__out_gif_code(stbi__gif* g, stbi__uint16 code)
+{
+	stbi__gif_lzw* entry = &g->codes[code];
+	stbi_uc* dst;
+	stbi_uc* rgba;
+	int offset;
+
+	if (entry->prefix >= 0)
+		stbi__out_gif_code(g, entry->prefix);
+
+	// cursor is already past the bottom of the frame rectangle: drop the pixel
+	if (g->cur_y >= g->max_y) return;
+
+	offset = g->cur_x + g->cur_y;
+	dst = g->out + offset;
+	g->history[offset / 4] = 1;   // mark this pixel as touched by the current frame
+
+	rgba = g->color_table + entry->suffix * 4;
+	if (rgba[3] > 128) { // don't render transparent pixels
+		dst[0] = rgba[2];
+		dst[1] = rgba[1];
+		dst[2] = rgba[0];
+		dst[3] = rgba[3];
+	}
+	g->cur_x += 4;
+
+	if (g->cur_x < g->max_x) return;
+
+	// end of row: go back to the left edge and step down
+	g->cur_x = g->start_x;
+	g->cur_y += g->step;
+
+	// ran off the bottom with interlace passes remaining: start the next pass
+	while (g->cur_y >= g->max_y && g->parse > 0) {
+		g->step = (1 << g->parse) * g->line_size;
+		g->cur_y = g->start_y + (g->step >> 1);
+		--g->parse;
+	}
+}
+
+// Decodes the LZW-compressed pixel data of one GIF image, emitting pixels through
+// stbi__out_gif_code.
+//   s: input stream; the first byte read is used as the initial LZW code size
+//   g: decoder state; g->codes is (re)initialised here
+// Returns g->out when the data ends (zero-length sub-block or end-of-stream code),
+// NULL when the initial code size exceeds 12, and the result of stbi__errpuc for a
+// corrupt code stream.
+static stbi_uc* stbi__process_gif_raster(stbi__context* s, stbi__gif* g)
+{
+	stbi_uc lzw_cs;
+	stbi__int32 len, init_code;
+	stbi__uint32 first;
+	stbi__int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear;
+	stbi__gif_lzw* p;
+
+	lzw_cs = stbi__get8(s);
+	if (lzw_cs > 12) return NULL;
+	clear = 1 << lzw_cs;               // the clear code; clear + 1 is the end-of-stream code
+	first = 1;                         // stays 1 until a clear code has been seen
+	codesize = lzw_cs + 1;
+	codemask = (1 << codesize) - 1;
+	bits = 0;                          // bit accumulator
+	valid_bits = 0;                    // number of unread bits in the accumulator
+	// root codes: each expands to the single byte equal to its own index
+	for (init_code = 0; init_code < clear; init_code++) {
+		g->codes[init_code].prefix = -1;
+		g->codes[init_code].first = (stbi_uc)init_code;
+		g->codes[init_code].suffix = (stbi_uc)init_code;
+	}
+
+	// support no starting clear code
+	avail = clear + 2;                 // next free slot in the code table
+	oldcode = -1;
+
+	len = 0;                           // bytes left in the current data sub-block
+	for (;;) {
+		if (valid_bits < codesize) {
+			// not enough bits for a full code: pull in one more byte
+			if (len == 0) {
+				len = stbi__get8(s); // start new block
+				if (len == 0)
+					return g->out;
+			}
+			--len;
+			bits |= (stbi__int32)stbi__get8(s) << valid_bits;
+			valid_bits += 8;
+		}
+		else {
+			stbi__int32 code = bits & codemask;
+			bits >>= codesize;
+			valid_bits -= codesize;
+			// @OPTIMIZE: is there some way we can accelerate the non-clear path?
+			if (code == clear) {  // clear code
+				codesize = lzw_cs + 1;
+				codemask = (1 << codesize) - 1;
+				avail = clear + 2;
+				oldcode = -1;
+				first = 0;
+			}
+			else if (code == clear + 1) { // end of stream code
+				// skip the rest of this sub-block and any that follow, up to the terminator
+				stbi__skip(s, len);
+				while ((len = stbi__get8(s)) > 0)
+					stbi__skip(s, len);
+				return g->out;
+			}
+			else if (code <= avail) {
+				if (first) {
+					return stbi__errpuc("no clear code", "Corrupt GIF");
+				}
+
+				if (oldcode >= 0) {
+					// add a table entry: the previous string plus the first byte of the current one
+					p = &g->codes[avail++];
+					if (avail > 8192) {
+						return stbi__errpuc("too many codes", "Corrupt GIF");
+					}
+
+					p->prefix = (stbi__int16)oldcode;
+					p->first = g->codes[oldcode].first;
+					// code == avail means the code refers to the entry being defined right now
+					p->suffix = (code == avail) ? p->first : g->codes[code].first;
+				}
+				else if (code == avail)
+					return stbi__errpuc("illegal code in raster", "Corrupt GIF");
+
+				stbi__out_gif_code(g, (stbi__uint16)code);
+
+				// table filled the current code width: widen by one bit (not beyond 12 bits)
+				if ((avail & codemask) == 0 && avail <= 0x0FFF) {
+					codesize++;
+					codemask = (1 << codesize) - 1;
+				}
+
+				oldcode = code;
+			}
+			else {
+				return stbi__errpuc("illegal code in raster", "Corrupt GIF");
+			}
+		}
+	}
+}
+
+// Decodes the next GIF frame into g->out and returns it. This function is designed to
+// support animated GIFs: on the first call (g->out == 0) the header is parsed and the
+// working buffers are allocated; on later calls the previous frame is first disposed of
+// according to the stored graphic-control flags.
+//   two_back: the image from two frames ago, used only by disposal method 3; may be 0,
+//             in which case method 3 falls back to method 2
+//   req_comp: unused here; callers convert the 4-component result afterwards
+// Returns g->out on success, (stbi_uc*)s when the stream terminator (0x3B) is read, and
+// 0/NULL on error. If the first-call allocations fail, all three buffers are released and
+// reset to 0 before returning.
+static stbi_uc* stbi__gif_load_next(stbi__context* s, stbi__gif* g, int* comp, int req_comp, stbi_uc* two_back)
+{
+	int dispose;
+	int first_frame;
+	int pi;
+	int pcount;
+	STBI_NOTUSED(req_comp);
+
+	// on first frame, any non-written pixels get the background colour (non-transparent)
+	first_frame = 0;
+	if (g->out == 0) {
+		if (!stbi__gif_header(s, g, comp, 0))     return 0; // stbi__g_failure_reason set by stbi__gif_header
+
+		// w and h are 16-bit values, so 4 * w * h can overflow an int; reject such sizes
+		// before they are used for allocation and memset lengths
+		if (!stbi__mad3sizes_valid(4, g->w, g->h, 0))
+			return stbi__errpuc("too large", "GIF image is too large");
+		pcount = g->w * g->h;
+
+		g->out = (stbi_uc*)stbi__malloc(4 * pcount);
+		g->background = (stbi_uc*)stbi__malloc(4 * pcount);
+		g->history = (stbi_uc*)stbi__malloc(pcount);
+		// all three buffers are written below, so every allocation must have succeeded
+		if (g->out == 0 || g->background == 0 || g->history == 0) {
+			STBI_FREE(g->out);
+			STBI_FREE(g->background);
+			STBI_FREE(g->history);
+			g->out = 0;
+			g->background = 0;
+			g->history = 0;
+			return stbi__errpuc("outofmem", "Out of memory");
+		}
+
+		// image is treated as "transparent" at the start - ie, nothing overwrites the current background;
+		// background colour is only used for pixels that are not rendered first frame, after that "background"
+		// color refers to the color that was there the previous frame.
+		memset(g->out, 0x00, 4 * pcount);
+		memset(g->background, 0x00, 4 * pcount); // state of the background (starts transparent)
+		memset(g->history, 0x00, pcount);        // pixels that were affected previous frame
+		first_frame = 1;
+	}
+	else {
+		// second frame - how do we dispose of the previous one?
+		dispose = (g->eflags & 0x1C) >> 2;
+		pcount = g->w * g->h;
+
+		if ((dispose == 3) && (two_back == 0)) {
+			dispose = 2; // if I don't have an image to revert back to, default to the old background
+		}
+
+		if (dispose == 3) { // use previous graphic
+			for (pi = 0; pi < pcount; ++pi) {
+				if (g->history[pi]) {
+					memcpy(&g->out[pi * 4], &two_back[pi * 4], 4);
+				}
+			}
+		}
+		else if (dispose == 2) {
+			// restore what was changed last frame to background before that frame;
+			for (pi = 0; pi < pcount; ++pi) {
+				if (g->history[pi]) {
+					memcpy(&g->out[pi * 4], &g->background[pi * 4], 4);
+				}
+			}
+		}
+		else {
+			// This is a non-disposal case either way, so just
+			// leave the pixels as is, and they will become the new background
+			// 1: do not dispose
+			// 0:  not specified.
+		}
+
+		// background is what out is after the undoing of the previous frame;
+		memcpy(g->background, g->out, 4 * g->w * g->h);
+	}
+
+	// clear my history;
+	memset(g->history, 0x00, g->w * g->h);        // pixels that were affected previous frame
+
+	for (;;) {
+		int tag = stbi__get8(s);
+		switch (tag) {
+		case 0x2C: /* Image Descriptor */
+		{
+			stbi__int32 x, y, w, h;
+			stbi_uc* o;
+
+			x = stbi__get16le(s);
+			y = stbi__get16le(s);
+			w = stbi__get16le(s);
+			h = stbi__get16le(s);
+			// the frame rectangle must lie inside the canvas
+			if (((x + w) > (g->w)) || ((y + h) > (g->h)))
+				return stbi__errpuc("bad Image Descriptor", "Corrupt GIF");
+
+			// all cursor fields are byte offsets into the 4-byte-per-pixel canvas
+			g->line_size = g->w * 4;
+			g->start_x = x * 4;
+			g->start_y = y * g->line_size;
+			g->max_x = g->start_x + w * 4;
+			g->max_y = g->start_y + h * g->line_size;
+			g->cur_x = g->start_x;
+			g->cur_y = g->start_y;
+
+			g->lflags = stbi__get8(s);
+
+			if (g->lflags & 0x40) {
+				g->step = 8 * g->line_size; // first interlaced spacing
+				g->parse = 3;
+			}
+			else {
+				g->step = g->line_size;
+				g->parse = 0;
+			}
+
+			// a local palette (flag 0x80) takes precedence over the global one
+			if (g->lflags & 0x80) {
+				stbi__gif_parse_colortable(s, g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? g->transparent : -1);
+				g->color_table = (stbi_uc*)g->lpal;
+			}
+			else if (g->flags & 0x80) {
+				g->color_table = (stbi_uc*)g->pal;
+			}
+			else
+				return stbi__errpuc("missing color table", "Corrupt GIF");
+
+			o = stbi__process_gif_raster(s, g);
+			if (o == NULL) return NULL;
+
+			// if this was the first frame,
+			pcount = g->w * g->h;
+			if (first_frame && (g->bgindex > 0)) {
+				// if first frame, any pixel not drawn to gets the background color
+				for (pi = 0; pi < pcount; ++pi) {
+					if (g->history[pi] == 0) {
+						g->pal[g->bgindex][3] = 255; // just in case it was made transparent, undo that; It will be reset next frame if need be;
+						memcpy(&g->out[pi * 4], &g->pal[g->bgindex], 4);
+					}
+				}
+			}
+
+			return o;
+		}
+
+		case 0x21: // Comment Extension.
+		{
+			int len;
+			int ext = stbi__get8(s);
+			if (ext == 0xF9) { // Graphic Control Extension.
+				len = stbi__get8(s);
+				if (len == 4) {
+					g->eflags = stbi__get8(s);
+					g->delay = 10 * stbi__get16le(s); // delay - 1/100th of a second, saving as 1/1000ths.
+
+					// unset old transparent
+					if (g->transparent >= 0) {
+						g->pal[g->transparent][3] = 255;
+					}
+					if (g->eflags & 0x01) {
+						g->transparent = stbi__get8(s);
+						if (g->transparent >= 0) {
+							g->pal[g->transparent][3] = 0;
+						}
+					}
+					else {
+						// don't need transparent
+						stbi__skip(s, 1);
+						g->transparent = -1;
+					}
+				}
+				else {
+					stbi__skip(s, len);
+					break;
+				}
+			}
+			// skip the remaining data sub-blocks of the extension up to the zero terminator
+			while ((len = stbi__get8(s)) != 0) {
+				stbi__skip(s, len);
+			}
+			break;
+		}
+
+		case 0x3B: // gif stream termination code
+			return (stbi_uc*)s; // using '1' causes warning on some compilers
+
+		default:
+			return stbi__errpuc("unknown code", "Corrupt GIF");
+		}
+	}
+}
+
+// Releases everything stbi__load_gif_main owns and reports an out-of-memory failure.
+static void* stbi__load_gif_main_outofmem(stbi__gif* g, stbi_uc* out, int** delays)
+{
+	STBI_FREE(g->out);
+	STBI_FREE(g->history);
+	STBI_FREE(g->background);
+
+	if (out) STBI_FREE(out);
+	if (delays && *delays) {
+		STBI_FREE(*delays);
+		*delays = 0;
+	}
+	return stbi__errpuc("outofmem", "Out of memory");
+}
+
+// Loads every frame of a GIF into one contiguous buffer, frame after frame.
+//   delays:   if non-NULL, receives a newly allocated array with one delay per frame
+//   x, y:     receive the canvas size (written once a frame has decoded)
+//   z:        receives the number of frames
+//   comp:     forwarded to the frame decoder
+//   req_comp: requested components per pixel; 0 or 4 keeps the native 4
+// Returns the frame buffer, or 0/NULL if the stream is not a GIF, no frame could be
+// decoded, or memory ran out (in which case nothing is leaked and *delays is 0).
+static void* stbi__load_gif_main(stbi__context* s, int** delays, int* x, int* y, int* z, int* comp, int req_comp)
+{
+	if (stbi__gif_test(s)) {
+		int layers = 0;
+		stbi_uc* u = 0;
+		stbi_uc* out = 0;
+		stbi_uc* two_back = 0;
+		stbi__gif g;
+		int stride;
+		memset(&g, 0, sizeof(g));
+		if (delays) {
+			*delays = 0;
+		}
+
+		do {
+			u = stbi__gif_load_next(s, &g, comp, req_comp, two_back);
+			if (u == (stbi_uc*)s) u = 0;  // end of animated gif marker
+
+			if (u) {
+				void* grown;
+
+				*x = g.w;
+				*y = g.h;
+				++layers;
+				stride = g.w * g.h * 4;
+
+				// grow through a temporary so the old block is still owned (and freed) on failure
+				if (out) {
+					grown = STBI_REALLOC(out, layers * stride);
+					if (grown == NULL)
+						return stbi__load_gif_main_outofmem(&g, out, delays);
+					out = (stbi_uc*)grown;
+
+					if (delays) {
+						grown = STBI_REALLOC(*delays, sizeof(int) * layers);
+						if (grown == NULL)
+							return stbi__load_gif_main_outofmem(&g, out, delays);
+						*delays = (int*)grown;
+					}
+				}
+				else {
+					out = (stbi_uc*)stbi__malloc(layers * stride);
+					if (out == NULL)
+						return stbi__load_gif_main_outofmem(&g, out, delays);
+
+					if (delays) {
+						*delays = (int*)stbi__malloc(layers * sizeof(int));
+						if (*delays == NULL)
+							return stbi__load_gif_main_outofmem(&g, out, delays);
+					}
+				}
+				memcpy(out + ((layers - 1) * stride), u, stride);
+				if (layers >= 2) {
+					// Frame stored two positions before the one decoded next. It is recomputed
+					// after every (re)allocation because the buffer may have moved.
+					two_back = out + (layers - 2) * stride;
+				}
+
+				if (delays) {
+					(*delays)[layers - 1U] = g.delay;
+				}
+			}
+		} while (u != 0);
+
+		// free temp buffer;
+		STBI_FREE(g.out);
+		STBI_FREE(g.history);
+		STBI_FREE(g.background);
+
+		// do the final conversion after loading everything; nothing to convert if no frame decoded
+		if (out && req_comp && req_comp != 4)
+			out = stbi__convert_format(out, 4, req_comp, layers * g.w, g.h);
+
+		*z = layers;
+		return out;
+	}
+	else {
+		return stbi__errpuc("not GIF", "Image was not as a gif type.");
+	}
+}
+
+// Loads the first frame of a GIF.
+//   x, y:     receive the canvas size on success
+//   comp:     forwarded to the frame decoder
+//   req_comp: requested components per pixel; 0 or 4 keeps the native 4
+//   ri:       unused
+// Returns the pixel buffer, or 0 when no frame could be decoded.
+static void* stbi__gif_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri)
+{
+	stbi_uc* u = 0;
+	stbi__gif g;
+	memset(&g, 0, sizeof(g));
+	STBI_NOTUSED(ri);
+
+	u = stbi__gif_load_next(s, &g, comp, req_comp, 0);
+	if (u == (stbi_uc*)s) u = 0;  // end of animated gif marker
+	if (u) {
+		*x = g.w;
+		*y = g.h;
+
+		// moved conversion to after successful load so that the same
+		// can be done for multiple frames.
+		if (req_comp && req_comp != 4)
+			u = stbi__convert_format(u, 4, req_comp, g.w, g.h);
+	}
+	else if (g.out) {
+		// no frame is handed to the caller, so the output buffer allocated by
+		// stbi__gif_load_next has to be released here
+		STBI_FREE(g.out);
+	}
+
+	// free buffers needed for multiple frame loading;
+	STBI_FREE(g.history);
+	STBI_FREE(g.background);
+
+	return u;
+}
+
+// Info entry point for GIF; forwards to stbi__gif_info_raw and returns its result.
+static int stbi__gif_info(stbi__context* s, int* x, int* y, int* comp)
+{
+	int ok;
+
+	ok = stbi__gif_info_raw(s, x, y, comp);
+	return ok;
+}
+#endif
+
+// *************************************************************************************************
+// Radiance RGBE HDR loader
+// originally by Nicolas Schulz
+#ifndef STBI_NO_HDR
+// Compares the upcoming stream bytes with the NUL-terminated signature.
+// Returns 0 at the first mismatch (without rewinding); on a full match the
+// stream is rewound and 1 is returned.
+static int stbi__hdr_test_core(stbi__context* s, const char* signature)
+{
+	const char* expected = signature;
+
+	while (*expected) {
+		if (stbi__get8(s) != *expected)
+			return 0;
+		++expected;
+	}
+	stbi__rewind(s);
+	return 1;
+}
+
+// Returns non-zero when the stream starts with "#?RADIANCE\n" or "#?RGBE\n".
+// The stream is rewound after each attempt.
+static int stbi__hdr_test(stbi__context* s)
+{
+	int matched;
+
+	matched = stbi__hdr_test_core(s, "#?RADIANCE\n");
+	stbi__rewind(s);
+	if (matched)
+		return matched;
+
+	matched = stbi__hdr_test_core(s, "#?RGBE\n");
+	stbi__rewind(s);
+	return matched;
+}
+
+#define STBI__HDR_BUFLEN  1024
+// Reads one line (up to, but not including, '\n') from the stream into buffer and
+// NUL-terminates it. buffer must hold STBI__HDR_BUFLEN bytes; once
+// STBI__HDR_BUFLEN - 1 characters are stored, the rest of the line is discarded.
+// Returns buffer.
+static char* stbi__hdr_gettoken(stbi__context * z, char* buffer)
+{
+	int used = 0;
+	char ch = (char)stbi__get8(z);
+
+	for (;;) {
+		if (stbi__at_eof(z) || ch == '\n')
+			break;
+
+		buffer[used++] = ch;
+		if (used == STBI__HDR_BUFLEN - 1) {
+			// flush to end of line
+			for (;;) {
+				if (stbi__at_eof(z)) break;
+				if (stbi__get8(z) == '\n') break;
+			}
+			break;
+		}
+		ch = (char)stbi__get8(z);
+	}
+
+	buffer[used] = 0;
+	return buffer;
+}
+
+// Converts one 4-byte RGBE pixel (input) to req_comp floats (output).
+// input[3] is the shared exponent; when it is 0 the colour channels are written as 0.
+// With 1 or 2 components the three channels are averaged into output[0]; the extra
+// channel of the 2- and 4-component layouts is set to 1.
+static void stbi__hdr_convert(float* output, stbi_uc* input, int req_comp)
+{
+	float scale;
+
+	if (input[3] == 0) {
+		// zero exponent: black pixel
+		if (req_comp == 4 || req_comp == 3) {
+			if (req_comp == 4) output[3] = 1;
+			output[0] = output[1] = output[2] = 0;
+		}
+		else if (req_comp == 2 || req_comp == 1) {
+			if (req_comp == 2) output[1] = 1;
+			output[0] = 0;
+		}
+		return;
+	}
+
+	// Exponent
+	scale = (float)ldexp(1.0f, input[3] - (int)(128 + 8));
+	if (req_comp > 2) {
+		output[0] = input[0] * scale;
+		output[1] = input[1] * scale;
+		output[2] = input[2] * scale;
+	}
+	else {
+		output[0] = (input[0] + input[1] + input[2]) * scale / 3;
+	}
+	if (req_comp == 2) output[1] = 1;
+	if (req_comp == 4) output[3] = 1;
+}
+
+// Loads a Radiance RGBE (.hdr) image as floats.
+//   x, y:     receive width and height
+//   comp:     if non-NULL, receives 3 (the components stored in the file)
+//   req_comp: components per output pixel; 0 means 3
+//   ri:       unused
+// Only "FORMAT=32-bit_rle_rgbe" files with a "-Y <height> +X <width>" resolution line
+// are accepted. Returns a buffer of width * height * req_comp floats, or the result of
+// stbi__errpf on failure.
+static float* stbi__hdr_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri)
+{
+	char buffer[STBI__HDR_BUFLEN];
+	char* token;
+	int valid = 0;
+	int width, height;
+	stbi_uc* scanline;
+	float* hdr_data;
+	int len;
+	unsigned char count, value;
+	int i, j, k, c1, c2, z;
+	const char* headerToken;
+	STBI_NOTUSED(ri);
+
+	// Check identifier
+	headerToken = stbi__hdr_gettoken(s, buffer);
+	if (strcmp(headerToken, "#?RADIANCE") != 0 && strcmp(headerToken, "#?RGBE") != 0)
+		return stbi__errpf("not HDR", "Corrupt HDR image");
+
+	// Parse header: lines up to the first empty one
+	for (;;) {
+		token = stbi__hdr_gettoken(s, buffer);
+		if (token[0] == 0) break;
+		if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
+	}
+
+	if (!valid)    return stbi__errpf("unsupported format", "Unsupported HDR format");
+
+	// Parse width and height
+	// can't use sscanf() if we're not using stdio!
+	token = stbi__hdr_gettoken(s, buffer);
+	if (strncmp(token, "-Y ", 3))  return stbi__errpf("unsupported data layout", "Unsupported HDR format");
+	token += 3;
+	height = (int)strtol(token, &token, 10);
+	while (*token == ' ') ++token;
+	if (strncmp(token, "+X ", 3))  return stbi__errpf("unsupported data layout", "Unsupported HDR format");
+	token += 3;
+	width = (int)strtol(token, NULL, 10);
+
+	*x = width;
+	*y = height;
+
+	if (comp) *comp = 3;
+	if (req_comp == 0) req_comp = 3;
+
+	if (!stbi__mad4sizes_valid(width, height, req_comp, sizeof(float), 0))
+		return stbi__errpf("too large", "HDR image is too large");
+
+	// Read data
+	hdr_data = (float*)stbi__malloc_mad4(width, height, req_comp, sizeof(float), 0);
+	if (!hdr_data)
+		return stbi__errpf("outofmem", "Out of memory");
+
+	// Load image data
+	// Widths outside 8..32767 are read as flat RGBE pixels; other widths are expected
+	// to be run-length encoded one scanline at a time.
+	if (width < 8 || width >= 32768) {
+		// Read flat data
+		for (j = 0; j < height; ++j) {
+			for (i = 0; i < width; ++i) {
+				stbi_uc rgbe[4];
+			main_decode_loop:
+				stbi__getn(s, rgbe, 4);
+				stbi__hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp);
+			}
+		}
+	}
+	else {
+		// Read RLE-encoded data
+		scanline = NULL;
+
+		for (j = 0; j < height; ++j) {
+			c1 = stbi__get8(s);
+			c2 = stbi__get8(s);
+			len = stbi__get8(s);
+			if (c1 != 2 || c2 != 2 || (len & 0x80)) {
+				// not run-length encoded, so we have to actually use THIS data as a decoded
+				// pixel (note this can't be a valid pixel--one of RGB must be >= 128)
+				stbi_uc rgbe[4];
+				rgbe[0] = (stbi_uc)c1;
+				rgbe[1] = (stbi_uc)c2;
+				rgbe[2] = (stbi_uc)len;
+				rgbe[3] = (stbi_uc)stbi__get8(s);
+				stbi__hdr_convert(hdr_data, rgbe, req_comp);
+				// pixel 0 is done; continue in the flat reader from pixel 1 of row 0
+				i = 1;
+				j = 0;
+				STBI_FREE(scanline);
+				goto main_decode_loop; // yes, this makes no sense
+			}
+			// the next byte completes the 16-bit scanline length, which must equal the width
+			len <<= 8;
+			len |= stbi__get8(s);
+			if (len != width) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("invalid decoded scanline length", "corrupt HDR"); }
+			if (scanline == NULL) {
+				scanline = (stbi_uc*)stbi__malloc_mad2(width, 4, 0);
+				if (!scanline) {
+					STBI_FREE(hdr_data);
+					return stbi__errpf("outofmem", "Out of memory");
+				}
+			}
+
+			// the four channels are stored one after another, each run-length encoded on its own
+			for (k = 0; k < 4; ++k) {
+				int nleft;
+				i = 0;
+				while ((nleft = width - i) > 0) {
+					count = stbi__get8(s);
+					if (count > 128) {
+						// Run
+						value = stbi__get8(s);
+						count -= 128;
+						if (count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
+						for (z = 0; z < count; ++z)
+							scanline[i++ * 4 + k] = value;
+					}
+					else {
+						// Dump
+						// A zero count makes no progress; on truncated input the stream keeps
+						// yielding such counts, so treat it as corrupt instead of looping forever.
+						if (count == 0 || count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
+						for (z = 0; z < count; ++z)
+							scanline[i++ * 4 + k] = stbi__get8(s);
+					}
+				}
+			}
+			for (i = 0; i < width; ++i)
+				stbi__hdr_convert(hdr_data + (j * width + i) * req_comp, scanline + i * 4, req_comp);
+		}
+		if (scanline)
+			STBI_FREE(scanline);
+	}
+
+	return hdr_data;
+}
+
+// Reports the dimensions of a Radiance HDR image without decoding pixel data.
+//   x, y, comp: optional outputs (NULL allowed); *comp is set to 3 on success
+// Returns 1 on success. On any failure the stream is rewound and 0 is returned.
+// Note that *y may already have been written when the "+X" part is rejected.
+static int stbi__hdr_info(stbi__context* s, int* x, int* y, int* comp)
+{
+	char buffer[STBI__HDR_BUFLEN];
+	char* line;
+	int format_ok = 0;
+	int dummy;
+
+	// let the code below write through the pointers unconditionally
+	if (!x) x = &dummy;
+	if (!y) y = &dummy;
+	if (!comp) comp = &dummy;
+
+	if (stbi__hdr_test(s) == 0)
+		goto not_hdr;
+
+	// header lines run up to the first empty line
+	while ((line = stbi__hdr_gettoken(s, buffer))[0] != 0) {
+		if (strcmp(line, "FORMAT=32-bit_rle_rgbe") == 0)
+			format_ok = 1;
+	}
+	if (!format_ok)
+		goto not_hdr;
+
+	// resolution line: "-Y <height> +X <width>"
+	line = stbi__hdr_gettoken(s, buffer);
+	if (strncmp(line, "-Y ", 3) != 0)
+		goto not_hdr;
+	line += 3;
+	*y = (int)strtol(line, &line, 10);
+
+	while (*line == ' ')
+		++line;
+	if (strncmp(line, "+X ", 3) != 0)
+		goto not_hdr;
+	line += 3;
+	*x = (int)strtol(line, NULL, 10);
+
+	*comp = 3;
+	return 1;
+
+not_hdr:
+	stbi__rewind(s);
+	return 0;
+}
+#endif // STBI_NO_HDR
+
+#ifndef STBI_NO_BMP
+// Reports BMP dimensions and component count by parsing only the header.
+// The stream is rewound whether or not parsing succeeds.
+//   x, y, comp: optional outputs; *comp is 4 when info.ma is non-zero, else 3
+// Returns 1 on success, 0 when the header could not be parsed.
+static int stbi__bmp_info(stbi__context* s, int* x, int* y, int* comp)
+{
+	stbi__bmp_data info;
+	void* header;
+
+	info.all_a = 255;
+	header = stbi__bmp_parse_header(s, &info);
+	stbi__rewind(s);
+
+	if (header != NULL) {
+		if (x) *x = s->img_x;
+		if (y) *y = s->img_y;
+		if (comp) *comp = info.ma ? 4 : 3;
+		return 1;
+	}
+	return 0;
+}
+#endif
+
+#ifndef STBI_NO_PSD
+// Reports the dimensions of a PSD file from its header.
+//   x, y, comp: optional outputs (NULL allowed); *comp is set to 4 on success
+// Returns 1 on success. On any rejected field the stream is rewound and 0 is
+// returned; *y and *x may already have been written in that case.
+static int stbi__psd_info(stbi__context* s, int* x, int* y, int* comp)
+{
+	int channelCount, dummy, depth;
+
+	if (!x) x = &dummy;
+	if (!y) y = &dummy;
+	if (!comp) comp = &dummy;
+
+	if (stbi__get32be(s) != 0x38425053)      // "8BPS"
+		goto reject;
+	if (stbi__get16be(s) != 1)
+		goto reject;
+
+	stbi__skip(s, 6);
+	channelCount = stbi__get16be(s);
+	if (channelCount < 0 || channelCount > 16)
+		goto reject;
+
+	*y = stbi__get32be(s);
+	*x = stbi__get32be(s);
+
+	depth = stbi__get16be(s);
+	if (!(depth == 8 || depth == 16))
+		goto reject;
+	if (stbi__get16be(s) != 3)
+		goto reject;
+
+	*comp = 4;
+	return 1;
+
+reject:
+	stbi__rewind(s);
+	return 0;
+}
+
+// Returns 1 when the stream holds a PSD header whose depth field is 16.
+// Otherwise the stream is rewound and 0 is returned. The stream is left
+// un-rewound on success.
+static int stbi__psd_is16(stbi__context* s)
+{
+	int channelCount, depth;
+
+	if (stbi__get32be(s) != 0x38425053)      // "8BPS"
+		goto reject;
+	if (stbi__get16be(s) != 1)
+		goto reject;
+
+	stbi__skip(s, 6);
+	channelCount = stbi__get16be(s);
+	if (channelCount < 0 || channelCount > 16)
+		goto reject;
+
+	// two 32-bit fields (read as *y and *x by stbi__psd_info) are not needed here
+	(void)stbi__get32be(s);
+	(void)stbi__get32be(s);
+
+	depth = stbi__get16be(s);
+	if (depth != 16)
+		goto reject;
+
+	return 1;
+
+reject:
+	stbi__rewind(s);
+	return 0;
+}
+#endif
+
+#ifndef STBI_NO_PIC
+// Reports the dimensions and component count of a PIC file from its header and
+// packet list.
+//   x, y, comp: optional outputs (NULL allowed); *comp is 4 when any packet's
+//               channel mask has bit 0x10 set, otherwise 3
+// Returns 1 on success. Every failure path rewinds the stream and returns 0, so the
+// next format probe starts from the beginning.
+static int stbi__pic_info(stbi__context* s, int* x, int* y, int* comp)
+{
+	int act_comp = 0, num_packets = 0, chained, dummy;
+	stbi__pic_packet packets[10];
+
+	if (!x) x = &dummy;
+	if (!y) y = &dummy;
+	if (!comp) comp = &dummy;
+
+	if (!stbi__pic_is4(s, "\x53\x80\xF6\x34")) {
+		stbi__rewind(s);
+		return 0;
+	}
+
+	stbi__skip(s, 88);
+
+	*x = stbi__get16be(s);
+	*y = stbi__get16be(s);
+	if (stbi__at_eof(s)) {
+		stbi__rewind(s);
+		return 0;
+	}
+	// reject images with more than 1 << 28 pixels
+	if ((*x) != 0 && (1 << 28) / (*x) < (*y)) {
+		stbi__rewind(s);
+		return 0;
+	}
+
+	stbi__skip(s, 8);
+
+	do {
+		stbi__pic_packet* packet;
+
+		// more chained packets than the local array can hold
+		if (num_packets == sizeof(packets) / sizeof(packets[0])) {
+			stbi__rewind(s);
+			return 0;
+		}
+
+		packet = &packets[num_packets++];
+		chained = stbi__get8(s);
+		packet->size = stbi__get8(s);
+		packet->type = stbi__get8(s);
+		packet->channel = stbi__get8(s);
+		act_comp |= packet->channel;
+
+		if (stbi__at_eof(s)) {
+			stbi__rewind(s);
+			return 0;
+		}
+		if (packet->size != 8) {
+			stbi__rewind(s);
+			return 0;
+		}
+	} while (chained);
+
+	*comp = (act_comp & 0x10 ? 4 : 3);
+
+	return 1;
+}
+#endif
+
+// *************************************************************************************************
+// Portable Gray Map and Portable Pixel Map loader
+// by Ken Miller
+//
+// PGM: http://netpbm.sourceforge.net/doc/pgm.html
+// PPM: http://netpbm.sourceforge.net/doc/ppm.html
+//
+// Comments in the header section ('#' through end of line) are skipped.
+//
+// Known limitations:
+//    Does not support ASCII image data (formats P2 and P3)
+//    Does not support 16-bit-per-channel
+
+#ifndef STBI_NO_PNM
+
+// Returns 1 when the stream starts with "P5" or "P6". Otherwise the stream is
+// rewound and 0 is returned. The stream is left un-rewound on a match.
+static int      stbi__pnm_test(stbi__context* s)
+{
+	char magic = (char)stbi__get8(s);
+	char type = (char)stbi__get8(s);
+
+	if (magic == 'P' && (type == '5' || type == '6'))
+		return 1;
+
+	stbi__rewind(s);
+	return 0;
+}
+
+// Loads a binary PGM/PPM image.
+//   x, y:     receive width and height
+//   comp:     if non-NULL, receives the component count stored in the file
+//   req_comp: requested components per pixel; 0 keeps the file's count
+//   ri:       unused
+// Returns the pixel buffer, or 0/NULL when the header is rejected, the size is too
+// large, memory runs out, or the pixel data is shorter than the header promises.
+static void* stbi__pnm_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri)
+{
+	stbi_uc* out;
+	STBI_NOTUSED(ri);
+
+	if (!stbi__pnm_info(s, (int*)& s->img_x, (int*)& s->img_y, (int*)& s->img_n))
+		return 0;
+
+	*x = s->img_x;
+	*y = s->img_y;
+	if (comp) *comp = s->img_n;
+
+	if (!stbi__mad3sizes_valid(s->img_n, s->img_x, s->img_y, 0))
+		return stbi__errpuc("too large", "PNM too large");
+
+	out = (stbi_uc*)stbi__malloc_mad3(s->img_n, s->img_x, s->img_y, 0);
+	if (!out) return stbi__errpuc("outofmem", "Out of memory");
+
+	// A short read would leave part of the buffer uninitialised; fail instead of
+	// returning it as pixel data.
+	// NOTE(review): relies on stbi__getn returning 0 on a short read - confirm against its definition.
+	if (!stbi__getn(s, out, s->img_n * s->img_x * s->img_y)) {
+		STBI_FREE(out);
+		return stbi__errpuc("bad PNM", "PNM file truncated");
+	}
+
+	if (req_comp && req_comp != s->img_n) {
+		out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y);
+		if (out == NULL) return out; // stbi__convert_format frees input on failure
+	}
+	return out;
+}
+
+// Returns 1 for space, tab, newline, vertical tab, form feed and carriage return;
+// 0 for every other character.
+static int      stbi__pnm_isspace(char c)
+{
+	switch (c) {
+	case ' ':
+	case '\t':
+	case '\n':
+	case '\v':
+	case '\f':
+	case '\r':
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+// Advances past whitespace and '#' comments. *c holds the current look-ahead
+// character on entry and the first significant character on return (or whatever
+// was last read when the stream ends). A comment runs up to '\n' or '\r'.
+static void     stbi__pnm_skip_whitespace(stbi__context* s, char* c)
+{
+	for (;;) {
+		// run of whitespace
+		while (!stbi__at_eof(s) && stbi__pnm_isspace(*c)) {
+			*c = (char)stbi__get8(s);
+		}
+
+		// anything but the start of a comment ends the skip
+		if (stbi__at_eof(s) || *c != '#')
+			return;
+
+		// comment body, up to the end of the line
+		while (!(stbi__at_eof(s) || *c == '\n' || *c == '\r')) {
+			*c = (char)stbi__get8(s);
+		}
+	}
+}
+
+// Returns 1 when c is one of '0'..'9', else 0.
+static int      stbi__pnm_isdigit(char c)
+{
+	return !(c < '0' || c > '9');
+}
+
+// Parses an unsigned decimal integer. *c is the current look-ahead character on
+// entry and the first non-digit character (or the last character read at end of
+// stream) on return.
+// Returns the parsed value, or 0 when *c is not a digit. Values too large for an
+// int saturate at the largest int instead of overflowing, so callers' range checks
+// reject them.
+static int      stbi__pnm_getinteger(stbi__context* s, char* c)
+{
+	// largest positive int, derived without needing <limits.h>
+	const int int_max = (int)(~0u >> 1);
+	int value = 0;
+
+	while (!stbi__at_eof(s) && stbi__pnm_isdigit(*c)) {
+		int digit = *c - '0';
+
+		// value * 10 + digit fits exactly when value <= (int_max - digit) / 10
+		if (value > (int_max - digit) / 10)
+			value = int_max;
+		else
+			value = value * 10 + digit;
+
+		*c = (char)stbi__get8(s);
+	}
+
+	return value;
+}
+
+// Parses a binary PGM ("P5") or PPM ("P6") header.
+//   x, y, comp: optional outputs (NULL allowed); *comp is 1 for P5 and 3 for P6
+// The stream is rewound before parsing starts. Returns 1 on success with the stream
+// positioned after the max-value field. On failure (wrong signature, or a max value
+// above 255) the stream is rewound and 0 is returned.
+static int      stbi__pnm_info(stbi__context* s, int* x, int* y, int* comp)
+{
+	int maxv, dummy;
+	char c, p, t;
+
+	if (!x) x = &dummy;
+	if (!y) y = &dummy;
+	if (!comp) comp = &dummy;
+
+	stbi__rewind(s);
+
+	// Get identifier
+	p = (char)stbi__get8(s);
+	t = (char)stbi__get8(s);
+	if (p != 'P' || (t != '5' && t != '6')) {
+		stbi__rewind(s);
+		return 0;
+	}
+
+	*comp = (t == '6') ? 3 : 1;  // '5' is 1-component .pgm; '6' is 3-component .ppm
+
+	c = (char)stbi__get8(s);
+	stbi__pnm_skip_whitespace(s, &c);
+
+	*x = stbi__pnm_getinteger(s, &c); // read width
+	stbi__pnm_skip_whitespace(s, &c);
+
+	*y = stbi__pnm_getinteger(s, &c); // read height
+	stbi__pnm_skip_whitespace(s, &c);
+
+	maxv = stbi__pnm_getinteger(s, &c);  // read max value
+
+	if (maxv > 255) {
+		// rewind like the signature-mismatch path, so a later format probe starts at byte 0
+		stbi__rewind(s);
+		return stbi__err("max value > 255", "PPM image not 8-bit");
+	}
+	return 1;
+}
+#endif
+
+// Tries each compiled-in format's info routine in turn and returns 1 as soon as one
+// of them recognises the stream. If none does, returns the result of stbi__err.
+static int stbi__info_main(stbi__context* s, int* x, int* y, int* comp)
+{
+#ifndef STBI_NO_JPEG
+	if (stbi__jpeg_info(s, x, y, comp) != 0) {
+		return 1;
+	}
+#endif
+
+#ifndef STBI_NO_PNG
+	if (stbi__png_info(s, x, y, comp) != 0) {
+		return 1;
+	}
+#endif
+
+#ifndef STBI_NO_GIF
+	if (stbi__gif_info(s, x, y, comp) != 0) {
+		return 1;
+	}
+#endif
+
+#ifndef STBI_NO_BMP
+	if (stbi__bmp_info(s, x, y, comp) != 0) {
+		return 1;
+	}
+#endif
+
+#ifndef STBI_NO_PSD
+	if (stbi__psd_info(s, x, y, comp) != 0) {
+		return 1;
+	}
+#endif
+
+#ifndef STBI_NO_PIC
+	if (stbi__pic_info(s, x, y, comp) != 0) {
+		return 1;
+	}
+#endif
+
+#ifndef STBI_NO_PNM
+	if (stbi__pnm_info(s, x, y, comp) != 0) {
+		return 1;
+	}
+#endif
+
+#ifndef STBI_NO_HDR
+	if (stbi__hdr_info(s, x, y, comp) != 0) {
+		return 1;
+	}
+#endif
+
+	// TGA is probed last because its test is the least reliable
+#ifndef STBI_NO_TGA
+	if (stbi__tga_info(s, x, y, comp) != 0) {
+		return 1;
+	}
+#endif
+
+	return stbi__err("unknown image type", "Image not of any known type, or corrupt");
+}
+
+// Returns 1 when a compiled-in PNG or PSD probe reports a 16-bit image, else 0.
+static int stbi__is_16_main(stbi__context* s)
+{
+#ifndef STBI_NO_PNG
+	if (stbi__png_is16(s) != 0) {
+		return 1;
+	}
+#endif
+
+#ifndef STBI_NO_PSD
+	if (stbi__psd_is16(s) != 0) {
+		return 1;
+	}
+#endif
+
+	return 0;
+}
+
+#ifndef STBI_NO_STDIO
+// Opens filename and reports the image's dimensions and component count.
+// Returns the result of stbi_info_from_file, or the result of stbi__err when the
+// file cannot be opened.
+STBIDEF int stbi_info(char const* filename, int* x, int* y, int* comp)
+{
+	int ok;
+	FILE* fp = stbi__fopen(filename, "rb");
+
+	if (fp == NULL)
+		return stbi__err("can't fopen", "Unable to open file");
+
+	ok = stbi_info_from_file(fp, x, y, comp);
+	fclose(fp);
+	return ok;
+}
+
+// Reports image info for an already-open file. The file position found on entry is
+// restored with fseek before returning.
+STBIDEF int stbi_info_from_file(FILE* f, int* x, int* y, int* comp)
+{
+	stbi__context ctx;
+	int found;
+	long start = ftell(f);
+
+	stbi__start_file(&ctx, f);
+	found = stbi__info_main(&ctx, x, y, comp);
+	fseek(f, start, SEEK_SET);
+	return found;
+}
+
+// Opens filename and reports whether it holds a 16-bit image.
+// Returns the result of stbi_is_16_bit_from_file, or the result of stbi__err when
+// the file cannot be opened.
+STBIDEF int stbi_is_16_bit(char const* filename)
+{
+	int is16;
+	FILE* fp = stbi__fopen(filename, "rb");
+
+	if (fp == NULL)
+		return stbi__err("can't fopen", "Unable to open file");
+
+	is16 = stbi_is_16_bit_from_file(fp);
+	fclose(fp);
+	return is16;
+}
+
+// Reports whether an already-open file holds a 16-bit image. The file position
+// found on entry is restored with fseek before returning.
+STBIDEF int stbi_is_16_bit_from_file(FILE* f)
+{
+	stbi__context ctx;
+	int is16;
+	long start = ftell(f);
+
+	stbi__start_file(&ctx, f);
+	is16 = stbi__is_16_main(&ctx);
+	fseek(f, start, SEEK_SET);
+	return is16;
+}
+#endif // !STBI_NO_STDIO
+
+// Reports image info for an image held in a memory buffer of len bytes.
+STBIDEF int stbi_info_from_memory(stbi_uc const* buffer, int len, int* x, int* y, int* comp)
+{
+	stbi__context ctx;
+	int found;
+
+	stbi__start_mem(&ctx, buffer, len);
+	found = stbi__info_main(&ctx, x, y, comp);
+	return found;
+}
+
+// Reports image info for an image read through user-supplied I/O callbacks;
+// user is handed to stbi__start_callbacks together with the callback table.
+STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const* c, void* user, int* x, int* y, int* comp)
+{
+	stbi__context ctx;
+	int found;
+
+	stbi__start_callbacks(&ctx, (stbi_io_callbacks*)c, user);
+	found = stbi__info_main(&ctx, x, y, comp);
+	return found;
+}
+
+// Reports whether the image held in a memory buffer of len bytes is 16-bit.
+STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const* buffer, int len)
+{
+	stbi__context ctx;
+	int is16;
+
+	stbi__start_mem(&ctx, buffer, len);
+	is16 = stbi__is_16_main(&ctx);
+	return is16;
+}
+
+// Reports whether the image read through user-supplied I/O callbacks is 16-bit;
+// user is handed to stbi__start_callbacks together with the callback table.
+STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const* c, void* user)
+{
+	stbi__context ctx;
+	int is16;
+
+	stbi__start_callbacks(&ctx, (stbi_io_callbacks*)c, user);
+	is16 = stbi__is_16_main(&ctx);
+	return is16;
+}
+
+#endif // STB_IMAGE_IMPLEMENTATION
+
+/*
+   revision history:
+	  2.19  (2018-02-11) fix warning
+	  2.18  (2018-01-30) fix warnings
+	  2.17  (2018-01-29) change stbi__shiftsigned to avoid clang -O2 bug
+						 1-bit BMP
+						 *_is_16_bit api
+						 avoid warnings
+	  2.16  (2017-07-23) all functions have 16-bit variants;
+						 STBI_NO_STDIO works again;
+						 compilation fixes;
+						 fix rounding in unpremultiply;
+						 optimize vertical flip;
+						 disable raw_len validation;
+						 documentation fixes
+	  2.15  (2017-03-18) fix png-1,2,4 bug; now all Imagenet JPGs decode;
+						 warning fixes; disable run-time SSE detection on gcc;
+						 uniform handling of optional "return" values;
+						 thread-safe initialization of zlib tables
+	  2.14  (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs
+	  2.13  (2016-11-29) add 16-bit API, only supported for PNG right now
+	  2.12  (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
+	  2.11  (2016-04-02) allocate large structures on the stack
+						 remove white matting for transparent PSD
+						 fix reported channel count for PNG & BMP
+						 re-enable SSE2 in non-gcc 64-bit
+						 support RGB-formatted JPEG
+						 read 16-bit PNGs (only as 8-bit)
+	  2.10  (2016-01-22) avoid warning introduced in 2.09 by STBI_REALLOC_SIZED
+	  2.09  (2016-01-16) allow comments in PNM files
+						 16-bit-per-pixel TGA (not bit-per-component)
+						 info() for TGA could break due to .hdr handling
+						 info() for BMP now shares code instead of sloppy parse
+						 can use STBI_REALLOC_SIZED if allocator doesn't support realloc
+						 code cleanup
+	  2.08  (2015-09-13) fix to 2.07 cleanup, reading RGB PSD as RGBA
+	  2.07  (2015-09-13) fix compiler warnings
+						 partial animated GIF support
+						 limited 16-bpc PSD support
+						 #ifdef unused functions
+						 bug with < 92 byte PIC,PNM,HDR,TGA
+	  2.06  (2015-04-19) fix bug where PSD returns wrong '*comp' value
+	  2.05  (2015-04-19) fix bug in progressive JPEG handling, fix warning
+	  2.04  (2015-04-15) try to re-enable SIMD on MinGW 64-bit
+	  2.03  (2015-04-12) extra corruption checking (mmozeiko)
+						 stbi_set_flip_vertically_on_load (nguillemot)
+						 fix NEON support; fix mingw support
+	  2.02  (2015-01-19) fix incorrect assert, fix warning
+	  2.01  (2015-01-17) fix various warnings; suppress SIMD on gcc 32-bit without -msse2
+	  2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG
+	  2.00  (2014-12-25) optimize JPG, including x86 SSE2 & NEON SIMD (ryg)
+						 progressive JPEG (stb)
+						 PGM/PPM support (Ken Miller)
+						 STBI_MALLOC,STBI_REALLOC,STBI_FREE
+						 GIF bugfix -- seemingly never worked
+						 STBI_NO_*, STBI_ONLY_*
+	  1.48  (2014-12-14) fix incorrectly-named assert()
+	  1.47  (2014-12-14) 1/2/4-bit PNG support, both direct and paletted (Omar Cornut & stb)
+						 optimize PNG (ryg)
+						 fix bug in interlaced PNG with user-specified channel count (stb)
+	  1.46  (2014-08-26)
+			  fix broken tRNS chunk (colorkey-style transparency) in non-paletted PNG
+	  1.45  (2014-08-16)
+			  fix MSVC-ARM internal compiler error by wrapping malloc
+	  1.44  (2014-08-07)
+			  various warning fixes from Ronny Chevalier
+	  1.43  (2014-07-15)
+			  fix MSVC-only compiler problem in code changed in 1.42
+	  1.42  (2014-07-09)
+			  don't define _CRT_SECURE_NO_WARNINGS (affects user code)
+			  fixes to stbi__cleanup_jpeg path
+			  added STBI_ASSERT to avoid requiring assert.h
+	  1.41  (2014-06-25)
+			  fix search&replace from 1.36 that messed up comments/error messages
+	  1.40  (2014-06-22)
+			  fix gcc struct-initialization warning
+	  1.39  (2014-06-15)
+			  fix to TGA optimization when req_comp != number of components in TGA;
+			  fix to GIF loading because BMP wasn't rewinding (whoops, no GIFs in my test suite)
+			  add support for BMP version 5 (more ignored fields)
+	  1.38  (2014-06-06)
+			  suppress MSVC warnings on integer casts truncating values
+			  fix accidental rename of 'skip' field of I/O
+	  1.37  (2014-06-04)
+			  remove duplicate typedef
+	  1.36  (2014-06-03)
+			  convert to header file single-file library
+			  if de-iphone isn't set, load iphone images color-swapped instead of returning NULL
+	  1.35  (2014-05-27)
+			  various warnings
+			  fix broken STBI_SIMD path
+			  fix bug where stbi_load_from_file no longer left file pointer in correct place
+			  fix broken non-easy path for 32-bit BMP (possibly never used)
+			  TGA optimization by Arseny Kapoulkine
+	  1.34  (unknown)
+			  use STBI_NOTUSED in stbi__resample_row_generic(), fix one more leak in tga failure case
+	  1.33  (2011-07-14)
+			  make stbi_is_hdr work in STBI_NO_HDR (as specified), minor compiler-friendly improvements
+	  1.32  (2011-07-13)
+			  support for "info" function for all supported filetypes (SpartanJ)
+	  1.31  (2011-06-20)
+			  a few more leak fixes, bug in PNG handling (SpartanJ)
+	  1.30  (2011-06-11)
+			  added ability to load files via callbacks to accommodate custom input streams (Ben Wenger)
+			  removed deprecated format-specific test/load functions
+			  removed support for installable file formats (stbi_loader) -- would have been broken for IO callbacks anyway
+			  error cases in bmp and tga give messages and don't leak (Raymond Barbiero, grisha)
+			  fix inefficiency in decoding 32-bit BMP (David Woo)
+	  1.29  (2010-08-16)
+			  various warning fixes from Aurelien Pocheville
+	  1.28  (2010-08-01)
+			  fix bug in GIF palette transparency (SpartanJ)
+	  1.27  (2010-08-01)
+			  cast-to-stbi_uc to fix warnings
+	  1.26  (2010-07-24)
+			  fix bug in file buffering for PNG reported by SpartanJ
+	  1.25  (2010-07-17)
+			  refix trans_data warning (Won Chun)
+	  1.24  (2010-07-12)
+			  perf improvements reading from files on platforms with lock-heavy fgetc()
+			  minor perf improvements for jpeg
+			  deprecated type-specific functions so we'll get feedback if they're needed
+			  attempt to fix trans_data warning (Won Chun)
+	  1.23    fixed bug in iPhone support
+	  1.22  (2010-07-10)
+			  removed image *writing* support
+			  stbi_info support from Jetro Lauha
+			  GIF support from Jean-Marc Lienher
+			  iPhone PNG-extensions from James Brown
+			  warning-fixes from Nicolas Schulz and Janez Zemva (i.e. Janez (U+017D)emva)
+	  1.21    fix use of 'stbi_uc' in header (reported by jon blow)
+	  1.20    added support for Softimage PIC, by Tom Seddon
+	  1.19    bug in interlaced PNG corruption check (found by ryg)
+	  1.18  (2008-08-02)
+			  fix a threading bug (local mutable static)
+	  1.17    support interlaced PNG
+	  1.16    major bugfix - stbi__convert_format converted one too many pixels
+	  1.15    initialize some fields for thread safety
+	  1.14    fix threadsafe conversion bug
+			  header-file-only version (#define STBI_HEADER_FILE_ONLY before including)
+	  1.13    threadsafe
+	  1.12    const qualifiers in the API
+	  1.11    Support installable IDCT, colorspace conversion routines
+	  1.10    Fixes for 64-bit (don't use "unsigned long")
+			  optimized upsampling by Fabian "ryg" Giesen
+	  1.09    Fix format-conversion for PSD code (bad global variables!)
+	  1.08    Thatcher Ulrich's PSD code integrated by Nicolas Schulz
+	  1.07    attempt to fix C++ warning/errors again
+	  1.06    attempt to fix C++ warning/errors again
+	  1.05    fix TGA loading to return correct *comp and use good luminance calc
+	  1.04    default float alpha is 1, not 255; use 'void *' for stbi_image_free
+	  1.03    bugfixes to STBI_NO_STDIO, STBI_NO_HDR
+	  1.02    support for (subset of) HDR files, float interface for preferred access to them
+	  1.01    fix bug: possible bug in handling right-side up bmps... not sure
+			  fix bug: the stbi__bmp_load() and stbi__tga_load() functions didn't work at all
+	  1.00    interface to zlib that skips zlib header
+	  0.99    correct handling of alpha in palette
+	  0.98    TGA loader by lonesock; dynamically add loaders (untested)
+	  0.97    jpeg errors on too large a file; also catch another malloc failure
+	  0.96    fix detection of invalid v value - particleman@mollyrocket forum
+	  0.95    during header scan, seek to markers in case of padding
+	  0.94    STBI_NO_STDIO to disable stdio usage; rename all #defines the same
+	  0.93    handle jpegtran output; verbose errors
+	  0.92    read 4,8,16,24,32-bit BMP files of several formats
+	  0.91    output 24-bit Windows 3.0 BMP files
+	  0.90    fix a few more warnings; bump version number to approach 1.0
+	  0.61    bugfixes due to Marc LeBlanc, Christopher Lloyd
+	  0.60    fix compiling as c++
+	  0.59    fix warnings: merge Dave Moore's -Wall fixes
+	  0.58    fix bug: zlib uncompressed mode len/nlen was wrong endian
+	  0.57    fix bug: jpg last huffman symbol before marker was >9 bits but less than 16 available
+	  0.56    fix bug: zlib uncompressed mode len vs. nlen
+	  0.55    fix bug: restart_interval not initialized to 0
+	  0.54    allow NULL for 'int *comp'
+	  0.53    fix bug in png 3->4; speedup png decoding
+	  0.52    png handles req_comp=3,4 directly; minor cleanup; jpeg comments
+	  0.51    obey req_comp requests, 1-component jpegs return as 1-component,
+			  on 'test' only check type, not whether we support this variant
+	  0.50  (2006-11-19)
+			  first released version
+*/
+
+
+/*
+------------------------------------------------------------------------------
+This software is available under 2 licenses -- choose whichever you prefer.
+------------------------------------------------------------------------------
+ALTERNATIVE A - MIT License
+Copyright (c) 2017 Sean Barrett
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+------------------------------------------------------------------------------
+ALTERNATIVE B - Public Domain (www.unlicense.org)
+This is free and unencumbered software released into the public domain.
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+software, either in source code form or as a compiled binary, for any purpose,
+commercial or non-commercial, and by any means.
+In jurisdictions that recognize copyright laws, the author or authors of this
+software dedicate any and all copyright interest in the software to the public
+domain. We make this dedication for the benefit of the public at large and to
+the detriment of our heirs and successors. We intend this dedication to be an
+overt act of relinquishment in perpetuity of all present and future rights to
+this software under copyright law.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+------------------------------------------------------------------------------
+*/
diff --git a/src/ThirdParty/stb_image_write.hpp b/src/ThirdParty/stb_image_write.hpp
new file mode 100644
index 0000000..764761a
--- /dev/null
+++ b/src/ThirdParty/stb_image_write.hpp
@@ -0,0 +1,7568 @@
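+/* stb_image - v2.19 - public domain image loader - http://nothings.org/stb
+								  no warranty implied; use at your own risk
+   NOTE(review): despite the file name stb_image_write.hpp, this file contains the stb_image loader API (stbi_load*, guard STBI_INCLUDE_STB_IMAGE_H); its header section declares no image-writing functions.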
+   Do this:
+	  #define STB_IMAGE_IMPLEMENTATION
+   before you include this file in *one* C or C++ file to create the implementation.
+
+   // i.e. it should look like this:
+   #include ...
+   #include ...
+   #include ...
+   #define STB_IMAGE_IMPLEMENTATION
+   #include "stb_image.h"
+
+   You can #define STBI_ASSERT(x) before the #include to avoid using assert.h.
+   And #define STBI_MALLOC, STBI_REALLOC, and STBI_FREE to avoid using malloc,realloc,free
+
+
+   QUICK NOTES:
+	  Primarily of interest to game developers and other people who can
+		  avoid problematic images and only need the trivial interface
+
+	  JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib)
+	  PNG 1/2/4/8/16-bit-per-channel
+
+	  TGA (not sure what subset, if a subset)
+	  BMP non-1bpp, non-RLE
+	  PSD (composited view only, no extra channels, 8/16 bit-per-channel)
+
+	  GIF (*comp always reports as 4-channel)
+	  HDR (radiance rgbE format)
+	  PIC (Softimage PIC)
+	  PNM (PPM and PGM binary only)
+
+	  Animated GIF still needs a proper API, but here's one way to do it:
+		  http://gist.github.com/urraka/685d9a6340b26b830d49
+
+	  - decode from memory or through FILE (define STBI_NO_STDIO to remove code)
+	  - decode from arbitrary I/O callbacks
+	  - SIMD acceleration on x86/x64 (SSE2) and ARM (NEON)
+
+   Full documentation under "DOCUMENTATION" below.
+
+
+LICENSE
+
+  See end of file for license information.
+
+RECENT REVISION HISTORY:
+
+	  2.19  (2018-02-11) fix warning
+	  2.18  (2018-01-30) fix warnings
+	  2.17  (2018-01-29) bugfix, 1-bit BMP, 16-bitness query, fix warnings
+	  2.16  (2017-07-23) all functions have 16-bit variants; optimizations; bugfixes
+	  2.15  (2017-03-18) fix png-1,2,4; all Imagenet JPGs; no runtime SSE detection on GCC
+	  2.14  (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs
+	  2.13  (2016-12-04) experimental 16-bit API, only for PNG so far; fixes
+	  2.12  (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
+	  2.11  (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64
+						 RGB-format JPEG; remove white matting in PSD;
+						 allocate large structures on the stack;
+						 correct channel count for PNG & BMP
+	  2.10  (2016-01-22) avoid warning introduced in 2.09
+	  2.09  (2016-01-16) 16-bit TGA; comments in PNM files; STBI_REALLOC_SIZED
+
+   See end of file for full revision history.
+
+
+ ============================    Contributors    =========================
+
+ Image formats                          Extensions, features
+	Sean Barrett (jpeg, png, bmp)          Jetro Lauha (stbi_info)
+	Nicolas Schulz (hdr, psd)              Martin "SpartanJ" Golini (stbi_info)
+	Jonathan Dummer (tga)                  James "moose2000" Brown (iPhone PNG)
+	Jean-Marc Lienher (gif)                Ben "Disch" Wenger (io callbacks)
+	Tom Seddon (pic)                       Omar Cornut (1/2/4-bit PNG)
+	Thatcher Ulrich (psd)                  Nicolas Guillemot (vertical flip)
+	Ken Miller (pgm, ppm)                  Richard Mitton (16-bit PSD)
+	github:urraka (animated gif)           Junggon Kim (PNM comments)
+	Christopher Forseth (animated gif)     Daniel Gibson (16-bit TGA)
+										   socks-the-fox (16-bit PNG)
+										   Jeremy Sawicki (handle all ImageNet JPGs)
+ Optimizations & bugfixes                  Mikhail Morozov (1-bit BMP)
+	Fabian "ryg" Giesen                    Anael Seghezzi (is-16-bit query)
+	Arseny Kapoulkine
+	John-Mark Allen
+
+ Bug & warning fixes
+	Marc LeBlanc            David Woo          Guillaume George   Martins Mozeiko
+	Christopher Lloyd       Jerry Jansson      Joseph Thomson     Phil Jordan
+	Dave Moore              Roy Eltham         Hayaki Saito       Nathan Reed
+	Won Chun                Luke Graham        Johan Duparc       Nick Verigakis
+	the Horde3D community   Thomas Ruf         Ronny Chevalier    github:rlyeh
+	Janez Zemva             John Bartholomew   Michal Cichon      github:romigrou
+	Jonathan Blow           Ken Hamada         Tero Hanninen      github:svdijk
+	Laurent Gomila          Cort Stratton      Sergio Gonzalez    github:snagar
+	Aurelien Pocheville     Thibault Reuille   Cass Everitt       github:Zelex
+	Raymond Barbiero        Paul Du Bois       Engin Manap        github:grim210
+	Aldo Culquicondor       Philipp Wiesemann  Dale Weiler        github:sammyhw
+	Oriol Ferrer Mesia      Josh Tobin         Matthew Gregan     github:phprus
+	Julian Raschke          Gregory Mullen     Baldur Karlsson    github:poppolopoppo
+	Christian Floisand      Kevin Schmidt                         github:darealshinji
+	Blazej Dariusz Roszkowski                                     github:Michaelangel007
+*/
+
+#ifndef STBI_INCLUDE_STB_IMAGE_H
+#define STBI_INCLUDE_STB_IMAGE_H
+
+// DOCUMENTATION
+//
+// Limitations:
+//    - no 12-bit-per-channel JPEG
+//    - no JPEGs with arithmetic coding
+//    - GIF always returns *comp=4
+//
+// Basic usage (see HDR discussion below for HDR usage):
+//    int x,y,n;
+//    unsigned char *data = stbi_load(filename, &x, &y, &n, 0);
+//    // ... process data if not NULL ...
+//    // ... x = width, y = height, n = # 8-bit components per pixel ...
+//    // ... replace '0' with '1'..'4' to force that many components per pixel
+//    // ... but 'n' will always be the number that it would have been if you said 0
+//    stbi_image_free(data);
+//
+// Standard parameters:
+//    int *x                 -- outputs image width in pixels
+//    int *y                 -- outputs image height in pixels
+//    int *channels_in_file  -- outputs # of image components in image file
+//    int desired_channels   -- if non-zero, # of image components requested in result
+//
+// The return value from an image loader is an 'unsigned char *' which points
+// to the pixel data, or NULL on an allocation failure or if the image is
+// corrupt or invalid. The pixel data consists of *y scanlines of *x pixels,
+// with each pixel consisting of N interleaved 8-bit components; the first
+// pixel pointed to is top-left-most in the image. There is no padding between
+// image scanlines or between pixels, regardless of format. The number of
+// components N is 'desired_channels' if desired_channels is non-zero, or
+// *channels_in_file otherwise. If desired_channels is non-zero,
+// *channels_in_file has the number of components that _would_ have been
+// output otherwise. E.g. if you set desired_channels to 4, you will always
+// get RGBA output, but you can check *channels_in_file to see if it's trivially
+// opaque because e.g. there were only 3 channels in the source image.
+//
+// An output image with N components has the following components interleaved
+// in this order in each pixel:
+//
+//     N=#comp     components
+//       1           grey
+//       2           grey, alpha
+//       3           red, green, blue
+//       4           red, green, blue, alpha
+//
+// If image loading fails for any reason, the return value will be NULL,
+// and *x, *y, *channels_in_file will be unchanged. The function
+// stbi_failure_reason() can be queried for an extremely brief, end-user
+// unfriendly explanation of why the load failed. Define STBI_NO_FAILURE_STRINGS
+// to avoid compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly
+// more user-friendly ones.
+//
+// Paletted PNG, BMP, GIF, and PIC images are automatically depalettized.
+//
+// ===========================================================================
+//
+// Philosophy
+//
+// stb libraries are designed with the following priorities:
+//
+//    1. easy to use
+//    2. easy to maintain
+//    3. good performance
+//
+// Sometimes I let "good performance" creep up in priority over "easy to maintain",
+// and for best performance I may provide less-easy-to-use APIs that give higher
+// performance, in addition to the easy to use ones. Nevertheless, it's important
+// to keep in mind that from the standpoint of you, a client of this library,
+// all you care about is #1 and #3, and stb libraries DO NOT emphasize #3 above all.
+//
+// Some secondary priorities arise directly from the first two, some of which
+// make more explicit reasons why performance can't be emphasized.
+//
+//    - Portable ("ease of use")
+//    - Small source code footprint ("easy to maintain")
+//    - No dependencies ("ease of use")
+//
+// ===========================================================================
+//
+// I/O callbacks
+//
+// I/O callbacks allow you to read from arbitrary sources, like packaged
+// files or some other source. Data read from callbacks are processed
+// through a small internal buffer (currently 128 bytes) to try to reduce
+// overhead.
+//
+// The three functions you must define are "read" (reads some bytes of data),
+// "skip" (skips some bytes of data), "eof" (reports if the stream is at the end).
+//
+// ===========================================================================
+//
+// SIMD support
+//
+// The JPEG decoder will try to automatically use SIMD kernels on x86 when
+// supported by the compiler. For ARM Neon support, you must explicitly
+// request it.
+//
+// (The old do-it-yourself SIMD API is no longer supported in the current
+// code.)
+//
+// On x86, SSE2 is used automatically: MSVC builds check for it with a run-time
+// CPUID test, GCC/Clang builds use it whenever the compiler has SSE2 enabled; if not, the generic C versions are used as a fall-back. On ARM targets,
+// the typical path is to have separate builds for NEON and non-NEON devices
+// (at least this is true for iOS and Android). Therefore, the NEON support is
+// toggled by a build flag: define STBI_NEON to get NEON loops.
+//
+// If for some reason you do not want to use any of SIMD code, or if
+// you have issues compiling it, you can disable it entirely by
+// defining STBI_NO_SIMD.
+//
+// ===========================================================================
+//
+// HDR image support   (disable by defining STBI_NO_HDR)
+//
+// stb_image now supports loading HDR images in general, and currently
+// the Radiance .HDR file format, although the support is provided
+// generically. You can still load any file through the existing interface;
+// if you attempt to load an HDR file, it will be automatically remapped to
+// LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1;
+// both of these constants can be reconfigured through this interface:
+//
+//     stbi_hdr_to_ldr_gamma(2.2f);
+//     stbi_hdr_to_ldr_scale(1.0f);
+//
+// (note, do not use _inverse_ constants; stb_image will invert them
+// appropriately).
+//
+// Additionally, there is a new, parallel interface for loading files as
+// (linear) floats to preserve the full dynamic range:
+//
+//    float *data = stbi_loadf(filename, &x, &y, &n, 0);
+//
+// If you load LDR images through this interface, those images will
+// be promoted to floating point values, run through the inverse of
+// constants corresponding to the above:
+//
+//     stbi_ldr_to_hdr_scale(1.0f);
+//     stbi_ldr_to_hdr_gamma(2.2f);
+//
+// Finally, given a filename (or an open file or memory block--see header
+// file for details) containing image data, you can query for the "most
+// appropriate" interface to use (that is, whether the image is HDR or
+// not), using:
+//
+//     stbi_is_hdr(char const *filename);
+//
+// ===========================================================================
+//
+// iPhone PNG support:
+//
+// By default we convert iphone-formatted PNGs back to RGB, even though
+// they are internally encoded differently. You can disable this conversion
+// by calling stbi_convert_iphone_png_to_rgb(0), in which case
+// you will always just get the native iphone "format" through (which
+// is BGR stored in RGB).
+//
+// Call stbi_set_unpremultiply_on_load(1) as well to force a divide per
+// pixel to remove any premultiplied alpha *only* if the image file explicitly
+// says there's premultiplied data (currently only happens in iPhone images,
+// and only if iPhone convert-to-rgb processing is on).
+//
+// ===========================================================================
+//
+// ADDITIONAL CONFIGURATION
+//
+//  - You can suppress implementation of any of the decoders to reduce
+//    your code footprint by #defining one or more of the following
+//    symbols before creating the implementation.
+//
+//        STBI_NO_JPEG
+//        STBI_NO_PNG
+//        STBI_NO_BMP
+//        STBI_NO_PSD
+//        STBI_NO_TGA
+//        STBI_NO_GIF
+//        STBI_NO_HDR
+//        STBI_NO_PIC
+//        STBI_NO_PNM   (.ppm and .pgm)
+//
+//  - You can request *only* certain decoders and suppress all other ones
+//    (this will be more forward-compatible, as addition of new decoders
+//    doesn't require you to disable them explicitly):
+//
+//        STBI_ONLY_JPEG
+//        STBI_ONLY_PNG
+//        STBI_ONLY_BMP
+//        STBI_ONLY_PSD
+//        STBI_ONLY_TGA
+//        STBI_ONLY_GIF
+//        STBI_ONLY_HDR
+//        STBI_ONLY_PIC
+//        STBI_ONLY_PNM   (.ppm and .pgm)
+//
+//   - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still
+//     want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB
+//
+
+
+#ifndef STBI_NO_STDIO
+#include <stdio.h>
+#endif // STBI_NO_STDIO
+
+#define STBI_VERSION 1
+
+enum // channel-count constants: each value equals the number of interleaved components per pixel
+{
+	STBI_default = 0, // only used for desired_channels: 0 means no specific channel count is requested
+
+	STBI_grey = 1, // grey
+	STBI_grey_alpha = 2, // grey, alpha
+	STBI_rgb = 3, // red, green, blue
+	STBI_rgb_alpha = 4 // red, green, blue, alpha
+};
+
+typedef unsigned char stbi_uc; // sample type returned by the 8-bits-per-channel loaders (also used for raw input bytes)
+typedef unsigned short stbi_us; // sample type returned by the 16-bits-per-channel loaders
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef STB_IMAGE_STATIC // linkage of every public function is chosen here
+#define STBIDEF static // internal linkage: the API is private to the translation unit that includes this file
+#else
+#define STBIDEF extern // default: external linkage
+#endif
+
+	//////////////////////////////////////////////////////////////////////////////
+	//
+	// PRIMARY API - works on images of any type
+	//
+
+	//
+	// load image by filename, open file, or memory buffer
+	//
+
+	typedef struct // callback table taken by the *_from_callbacks functions; 'user' is presumably the opaque pointer supplied alongside it -- TODO confirm
+	{
+		int      (*read)  (void* user, char* data, int size);   // fill 'data' with 'size' bytes.  return number of bytes actually read
+		void     (*skip)  (void* user, int n);                 // skip the next 'n' bytes, or 'unget' the last -n bytes if negative
+		int      (*eof)   (void* user);                       // returns nonzero if we are at end of file/data
+	} stbi_io_callbacks;
+
+	////////////////////////////////////
+	//
+	// 8-bits-per-channel interface
+	//
+
+	STBIDEF stbi_uc* stbi_load_from_memory(stbi_uc           const* buffer, int len, int* x, int* y, int* channels_in_file, int desired_channels); // decode from 'len' bytes at 'buffer'
+	STBIDEF stbi_uc* stbi_load_from_callbacks(stbi_io_callbacks const* clbk, void* user, int* x, int* y, int* channels_in_file, int desired_channels); // decode through user I/O callbacks
+#ifndef STBI_NO_GIF
+	STBIDEF stbi_uc* stbi_load_gif_from_memory(stbi_uc const* buffer, int len, int** delays, int* x, int* y, int* z, int* comp, int req_comp); // NOTE(review): 'delays'/'z' are presumably per-frame delays and the frame count -- confirm against the implementation
+#endif
+	// All loaders return NULL on failure; release a non-NULL result with stbi_image_free().
+
+#ifndef STBI_NO_STDIO
+	STBIDEF stbi_uc* stbi_load(char const* filename, int* x, int* y, int* channels_in_file, int desired_channels);
+	STBIDEF stbi_uc* stbi_load_from_file(FILE* f, int* x, int* y, int* channels_in_file, int desired_channels);
+	// for stbi_load_from_file, file pointer is left pointing immediately after image
+#endif // STBI_NO_STDIO
+
+////////////////////////////////////
+//
+// 16-bits-per-channel interface
+//
+
+	STBIDEF stbi_us* stbi_load_16_from_memory(stbi_uc const* buffer, int len, int* x, int* y, int* channels_in_file, int desired_channels);
+	STBIDEF stbi_us* stbi_load_16_from_callbacks(stbi_io_callbacks const* clbk, void* user, int* x, int* y, int* channels_in_file, int desired_channels);
+	// Same parameter lists as the 8-bit loaders; the result is an array of stbi_us (unsigned short) samples.
+#ifndef STBI_NO_STDIO
+	STBIDEF stbi_us* stbi_load_16(char const* filename, int* x, int* y, int* channels_in_file, int desired_channels);
+	STBIDEF stbi_us* stbi_load_from_file_16(FILE* f, int* x, int* y, int* channels_in_file, int desired_channels); // note the suffix order: "_from_file_16", unlike "stbi_load_16_from_memory"
+#endif // STBI_NO_STDIO
+
+	////////////////////////////////////
+	//
+	// float-per-channel interface
+	//
+#ifndef STBI_NO_LINEAR
+	STBIDEF float* stbi_loadf_from_memory(stbi_uc const* buffer, int len, int* x, int* y, int* channels_in_file, int desired_channels);
+	STBIDEF float* stbi_loadf_from_callbacks(stbi_io_callbacks const* clbk, void* user, int* x, int* y, int* channels_in_file, int desired_channels);
+	// Same parameter lists as the 8-bit loaders; the result is an array of (linear) float samples.
+#ifndef STBI_NO_STDIO
+	STBIDEF float* stbi_loadf(char const* filename, int* x, int* y, int* channels_in_file, int desired_channels);
+	STBIDEF float* stbi_loadf_from_file(FILE* f, int* x, int* y, int* channels_in_file, int desired_channels);
+#endif // STBI_NO_STDIO
+#endif // STBI_NO_LINEAR
+	// Conversion constants; pass the forward values (e.g. 2.2f, 1.0f), not their inverses.
+#ifndef STBI_NO_HDR
+	STBIDEF void   stbi_hdr_to_ldr_gamma(float gamma); // gamma applied when an HDR file is loaded through the LDR interface (documented default 2.2)
+	STBIDEF void   stbi_hdr_to_ldr_scale(float scale); // scale factor for the same HDR -> LDR remapping (documented default 1)
+#endif // STBI_NO_HDR
+
+#ifndef STBI_NO_LINEAR
+	STBIDEF void   stbi_ldr_to_hdr_gamma(float gamma); // gamma used when an LDR image is promoted to float via stbi_loadf*
+	STBIDEF void   stbi_ldr_to_hdr_scale(float scale); // scale used for the same LDR -> float promotion
+#endif // STBI_NO_LINEAR
+
+	// stbi_is_hdr* is always defined, but always returns false if STBI_NO_HDR is set
+	STBIDEF int    stbi_is_hdr_from_callbacks(stbi_io_callbacks const* clbk, void* user); // query through user I/O callbacks
+	STBIDEF int    stbi_is_hdr_from_memory(stbi_uc const* buffer, int len); // query 'len' bytes at 'buffer'
+#ifndef STBI_NO_STDIO
+	STBIDEF int      stbi_is_hdr(char const* filename); // query a file by name
+	STBIDEF int      stbi_is_hdr_from_file(FILE* f); // query an already-open file
+#endif // STBI_NO_STDIO
+
+
+	// get a VERY brief reason for failure (see STBI_NO_FAILURE_STRINGS / STBI_FAILURE_USERMSG)
+	// NOT THREADSAFE
+	STBIDEF const char* stbi_failure_reason(void);
+	// Release pixel data returned by any loader above.
+	// free the loaded image -- this is just free()
+	STBIDEF void     stbi_image_free(void* retval_from_stbi_load);
+
+	// get image dimensions & components without fully decoding
+	STBIDEF int      stbi_info_from_memory(stbi_uc const* buffer, int len, int* x, int* y, int* comp); // NOTE(review): presumably returns nonzero on success -- confirm against the implementation
+	STBIDEF int      stbi_info_from_callbacks(stbi_io_callbacks const* clbk, void* user, int* x, int* y, int* comp);
+	STBIDEF int      stbi_is_16_bit_from_memory(stbi_uc const* buffer, int len); // the "16-bitness query" listed under 2.17 in the revision history
+	STBIDEF int      stbi_is_16_bit_from_callbacks(stbi_io_callbacks const* clbk, void* user);
+	// FILE/filename variants of the same queries; compiled out when STBI_NO_STDIO is defined.
+#ifndef STBI_NO_STDIO
+	STBIDEF int      stbi_info(char const* filename, int* x, int* y, int* comp);
+	STBIDEF int      stbi_info_from_file(FILE* f, int* x, int* y, int* comp);
+	STBIDEF int      stbi_is_16_bit(char const* filename);
+	STBIDEF int      stbi_is_16_bit_from_file(FILE* f);
+#endif // STBI_NO_STDIO
+
+
+
+	// for image formats that explicitly notate that they have premultiplied alpha,
+	// we just return the colors as stored in the file. set this flag to force
+	// unpremultiplication. results are undefined if the unpremultiply overflows.
+	STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply); // per the docs above: currently only affects iPhone PNGs, and only while convert-to-rgb is on
+
+	// indicate whether we should process iphone images back to canonical format,
+	// or just pass them through "as-is"
+	STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert); // conversion is on by default; pass 0 to receive the native BGR-stored-as-RGB data
+
+	// flip the image vertically, so the first pixel in the output array is the bottom left
+	STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip); // without it the first pixel is the top-left-most one
+
+	// ZLIB client - used by PNG, available for other purposes
+	// NOTE(review): the *_malloc variants presumably return an allocated buffer with its length in *outlen, and the *_buffer variants decode into caller storage -- confirm against the implementation
+	STBIDEF char* stbi_zlib_decode_malloc_guesssize(const char* buffer, int len, int initial_size, int* outlen);
+	STBIDEF char* stbi_zlib_decode_malloc_guesssize_headerflag(const char* buffer, int len, int initial_size, int* outlen, int parse_header);
+	STBIDEF char* stbi_zlib_decode_malloc(const char* buffer, int len, int* outlen);
+	STBIDEF int   stbi_zlib_decode_buffer(char* obuffer, int olen, const char* ibuffer, int ilen);
+	// "noheader" variants: cf. revision history 1.00, "interface to zlib that skips zlib header"
+	STBIDEF char* stbi_zlib_decode_noheader_malloc(const char* buffer, int len, int* outlen);
+	STBIDEF int   stbi_zlib_decode_noheader_buffer(char* obuffer, int olen, const char* ibuffer, int ilen);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+//
+//
+////   end header file   /////////////////////////////////////////////////////
+#endif // STBI_INCLUDE_STB_IMAGE_H
+
+#ifdef STB_IMAGE_IMPLEMENTATION
+
+#if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || defined(STBI_ONLY_BMP) \
+  || defined(STBI_ONLY_TGA) || defined(STBI_ONLY_GIF) || defined(STBI_ONLY_PSD) \
+  || defined(STBI_ONLY_HDR) || defined(STBI_ONLY_PIC) || defined(STBI_ONLY_PNM) \
+  || defined(STBI_ONLY_ZLIB) // any STBI_ONLY_* request: switch off every decoder that was not asked for
+#ifndef STBI_ONLY_JPEG
+#define STBI_NO_JPEG
+#endif
+#ifndef STBI_ONLY_PNG
+#define STBI_NO_PNG // NOTE(review): also reached when only STBI_ONLY_ZLIB is defined
+#endif
+#ifndef STBI_ONLY_BMP
+#define STBI_NO_BMP
+#endif
+#ifndef STBI_ONLY_PSD
+#define STBI_NO_PSD
+#endif
+#ifndef STBI_ONLY_TGA
+#define STBI_NO_TGA
+#endif
+#ifndef STBI_ONLY_GIF
+#define STBI_NO_GIF
+#endif
+#ifndef STBI_ONLY_HDR
+#define STBI_NO_HDR
+#endif
+#ifndef STBI_ONLY_PIC
+#define STBI_NO_PIC
+#endif
+#ifndef STBI_ONLY_PNM
+#define STBI_NO_PNM
+#endif
+#endif // any STBI_ONLY_* defined
+
+#if defined(STBI_NO_PNG) && !defined(STBI_SUPPORT_ZLIB) && !defined(STBI_ONLY_ZLIB) && !defined(STBI_NO_ZLIB) // zlib is dropped together with PNG unless it was requested explicitly
+#define STBI_NO_ZLIB // STBI_ONLY_ZLIB is exempt: the STBI_ONLY_* mapping defines STBI_NO_PNG for it, which previously disabled zlib as well
+#endif
+
+
+#include <stdarg.h>
+#include <stddef.h> // ptrdiff_t on osx
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+
+#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
+#include <math.h>  // ldexp, pow
+#endif
+
+#ifndef STBI_NO_STDIO
+#include <stdio.h>
+#endif
+
+#ifndef STBI_ASSERT
+#include <assert.h>
+#define STBI_ASSERT(x) assert(x)
+#endif
+
+
+#ifndef _MSC_VER // pick the inline keyword for this compiler/language combination
+#ifdef __cplusplus
+#define stbi_inline inline
+#else
+#define stbi_inline // non-MSVC C build: expands to nothing
+#endif
+#else
+#define stbi_inline __forceinline // MSVC (C or C++)
+#endif
+
+
+#ifdef _MSC_VER // MSVC: spell the fixed-width types with built-in types instead of including <stdint.h>
+typedef unsigned short stbi__uint16;
+typedef   signed short stbi__int16;
+typedef unsigned int   stbi__uint32;
+typedef   signed int   stbi__int32;
+#else
+#include <stdint.h>
+typedef uint16_t stbi__uint16;
+typedef int16_t  stbi__int16;
+typedef uint32_t stbi__uint32;
+typedef int32_t  stbi__int32;
+#endif
+
+// should produce compiler error if size is wrong: the array length becomes -1 unless stbi__uint32 is exactly 4 bytes
+typedef unsigned char validate_uint32[sizeof(stbi__uint32) == 4 ? 1 : -1];
+
+#ifdef _MSC_VER // STBI_NOTUSED(v): mention v so it does not count as unused
+#define STBI_NOTUSED(v)  (void)(v)
+#else
+#define STBI_NOTUSED(v)  (void)sizeof(v) // sizeof leaves its operand unevaluated
+#endif
+
+#ifdef _MSC_VER
+#define STBI_HAS_LROTL // use the compiler-provided _lrotl below
+#endif
+
+#ifdef STBI_HAS_LROTL
+#define stbi_lrot(x,y)  _lrotl(x,y)
+#else
+#define stbi_lrot(x,y)  (((x) << (y)) | ((x) >> (32 - (y)))) // rotate left; assumes a 32-bit x -- NOTE(review): y == 0 would shift right by 32, undefined for a 32-bit operand
+#endif
+
+#if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED))
+// ok: the client supplied a complete custom allocator
+#elif !defined(STBI_MALLOC) && !defined(STBI_FREE) && !defined(STBI_REALLOC) && !defined(STBI_REALLOC_SIZED)
+// ok: nothing overridden, the C runtime defaults below are used
+#else
+#error "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC (or STBI_REALLOC_SIZED)."
+#endif
+	// Defaults: plain malloc/realloc/free from <stdlib.h>.
+#ifndef STBI_MALLOC
+#define STBI_MALLOC(sz)           malloc(sz)
+#define STBI_REALLOC(p,newsz)     realloc(p,newsz)
+#define STBI_FREE(p)              free(p)
+#endif
+	// Sized realloc falls back to STBI_REALLOC; the old size argument is dropped.
+#ifndef STBI_REALLOC_SIZED
+#define STBI_REALLOC_SIZED(p,oldsz,newsz) STBI_REALLOC(p,newsz)
+#endif
+
+// x86/x64 detection: at most one of STBI__X64_TARGET / STBI__X86_TARGET gets defined
+#if defined(__x86_64__) || defined(_M_X64)
+#define STBI__X64_TARGET // 64-bit x86
+#elif defined(__i386) || defined(_M_IX86)
+#define STBI__X86_TARGET // 32-bit x86
+#endif
+
+#if defined(__GNUC__) && defined(STBI__X86_TARGET) && !defined(__SSE2__) && !defined(STBI_NO_SIMD)
+// gcc doesn't support sse2 intrinsics unless you compile with -msse2,
+// which in turn means it gets to use SSE2 everywhere. This is unfortunate,
+// but previous attempts to provide the SSE2 functions with runtime
+// detection caused numerous issues. The way architecture extensions are
+// exposed in GCC/Clang is, sadly, not really suited for one-file libs.
+// New behavior: if compiled with -msse2, we use SSE2 without any
+// detection; if not, we don't use it at all.
+#define STBI_NO_SIMD // 32-bit x86 GCC-style build without __SSE2__: SIMD is switched off
+#endif
+
+#if defined(__MINGW32__) && defined(STBI__X86_TARGET) && !defined(STBI_MINGW_ENABLE_SSE2) && !defined(STBI_NO_SIMD)
+// Note that __MINGW32__ doesn't actually mean 32-bit, so we have to avoid STBI__X64_TARGET
+//
+// 32-bit MinGW wants ESP to be 16-byte aligned, but this is not in the
+// Windows ABI and VC++ as well as Windows DLLs don't maintain that invariant.
+// As a result, enabling SSE2 on 32-bit MinGW is dangerous when not
+// simultaneously enabling "-mstackrealign".
+//
+// See https://github.com/nothings/stb/issues/81 for more information.
+//
+// So default to no SSE2 on 32-bit MinGW. If you've read this far and added
+// -mstackrealign to your build settings, feel free to #define STBI_MINGW_ENABLE_SSE2.
+#define STBI_NO_SIMD // 32-bit MinGW without the STBI_MINGW_ENABLE_SSE2 opt-in
+#endif
+
+#if !defined(STBI_NO_SIMD) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET))
+#define STBI_SSE2
+#include <emmintrin.h>
+
+#ifdef _MSC_VER
+
+#if _MSC_VER >= 1400  // not VC6
+#include <intrin.h> // __cpuid
+static int stbi__cpuid3(void) // returns element 3 of the CPUID leaf-1 result (the EDX feature word)
+{
+	int info[4]; // filled by __cpuid with four 32-bit result registers
+	__cpuid(info, 1); // query leaf 1
+	return info[3];
+}
+#else
+static int stbi__cpuid3(void) // pre-VC2005 variant: runs CPUID leaf 1 via inline asm and returns EDX
+{
+	int res; // receives EDX from the asm block below
+	__asm {
+		mov  eax, 1
+		cpuid
+		mov  res, edx
+	}
+	return res;
+}
+#endif
+
+#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name // MSVC spelling: declare 'name' with 16-byte alignment
+	// Run-time SSE2 check (MSVC): nonzero when bit 26 of the CPUID leaf-1 feature word is set.
+static int stbi__sse2_available(void)
+{
+	int info3 = stbi__cpuid3();
+	return ((info3 >> 26) & 1) != 0; // isolate bit 26
+}
+#else // assume GCC-style if not VC++
+#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
+
+static int stbi__sse2_available(void)
+{
+	// If we're even attempting to compile this on GCC/Clang, that means
+	// -msse2 is on, which means the compiler is allowed to use SSE2
+	// instructions at will, and so are we.
+	return 1;
+}
+#endif
+#endif
+
+// ARM NEON
+#if defined(STBI_NO_SIMD) && defined(STBI_NEON) // STBI_NO_SIMD wins over a user-supplied STBI_NEON
+#undef STBI_NEON
+#endif
+
+#ifdef STBI_NEON
+#include <arm_neon.h>
+// assume GCC or Clang on ARM targets
+#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
+#endif
+
+#ifndef STBI_SIMD_ALIGN // no SIMD path selected above: declare the variable with no alignment attribute
+#define STBI_SIMD_ALIGN(type, name) type name
+#endif
+
+///////////////////////////////////////////////
+//
+//  stbi__context struct and start_xxx functions
+
+// stbi__context structure is our basic context used by all images, so it
+// contains all the IO context, plus some basic image information
+typedef struct
+{
+	stbi__uint32 img_x, img_y; // image dimensions (filled in by the decoders; not set in this section)
+	int img_n, img_out_n; // NOTE(review): presumably channel counts in the file / in the output - confirm against the decoders
+
+	stbi_io_callbacks io; // read/skip/eof callbacks; io.read is NULL for memory-backed contexts
+	void* io_user_data; // opaque pointer handed back to every callback
+
+	int read_from_callbacks; // 1 while data is still pulled through io.read; cleared when a refill reads 0 bytes
+	int buflen; // number of bytes requested from io.read per refill (sizeof buffer_start)
+	stbi_uc buffer_start[128]; // staging buffer that callback reads are placed in
+
+	stbi_uc* img_buffer, * img_buffer_end; // current read cursor and one-past-the-end of the readable bytes
+	stbi_uc* img_buffer_original, * img_buffer_original_end; // saved start/end that stbi__rewind restores
+} stbi__context;
+
+
+static void stbi__refill_buffer(stbi__context* s); // forward declaration: used by stbi__start_callbacks before its definition
+
+// initialize a memory-decode context that reads directly from buffer[0..len)
+static void stbi__start_mem(stbi__context* s, stbi_uc const* buffer, int len)
+{
+	s->io.read = NULL; // a NULL read callback is how the readers recognise a memory-backed context
+	s->read_from_callbacks = 0;
+	s->img_buffer = s->img_buffer_original = (stbi_uc*)buffer; // const is cast away to fit the shared pointer fields
+	s->img_buffer_end = s->img_buffer_original_end = (stbi_uc*)buffer + len;
+}
+
+// initialize a callback-based context; 'c' is copied, 'user' is passed back to each callback
+static void stbi__start_callbacks(stbi__context* s, stbi_io_callbacks* c, void* user)
+{
+	s->io = *c;
+	s->io_user_data = user;
+	s->buflen = sizeof(s->buffer_start);
+	s->read_from_callbacks = 1;
+	s->img_buffer_original = s->buffer_start;
+	stbi__refill_buffer(s); // performs the first read so img_buffer/img_buffer_end are valid
+	s->img_buffer_original_end = s->img_buffer_end; // remember the extent of the first chunk for stbi__rewind
+}
+
+#ifndef STBI_NO_STDIO
+
+static int stbi__stdio_read(void* user, char* data, int size) // read callback: 'user' is a FILE*; returns the number of bytes fread() delivered
+{
+	return (int)fread(data, 1, size, (FILE*)user);
+}
+
+static void stbi__stdio_skip(void* user, int n) // skip callback: moves the FILE* in 'user' by n bytes relative to its current position
+{
+	fseek((FILE*)user, n, SEEK_CUR); // the fseek result is not checked
+}
+
+static int stbi__stdio_eof(void* user) // eof callback: nonzero once the FILE* in 'user' cannot deliver more data
+{
+	return feof((FILE*)user) || ferror((FILE*)user); // a read error ends the stream too; feof() alone stays 0 after an error
+}
+
+static stbi_io_callbacks stbi__stdio_callbacks = // callback table that adapts a stdio FILE* to the stbi_io_callbacks interface
+{
+   stbi__stdio_read,
+   stbi__stdio_skip,
+   stbi__stdio_eof,
+};
+
+static void stbi__start_file(stbi__context* s, FILE* f) // initialize a context that reads from an already-open FILE*
+{
+	stbi__start_callbacks(s, &stbi__stdio_callbacks, (void*)f); // the FILE* travels as the callbacks' user pointer
+}
+
+//static void stop_file(stbi__context *s) { }
+
+#endif // !STBI_NO_STDIO
+
+static void stbi__rewind(stbi__context* s) // resets the read cursor to the start of the initially buffered data
+{
+	// conceptually rewind SHOULD rewind to the beginning of the stream,
+	// but we just rewind to the beginning of the initial buffer, because
+	// we only use it after doing 'test', which only ever looks at at most 92 bytes
+	s->img_buffer = s->img_buffer_original;
+	s->img_buffer_end = s->img_buffer_original_end;
+}
+
+enum // values stored in stbi__result_info.channel_order
+{
+	STBI_ORDER_RGB,
+	STBI_ORDER_BGR
+};
+
+typedef struct // per-load metadata that a decoder reports back through stbi__load_main
+{
+	int bits_per_channel; // stbi__load_main defaults this to 8; the postprocess helpers expect 8 or 16
+	int num_channels; // zeroed by stbi__load_main; not read anywhere in this section
+	int channel_order; // STBI_ORDER_RGB or STBI_ORDER_BGR
+} stbi__result_info;
+
+#ifndef STBI_NO_JPEG // forward declarations of the JPEG entry points (test / load / info)
+static int      stbi__jpeg_test(stbi__context* s);
+static void* stbi__jpeg_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri);
+static int      stbi__jpeg_info(stbi__context* s, int* x, int* y, int* comp);
+#endif
+
+#ifndef STBI_NO_PNG // forward declarations of the PNG entry points
+static int      stbi__png_test(stbi__context* s);
+static void* stbi__png_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri);
+static int      stbi__png_info(stbi__context* s, int* x, int* y, int* comp);
+static int      stbi__png_is16(stbi__context* s);
+#endif
+
+#ifndef STBI_NO_BMP // forward declarations of the BMP entry points
+static int      stbi__bmp_test(stbi__context* s);
+static void* stbi__bmp_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri);
+static int      stbi__bmp_info(stbi__context* s, int* x, int* y, int* comp);
+#endif
+
+#ifndef STBI_NO_TGA // forward declarations of the TGA entry points
+static int      stbi__tga_test(stbi__context* s);
+static void* stbi__tga_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri);
+static int      stbi__tga_info(stbi__context* s, int* x, int* y, int* comp);
+#endif
+
+#ifndef STBI_NO_PSD // forward declarations of the PSD entry points; the loader additionally takes the requested bits per channel
+static int      stbi__psd_test(stbi__context* s);
+static void* stbi__psd_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri, int bpc);
+static int      stbi__psd_info(stbi__context* s, int* x, int* y, int* comp);
+static int      stbi__psd_is16(stbi__context* s);
+#endif
+
+#ifndef STBI_NO_HDR // forward declarations of the HDR entry points; the loader returns float data
+static int      stbi__hdr_test(stbi__context* s);
+static float* stbi__hdr_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri);
+static int      stbi__hdr_info(stbi__context* s, int* x, int* y, int* comp);
+#endif
+
+#ifndef STBI_NO_PIC // forward declarations of the PIC entry points
+static int      stbi__pic_test(stbi__context* s);
+static void* stbi__pic_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri);
+static int      stbi__pic_info(stbi__context* s, int* x, int* y, int* comp);
+#endif
+
+#ifndef STBI_NO_GIF // forward declarations of the GIF entry points, including the multi-frame loader
+static int      stbi__gif_test(stbi__context* s);
+static void* stbi__gif_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri);
+static void* stbi__load_gif_main(stbi__context* s, int** delays, int* x, int* y, int* z, int* comp, int req_comp);
+static int      stbi__gif_info(stbi__context* s, int* x, int* y, int* comp);
+#endif
+
+#ifndef STBI_NO_PNM // forward declarations of the PNM entry points
+static int      stbi__pnm_test(stbi__context* s);
+static void* stbi__pnm_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri);
+static int      stbi__pnm_info(stbi__context* s, int* x, int* y, int* comp);
+#endif
+
+// this is not threadsafe: one file-scope pointer holds the most recent failure message
+static const char* stbi__g_failure_reason;
+
+STBIDEF const char* stbi_failure_reason(void) // returns the message stored by the last stbi__err call (no initializer is given, so NULL before any failure)
+{
+	return stbi__g_failure_reason;
+}
+
+static int stbi__err(const char* str) // records 'str' as the failure reason; the pointer is stored, not copied
+{
+	stbi__g_failure_reason = str;
+	return 0; // always 0 so callers can write 'return stbi__err(...)' to signal failure
+}
+
+static void* stbi__malloc(size_t size) // single allocation entry point; forwards to the STBI_MALLOC macro
+{
+	return STBI_MALLOC(size);
+}
+
+// stb_image uses ints pervasively, including for offset calculations.
+// therefore the largest decoded image size we can support with the
+// current code, even on 64-bit targets, is INT_MAX. this is not a
+// significant limitation for the intended use case.
+//
+// we do, however, need to make sure our size calculations don't
+// overflow. hence a few helper functions for size calculations that
+// multiply integers together, making sure that they're non-negative
+// and no overflow occurs.
+
+// return 1 if the sum is valid, 0 on overflow.
+// negative terms are considered invalid.
+static int stbi__addsizes_valid(int a, int b) // note: only 'b' is tested for negativity in this function
+{
+	if (b < 0) return 0;
+	// now 0 <= b <= INT_MAX, hence also
+	// 0 <= INT_MAX - b <= INT_MAX.
+	// And "a + b <= INT_MAX" (which might overflow) is the
+	// same as a <= INT_MAX - b (no overflow)
+	return a <= INT_MAX - b;
+}
+
+// returns 1 if the product is valid, 0 on overflow.
+// negative factors are considered invalid.
+static int stbi__mul2sizes_valid(int a, int b)
+{
+	if (a < 0 || b < 0) return 0;
+	if (b == 0) return 1; // mul-by-0 is always safe
+	// portable way to check for no overflows in a*b
+	return a <= INT_MAX / b; // b > 0 here, so the division is well defined
+}
+
+// returns 1 if "a*b + add" has no negative terms/factors and doesn't overflow
+static int stbi__mad2sizes_valid(int a, int b, int add)
+{
+	return stbi__mul2sizes_valid(a, b) && stbi__addsizes_valid(a * b, add); // && short-circuits, so a*b is only evaluated once it is known to fit
+}
+
+// returns 1 if "a*b*c + add" has no negative terms/factors and doesn't overflow
+static int stbi__mad3sizes_valid(int a, int b, int c, int add)
+{
+	return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a * b, c) && // each partial product is validated before it is used
+		stbi__addsizes_valid(a * b * c, add);
+}
+
+// returns 1 if "a*b*c*d + add" has no negative terms/factors and doesn't overflow
+#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) // compiled out only when both STBI_NO_LINEAR and STBI_NO_HDR are defined
+static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add)
+{
+	return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a * b, c) && // each partial product is validated before it is used
+		stbi__mul2sizes_valid(a * b * c, d) && stbi__addsizes_valid(a * b * c * d, add);
+}
+#endif
+
+// mallocs with size overflow checking: allocates a*b + add bytes
+static void* stbi__malloc_mad2(int a, int b, int add)
+{
+	if (!stbi__mad2sizes_valid(a, b, add)) return NULL; // negative or overflowing size is reported as NULL, like an allocation failure
+	return stbi__malloc(a * b + add);
+}
+
+static void* stbi__malloc_mad3(int a, int b, int c, int add) // allocates a*b*c + add bytes with overflow checking
+{
+	if (!stbi__mad3sizes_valid(a, b, c, add)) return NULL; // negative or overflowing size is reported as NULL
+	return stbi__malloc(a * b * c + add);
+}
+
+#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) // same guard as stbi__mad4sizes_valid, which this depends on
+static void* stbi__malloc_mad4(int a, int b, int c, int d, int add) // allocates a*b*c*d + add bytes with overflow checking
+{
+	if (!stbi__mad4sizes_valid(a, b, c, d, add)) return NULL; // negative or overflowing size is reported as NULL
+	return stbi__malloc(a * b * c * d + add);
+}
+#endif
+
+// stbi__err - error (two-argument macro form: x = short message, y = user-friendly message)
+// stbi__errpf - error returning pointer to float
+// stbi__errpuc - error returning pointer to unsigned char
+
+#ifdef STBI_NO_FAILURE_STRINGS
+#define stbi__err(x,y)  0 // no message is recorded at all
+#elif defined(STBI_FAILURE_USERMSG)
+#define stbi__err(x,y)  stbi__err(y) // record the user-friendly message
+#else
+#define stbi__err(x,y)  stbi__err(x) // record the short message
+#endif
+
+#define stbi__errpf(x,y)   ((float *)(size_t) (stbi__err(x,y)?NULL:NULL)) // evaluates stbi__err, then yields a NULL float*
+#define stbi__errpuc(x,y)  ((unsigned char *)(size_t) (stbi__err(x,y)?NULL:NULL)) // evaluates stbi__err, then yields a NULL unsigned char*
+
+STBIDEF void stbi_image_free(void* retval_from_stbi_load) // releases a buffer returned by a load function via the STBI_FREE macro
+{
+	STBI_FREE(retval_from_stbi_load);
+}
+
+#ifndef STBI_NO_LINEAR // forward declaration: 8-bit data to float conversion
+static float* stbi__ldr_to_hdr(stbi_uc* data, int x, int y, int comp);
+#endif
+
+#ifndef STBI_NO_HDR // forward declaration: float data to 8-bit conversion
+static stbi_uc* stbi__hdr_to_ldr(float* data, int x, int y, int comp);
+#endif
+
+static int stbi__vertically_flip_on_load = 0; // file-scope flag read by the postprocess helpers; off by default
+
+STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip) // nonzero makes subsequent loads flip the image rows vertically
+{
+	stbi__vertically_flip_on_load = flag_true_if_should_flip;
+}
+
+static void* stbi__load_main(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri, int bpc) // tries each enabled decoder in turn; returns its pixel buffer, or NULL with a failure reason
+{
+	memset(ri, 0, sizeof(*ri)); // make sure it's initialized if we add new fields
+	ri->bits_per_channel = 8; // default is 8 so most paths don't have to be changed
+	ri->channel_order = STBI_ORDER_RGB; // all current input & output are this, but this is here so we can add BGR order
+	ri->num_channels = 0;
+
+#ifndef STBI_NO_JPEG
+	if (stbi__jpeg_test(s)) return stbi__jpeg_load(s, x, y, comp, req_comp, ri);
+#endif
+#ifndef STBI_NO_PNG
+	if (stbi__png_test(s))  return stbi__png_load(s, x, y, comp, req_comp, ri);
+#endif
+#ifndef STBI_NO_BMP
+	if (stbi__bmp_test(s))  return stbi__bmp_load(s, x, y, comp, req_comp, ri);
+#endif
+#ifndef STBI_NO_GIF
+	if (stbi__gif_test(s))  return stbi__gif_load(s, x, y, comp, req_comp, ri);
+#endif
+#ifndef STBI_NO_PSD
+	if (stbi__psd_test(s))  return stbi__psd_load(s, x, y, comp, req_comp, ri, bpc); // the only loader that receives bpc
+#endif
+#ifndef STBI_NO_PIC
+	if (stbi__pic_test(s))  return stbi__pic_load(s, x, y, comp, req_comp, ri);
+#endif
+#ifndef STBI_NO_PNM
+	if (stbi__pnm_test(s))  return stbi__pnm_load(s, x, y, comp, req_comp, ri);
+#endif
+
+#ifndef STBI_NO_HDR
+	if (stbi__hdr_test(s)) {
+		float* hdr = stbi__hdr_load(s, x, y, comp, req_comp, ri);
+		return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp); // stbi__hdr_to_ldr returns NULL itself when hdr is NULL
+	}
+#endif
+
+#ifndef STBI_NO_TGA
+	// test tga last because it's a crappy test!
+	if (stbi__tga_test(s))
+		return stbi__tga_load(s, x, y, comp, req_comp, ri);
+#endif
+
+	return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt");
+}
+
+static stbi_uc* stbi__convert_16_to_8(stbi__uint16* orig, int w, int h, int channels) // narrows a 16-bit image to 8 bits per channel; always consumes (frees) 'orig'
+{
+	int i;
+	int img_len = w * h * channels; // total number of samples
+	stbi_uc* reduced;
+
+	reduced = (stbi_uc*)stbi__malloc(img_len);
+	if (reduced == NULL) { STBI_FREE(orig); return stbi__errpuc("outofmem", "Out of memory"); } // free the input on failure too (as stbi__convert_format does) so it is not leaked
+
+	for (i = 0; i < img_len; ++i)
+		reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each byte is sufficient approx of 16->8 bit scaling
+
+	STBI_FREE(orig);
+	return reduced;
+}
+
+static stbi__uint16* stbi__convert_8_to_16(stbi_uc* orig, int w, int h, int channels) // widens an 8-bit image to 16 bits per channel; always consumes (frees) 'orig'
+{
+	int i;
+	int img_len = w * h * channels; // total number of samples
+	stbi__uint16* enlarged;
+
+	enlarged = (stbi__uint16*)stbi__malloc(img_len * 2);
+	if (enlarged == NULL) { STBI_FREE(orig); return (stbi__uint16*)stbi__errpuc("outofmem", "Out of memory"); } // free the input on failure too so it is not leaked
+
+	for (i = 0; i < img_len; ++i)
+		enlarged[i] = (stbi__uint16)((orig[i] << 8) + orig[i]); // replicate to high and low byte, maps 0->0, 255->0xffff
+
+	STBI_FREE(orig);
+	return enlarged;
+}
+
+static void stbi__vertical_flip(void* image, int w, int h, int bytes_per_pixel) // reverses the row order of a w x h image in place
+{
+	int row;
+	size_t bytes_per_row = (size_t)w * bytes_per_pixel;
+	stbi_uc temp[2048]; // fixed scratch buffer: rows are swapped in chunks of at most sizeof(temp) bytes
+	stbi_uc* bytes = (stbi_uc*)image;
+
+	for (row = 0; row < (h >> 1); row++) { // only the top half is visited; for odd h the middle row stays put
+		stbi_uc* row0 = bytes + row * bytes_per_row;
+		stbi_uc* row1 = bytes + (h - row - 1) * bytes_per_row;
+		// swap row0 with row1
+		size_t bytes_left = bytes_per_row;
+		while (bytes_left) {
+			size_t bytes_copy = (bytes_left < sizeof(temp)) ? bytes_left : sizeof(temp);
+			memcpy(temp, row0, bytes_copy);
+			memcpy(row0, row1, bytes_copy);
+			memcpy(row1, temp, bytes_copy);
+			row0 += bytes_copy;
+			row1 += bytes_copy;
+			bytes_left -= bytes_copy;
+		}
+	}
+}
+
+static void stbi__vertical_flip_slices(void* image, int w, int h, int z, int bytes_per_pixel) // flips each of z consecutive w x h images in place
+{
+	int slice;
+	int slice_size = w * h * bytes_per_pixel; // byte stride between consecutive slices (computed in int, unchecked)
+
+	stbi_uc* bytes = (stbi_uc*)image;
+	for (slice = 0; slice < z; ++slice) {
+		stbi__vertical_flip(bytes, w, h, bytes_per_pixel);
+		bytes += slice_size;
+	}
+}
+
+static unsigned char* stbi__load_and_postprocess_8bit(stbi__context* s, int* x, int* y, int* comp, int req_comp) // loads an image and guarantees 8 bits per channel; returns NULL on failure
+{
+	stbi__result_info ri;
+	void* result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8);
+
+	if (result == NULL)
+		return NULL;
+
+	if (ri.bits_per_channel != 8) { // decoder produced 16-bit data: narrow it
+		STBI_ASSERT(ri.bits_per_channel == 16);
+		result = stbi__convert_16_to_8((stbi__uint16*)result, *x, *y, req_comp == 0 ? *comp : req_comp);
+		ri.bits_per_channel = 8;
+	}
+
+	// @TODO: move stbi__convert_format to here
+
+	if (stbi__vertically_flip_on_load && result != NULL) { // result is NULL when the conversion above ran out of memory; flipping it would dereference NULL
+		int channels = req_comp ? req_comp : *comp;
+		stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi_uc));
+	}
+
+	return (unsigned char*)result;
+}
+
+static stbi__uint16* stbi__load_and_postprocess_16bit(stbi__context* s, int* x, int* y, int* comp, int req_comp) // loads an image and guarantees 16 bits per channel; returns NULL on failure
+{
+	stbi__result_info ri;
+	void* result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16);
+
+	if (result == NULL)
+		return NULL;
+
+	if (ri.bits_per_channel != 16) { // decoder produced 8-bit data: widen it
+		STBI_ASSERT(ri.bits_per_channel == 8);
+		result = stbi__convert_8_to_16((stbi_uc*)result, *x, *y, req_comp == 0 ? *comp : req_comp);
+		ri.bits_per_channel = 16;
+	}
+
+	// @TODO: move stbi__convert_format16 to here
+	// @TODO: special case RGB-to-Y (and RGBA-to-YA) for 8-bit-to-16-bit case to keep more precision
+
+	if (stbi__vertically_flip_on_load && result != NULL) { // result is NULL when the conversion above ran out of memory; flipping it would dereference NULL
+		int channels = req_comp ? req_comp : *comp;
+		stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi__uint16));
+	}
+
+	return (stbi__uint16*)result;
+}
+
+#if !defined(STBI_NO_HDR) || !defined(STBI_NO_LINEAR)
+static void stbi__float_postprocess(float* result, int* x, int* y, int* comp, int req_comp) // applies the optional vertical flip to float image data in place
+{
+	if (stbi__vertically_flip_on_load && result != NULL) {
+		int channels = req_comp ? req_comp : *comp; // req_comp == 0 means the file's own channel count was kept
+		stbi__vertical_flip(result, *x, *y, channels * sizeof(float));
+	}
+}
+#endif
+
+#ifndef STBI_NO_STDIO
+
+static FILE* stbi__fopen(char const* filename, char const* mode) // opens a file; returns NULL (0) on failure on both code paths
+{
+	FILE* f;
+#if defined(_MSC_VER) && _MSC_VER >= 1400
+	if (0 != fopen_s(&f, filename, mode)) // newer MSVC: use fopen_s and normalise failure to a null FILE*
+		f = 0;
+#else
+	f = fopen(filename, mode);
+#endif
+	return f;
+}
+
+
+STBIDEF stbi_uc* stbi_load(char const* filename, int* x, int* y, int* comp, int req_comp) // opens 'filename', decodes it as 8-bit data, and closes the file again
+{
+	FILE* f = stbi__fopen(filename, "rb");
+	unsigned char* result;
+	if (!f) return stbi__errpuc("can't fopen", "Unable to open file");
+	result = stbi_load_from_file(f, x, y, comp, req_comp);
+	fclose(f); // closed whether or not decoding succeeded
+	return result;
+}
+
+STBIDEF stbi_uc* stbi_load_from_file(FILE* f, int* x, int* y, int* comp, int req_comp) // decodes 8-bit data from an open FILE*; the file is left open
+{
+	unsigned char* result;
+	stbi__context s;
+	stbi__start_file(&s, f);
+	result = stbi__load_and_postprocess_8bit(&s, x, y, comp, req_comp);
+	if (result) {
+		// need to 'unget' all the characters in the IO buffer
+		fseek(f, -(int)(s.img_buffer_end - s.img_buffer), SEEK_CUR); // seek back by the bytes that were buffered but not consumed
+	}
+	return result;
+}
+
+STBIDEF stbi__uint16* stbi_load_from_file_16(FILE* f, int* x, int* y, int* comp, int req_comp) // decodes 16-bit data from an open FILE*; the file is left open
+{
+	stbi__uint16* result;
+	stbi__context s;
+	stbi__start_file(&s, f);
+	result = stbi__load_and_postprocess_16bit(&s, x, y, comp, req_comp);
+	if (result) {
+		// need to 'unget' all the characters in the IO buffer
+		fseek(f, -(int)(s.img_buffer_end - s.img_buffer), SEEK_CUR); // seek back by the bytes that were buffered but not consumed
+	}
+	return result;
+}
+
+STBIDEF stbi_us* stbi_load_16(char const* filename, int* x, int* y, int* comp, int req_comp) // opens 'filename', decodes it as 16-bit data, and closes the file again
+{
+	FILE* f = stbi__fopen(filename, "rb");
+	stbi__uint16* result;
+	if (!f) return (stbi_us*)stbi__errpuc("can't fopen", "Unable to open file");
+	result = stbi_load_from_file_16(f, x, y, comp, req_comp);
+	fclose(f); // closed whether or not decoding succeeded
+	return result;
+}
+
+
+#endif //!STBI_NO_STDIO
+
+STBIDEF stbi_us* stbi_load_16_from_memory(stbi_uc const* buffer, int len, int* x, int* y, int* channels_in_file, int desired_channels) // decodes 16-bit data from an in-memory encoded image of 'len' bytes
+{
+	stbi__context s;
+	stbi__start_mem(&s, buffer, len);
+	return stbi__load_and_postprocess_16bit(&s, x, y, channels_in_file, desired_channels);
+}
+
+STBIDEF stbi_us* stbi_load_16_from_callbacks(stbi_io_callbacks const* clbk, void* user, int* x, int* y, int* channels_in_file, int desired_channels) // decodes 16-bit data pulled through user-supplied IO callbacks
+{
+	stbi__context s;
+	stbi__start_callbacks(&s, (stbi_io_callbacks*)clbk, user); // const is cast away; stbi__start_callbacks only copies the struct
+	return stbi__load_and_postprocess_16bit(&s, x, y, channels_in_file, desired_channels);
+}
+
+STBIDEF stbi_uc* stbi_load_from_memory(stbi_uc const* buffer, int len, int* x, int* y, int* comp, int req_comp) // decodes 8-bit data from an in-memory encoded image of 'len' bytes
+{
+	stbi__context s;
+	stbi__start_mem(&s, buffer, len);
+	return stbi__load_and_postprocess_8bit(&s, x, y, comp, req_comp);
+}
+
+STBIDEF stbi_uc* stbi_load_from_callbacks(stbi_io_callbacks const* clbk, void* user, int* x, int* y, int* comp, int req_comp) // decodes 8-bit data pulled through user-supplied IO callbacks
+{
+	stbi__context s;
+	stbi__start_callbacks(&s, (stbi_io_callbacks*)clbk, user); // const is cast away; stbi__start_callbacks only copies the struct
+	return stbi__load_and_postprocess_8bit(&s, x, y, comp, req_comp);
+}
+
+#ifndef STBI_NO_GIF
+STBIDEF stbi_uc* stbi_load_gif_from_memory(stbi_uc const* buffer, int len, int** delays, int* x, int* y, int* z, int* comp, int req_comp) // decodes a GIF from memory via stbi__load_gif_main; *z is used below as the slice (frame) count; returns NULL on failure
+{
+	unsigned char* result;
+	stbi__context s;
+	stbi__start_mem(&s, buffer, len);
+
+	result = (unsigned char*)stbi__load_gif_main(&s, delays, x, y, z, comp, req_comp);
+	if (result && stbi__vertically_flip_on_load) { // a failed load returns NULL; flipping it would dereference a null pointer
+		stbi__vertical_flip_slices(result, *x, *y, *z, *comp); // NOTE(review): uses *comp as bytes per pixel - confirm this matches the output when req_comp != 0
+	}
+
+	return result;
+}
+#endif
+
+#ifndef STBI_NO_LINEAR
+static float* stbi__loadf_main(stbi__context* s, int* x, int* y, int* comp, int req_comp) // returns float pixel data: HDR files are decoded directly, anything else is loaded as 8-bit and converted
+{
+	unsigned char* data;
+#ifndef STBI_NO_HDR
+	if (stbi__hdr_test(s)) {
+		stbi__result_info ri;
+		float* hdr_data = stbi__hdr_load(s, x, y, comp, req_comp, &ri);
+		if (hdr_data)
+			stbi__float_postprocess(hdr_data, x, y, comp, req_comp);
+		return hdr_data;
+	}
+#endif
+	data = stbi__load_and_postprocess_8bit(s, x, y, comp, req_comp); // any vertical flip is applied here, before conversion
+	if (data)
+		return stbi__ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp);
+	return stbi__errpf("unknown image type", "Image not of any known type, or corrupt"); // note: replaces whatever reason the 8-bit loader recorded
+}
+
+STBIDEF float* stbi_loadf_from_memory(stbi_uc const* buffer, int len, int* x, int* y, int* comp, int req_comp) // float-output load from an in-memory encoded image of 'len' bytes
+{
+	stbi__context s;
+	stbi__start_mem(&s, buffer, len);
+	return stbi__loadf_main(&s, x, y, comp, req_comp);
+}
+
+STBIDEF float* stbi_loadf_from_callbacks(stbi_io_callbacks const* clbk, void* user, int* x, int* y, int* comp, int req_comp) // float-output load through user-supplied IO callbacks
+{
+	stbi__context s;
+	stbi__start_callbacks(&s, (stbi_io_callbacks*)clbk, user); // const is cast away; stbi__start_callbacks only copies the struct
+	return stbi__loadf_main(&s, x, y, comp, req_comp);
+}
+
+#ifndef STBI_NO_STDIO
+STBIDEF float* stbi_loadf(char const* filename, int* x, int* y, int* comp, int req_comp) // opens 'filename', decodes it to float data, and closes the file again
+{
+	float* result;
+	FILE* f = stbi__fopen(filename, "rb");
+	if (!f) return stbi__errpf("can't fopen", "Unable to open file");
+	result = stbi_loadf_from_file(f, x, y, comp, req_comp);
+	fclose(f); // closed whether or not decoding succeeded
+	return result;
+}
+
+STBIDEF float* stbi_loadf_from_file(FILE* f, int* x, int* y, int* comp, int req_comp) // float-output load from an open FILE*; unlike stbi_load_from_file, no seek-back of unread buffered bytes is done here
+{
+	stbi__context s;
+	stbi__start_file(&s, f);
+	return stbi__loadf_main(&s, x, y, comp, req_comp);
+}
+#endif // !STBI_NO_STDIO
+
+#endif // !STBI_NO_LINEAR
+
+// these is-hdr-or-not functions are defined regardless of whether STBI_NO_HDR
+// is defined, for API simplicity; if STBI_NO_HDR is defined, they always
+// report false!
+
+STBIDEF int stbi_is_hdr_from_memory(stbi_uc const* buffer, int len) // returns the result of the HDR signature test on an in-memory image
+{
+#ifndef STBI_NO_HDR
+	stbi__context s;
+	stbi__start_mem(&s, buffer, len);
+	return stbi__hdr_test(&s);
+#else
+	STBI_NOTUSED(buffer);
+	STBI_NOTUSED(len);
+	return 0; // HDR support compiled out: never reports HDR
+#endif
+}
+
+#ifndef STBI_NO_STDIO
+STBIDEF int      stbi_is_hdr(char const* filename) // HDR signature test on a named file; returns 0 when the file cannot be opened
+{
+	FILE* f = stbi__fopen(filename, "rb");
+	int result = 0;
+	if (f) {
+		result = stbi_is_hdr_from_file(f);
+		fclose(f);
+	}
+	return result;
+}
+
+STBIDEF int stbi_is_hdr_from_file(FILE* f) // HDR signature test on an open FILE*; the file position is restored afterwards
+{
+#ifndef STBI_NO_HDR
+	long pos = ftell(f); // remember where the caller's stream was
+	int res;
+	stbi__context s;
+	stbi__start_file(&s, f);
+	res = stbi__hdr_test(&s);
+	fseek(f, pos, SEEK_SET); // undo the reads done by the test
+	return res;
+#else
+	STBI_NOTUSED(f);
+	return 0; // HDR support compiled out: never reports HDR
+#endif
+}
+#endif // !STBI_NO_STDIO
+
+STBIDEF int      stbi_is_hdr_from_callbacks(stbi_io_callbacks const* clbk, void* user) // HDR signature test on data pulled through IO callbacks; consumed bytes are not pushed back
+{
+#ifndef STBI_NO_HDR
+	stbi__context s;
+	stbi__start_callbacks(&s, (stbi_io_callbacks*)clbk, user);
+	return stbi__hdr_test(&s);
+#else
+	STBI_NOTUSED(clbk);
+	STBI_NOTUSED(user);
+	return 0; // HDR support compiled out: never reports HDR
+#endif
+}
+
+#ifndef STBI_NO_LINEAR
+static float stbi__l2h_gamma = 2.2f, stbi__l2h_scale = 1.0f; // exponent and multiplier used by stbi__ldr_to_hdr
+
+STBIDEF void   stbi_ldr_to_hdr_gamma(float gamma) { stbi__l2h_gamma = gamma; } // sets the 8-bit -> float gamma exponent
+STBIDEF void   stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = scale; } // sets the 8-bit -> float scale factor
+#endif
+
+static float stbi__h2l_gamma_i = 1.0f / 2.2f, stbi__h2l_scale_i = 1.0f; // stored as reciprocals (the _i suffix) of the user-facing gamma and scale
+
+STBIDEF void   stbi_hdr_to_ldr_gamma(float gamma) { stbi__h2l_gamma_i = 1 / gamma; } // stores 1/gamma; gamma == 0 is not checked
+STBIDEF void   stbi_hdr_to_ldr_scale(float scale) { stbi__h2l_scale_i = 1 / scale; } // stores 1/scale; scale == 0 is not checked
+
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// Common code used by all image loaders
+//
+
+enum // scan-mode constants; not used in this section - NOTE(review): presumably tell a parser how far to read, confirm at the use sites
+{
+	STBI__SCAN_load = 0,
+	STBI__SCAN_type,
+	STBI__SCAN_header
+};
+
+static void stbi__refill_buffer(stbi__context* s) // reads the next chunk (up to buflen bytes) through io.read into buffer_start
+{
+	int n = (s->io.read)(s->io_user_data, (char*)s->buffer_start, s->buflen);
+	if (n == 0) {
+		// at end of file, treat same as if from memory, but need to handle case
+		// where s->img_buffer isn't pointing to safe memory, e.g. 0-byte file
+		s->read_from_callbacks = 0;
+		s->img_buffer = s->buffer_start;
+		s->img_buffer_end = s->buffer_start + 1;
+		*s->img_buffer = 0; // a single synthetic zero byte stands in for the missing data
+	}
+	else {
+		s->img_buffer = s->buffer_start;
+		s->img_buffer_end = s->buffer_start + n; // a negative n is not handled specially here
+	}
+}
+
+stbi_inline static stbi_uc stbi__get8(stbi__context* s) // returns the next byte of the stream, or 0 once the data is exhausted
+{
+	if (s->img_buffer < s->img_buffer_end)
+		return *s->img_buffer++;
+	if (s->read_from_callbacks) { // buffer drained but the callbacks may still have data
+		stbi__refill_buffer(s);
+		return *s->img_buffer++;
+	}
+	return 0;
+}
+
+stbi_inline static int stbi__at_eof(stbi__context* s) // nonzero when no more input bytes are available
+{
+	if (s->io.read) { // callback-backed context
+		if (!(s->io.eof)(s->io_user_data)) return 0;
+		// if feof() is true, check if buffer = end
+		// special case: we've only got the special 0 character at the end
+		if (s->read_from_callbacks == 0) return 1;
+	}
+
+	return s->img_buffer >= s->img_buffer_end; // otherwise: EOF exactly when the buffer is fully consumed
+}
+
+static void stbi__skip(stbi__context* s, int n) // advances the read position by n bytes
+{
+	if (n < 0) { // a negative count is treated as skipping everything that is buffered
+		s->img_buffer = s->img_buffer_end;
+		return;
+	}
+	if (s->io.read) {
+		int blen = (int)(s->img_buffer_end - s->img_buffer); // bytes still unread in the buffer
+		if (blen < n) {
+			s->img_buffer = s->img_buffer_end;
+			(s->io.skip)(s->io_user_data, n - blen); // the remainder is skipped in the underlying stream
+			return;
+		}
+	}
+	s->img_buffer += n; // no bounds check is applied on this path
+}
+
+static int stbi__getn(stbi__context* s, stbi_uc* buffer, int n) // copies the next n bytes into 'buffer'; returns 1 on success, 0 if fewer than n were available
+{
+	if (s->io.read) {
+		int blen = (int)(s->img_buffer_end - s->img_buffer); // bytes still unread in the buffer
+		if (blen < n) {
+			int res, count;
+
+			memcpy(buffer, s->img_buffer, blen); // first drain what is buffered
+
+			count = (s->io.read)(s->io_user_data, (char*)buffer + blen, n - blen); // then read the rest straight into the caller's buffer
+			res = (count == (n - blen));
+			s->img_buffer = s->img_buffer_end;
+			return res;
+		}
+	}
+
+	if (s->img_buffer + n <= s->img_buffer_end) {
+		memcpy(buffer, s->img_buffer, n);
+		s->img_buffer += n;
+		return 1;
+	}
+	else
+		return 0; // the read position is left unchanged on this failure path
+}
+
+static int stbi__get16be(stbi__context* s) // reads two bytes as a big-endian 16-bit value
+{
+	int z = stbi__get8(s); // first byte is the high byte
+	return (z << 8) + stbi__get8(s);
+}
+
+static stbi__uint32 stbi__get32be(stbi__context* s) // reads four bytes as a big-endian 32-bit value
+{
+	stbi__uint32 z = stbi__get16be(s); // first half is the high half; held in stbi__uint32 before shifting
+	return (z << 16) + stbi__get16be(s);
+}
+
+#if defined(STBI_NO_BMP) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF)
+// nothing: the helper below is compiled only when BMP, TGA or GIF support is enabled
+#else
+static int stbi__get16le(stbi__context* s) // reads two bytes as a little-endian 16-bit value
+{
+	int z = stbi__get8(s); // first byte is the low byte
+	return z + (stbi__get8(s) << 8);
+}
+#endif
+
+#ifndef STBI_NO_BMP
+static stbi__uint32 stbi__get32le(stbi__context* s) // reads four bytes as a little-endian 32-bit value
+{
+	stbi__uint32 z = stbi__get16le(s); // first half is the low half
+	return z + ((stbi__uint32)stbi__get16le(s) << 16); // shift as unsigned: an int >= 0x8000 shifted left by 16 overflows signed int (undefined behaviour)
+}
+#endif
+
+#define STBI__BYTECAST(x)  ((stbi_uc) ((x) & 255))  // truncate int to byte without warnings (keeps only the low 8 bits)
+
+
+//////////////////////////////////////////////////////////////////////////////
+//
+//  generic converter from built-in img_n to req_comp
+//    individual types do this automatically as much as possible (e.g. jpeg
+//    does all cases internally since it needs to colorspace convert anyway,
+//    and it never has alpha, so very few cases ). png can automatically
+//    interleave an alpha=255 channel, but falls back to this for other cases
+//
+//  assume data buffer is malloced, so malloc a new one and free that one
+//  only failure mode is malloc failing
+
+static stbi_uc stbi__compute_y(int r, int g, int b) // collapses RGB to a single grey value with integer weights 77/150/29 (they sum to 256)
+{
+	return (stbi_uc)(((r * 77) + (g * 150) + (29 * b)) >> 8); // >> 8 divides by the weight total
+}
+
+static unsigned char* stbi__convert_format(unsigned char* data, int img_n, int req_comp, unsigned int x, unsigned int y) // converts an x-by-y 8-bit image from img_n to req_comp channels; frees 'data' unless it is returned unchanged
+{
+	int i, j;
+	unsigned char* good;
+
+	if (req_comp == img_n) return data; // nothing to convert: the input buffer itself is returned
+	STBI_ASSERT(req_comp >= 1 && req_comp <= 4);
+
+	good = (unsigned char*)stbi__malloc_mad3(req_comp, x, y, 0); // overflow-checked allocation of req_comp * x * y bytes
+	if (good == NULL) {
+		STBI_FREE(data);
+		return stbi__errpuc("outofmem", "Out of memory");
+	}
+
+	for (j = 0; j < (int)y; ++j) {
+		unsigned char* src = data + j * x * img_n;
+		unsigned char* dest = good + j * x * req_comp;
+
+#define STBI__COMBO(a,b)  ((a)*8+(b)) // packs (source channels, target channels) into one switch key
+#define STBI__CASE(a,b)   case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b) // per-scanline loop header; runs x times
+		// convert source image with img_n components to one with req_comp components;
+		// avoid switch per pixel, so use switch per scanline and massive macros
+		switch (STBI__COMBO(img_n, req_comp)) {
+			STBI__CASE(1, 2) { dest[0] = src[0], dest[1] = 255; } break;
+			STBI__CASE(1, 3) { dest[0] = dest[1] = dest[2] = src[0]; } break;
+			STBI__CASE(1, 4) { dest[0] = dest[1] = dest[2] = src[0], dest[3] = 255; } break;
+			STBI__CASE(2, 1) { dest[0] = src[0]; } break;
+			STBI__CASE(2, 3) { dest[0] = dest[1] = dest[2] = src[0]; } break;
+			STBI__CASE(2, 4) { dest[0] = dest[1] = dest[2] = src[0], dest[3] = src[1]; } break;
+			STBI__CASE(3, 4) { dest[0] = src[0], dest[1] = src[1], dest[2] = src[2], dest[3] = 255; } break;
+			STBI__CASE(3, 1) { dest[0] = stbi__compute_y(src[0], src[1], src[2]); } break;
+			STBI__CASE(3, 2) { dest[0] = stbi__compute_y(src[0], src[1], src[2]), dest[1] = 255; } break;
+			STBI__CASE(4, 1) { dest[0] = stbi__compute_y(src[0], src[1], src[2]); } break;
+			STBI__CASE(4, 2) { dest[0] = stbi__compute_y(src[0], src[1], src[2]), dest[1] = src[3]; } break;
+			STBI__CASE(4, 3) { dest[0] = src[0], dest[1] = src[1], dest[2] = src[2]; } break;
+		default: STBI_ASSERT(0);
+		}
+#undef STBI__CASE
+	}
+
+	STBI_FREE(data);
+	return good;
+}
+
+static stbi__uint16 stbi__compute_y_16(int r, int g, int b) // 16-bit counterpart of stbi__compute_y: same 77/150/29 weights, result cast to stbi__uint16
+{
+	return (stbi__uint16)(((r * 77) + (g * 150) + (29 * b)) >> 8); // >> 8 divides by the weight total (256)
+}
+
+static stbi__uint16* stbi__convert_format16(stbi__uint16* data, int img_n, int req_comp, unsigned int x, unsigned int y) // converts an x-by-y 16-bit image from img_n to req_comp channels; frees 'data' unless it is returned unchanged
+{
+	int i, j;
+	stbi__uint16* good;
+
+	if (req_comp == img_n) return data; // nothing to convert: the input buffer itself is returned
+	STBI_ASSERT(req_comp >= 1 && req_comp <= 4);
+
+	good = (stbi__uint16*)stbi__malloc_mad3(req_comp * 2, x, y, 0); // overflow-checked like stbi__convert_format; 2 bytes per sample, and req_comp <= 4 so req_comp * 2 cannot overflow
+	if (good == NULL) {
+		STBI_FREE(data);
+		return (stbi__uint16*)stbi__errpuc("outofmem", "Out of memory");
+	}
+
+	for (j = 0; j < (int)y; ++j) {
+		stbi__uint16* src = data + j * x * img_n;
+		stbi__uint16* dest = good + j * x * req_comp;
+
+#define STBI__COMBO(a,b)  ((a)*8+(b)) // packs (source channels, target channels) into one switch key
+#define STBI__CASE(a,b)   case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b) // per-scanline loop header; runs x times
+		// convert source image with img_n components to one with req_comp components;
+		// avoid switch per pixel, so use switch per scanline and massive macros
+		switch (STBI__COMBO(img_n, req_comp)) {
+			STBI__CASE(1, 2) { dest[0] = src[0], dest[1] = 0xffff; } break;
+			STBI__CASE(1, 3) { dest[0] = dest[1] = dest[2] = src[0]; } break;
+			STBI__CASE(1, 4) { dest[0] = dest[1] = dest[2] = src[0], dest[3] = 0xffff; } break;
+			STBI__CASE(2, 1) { dest[0] = src[0]; } break;
+			STBI__CASE(2, 3) { dest[0] = dest[1] = dest[2] = src[0]; } break;
+			STBI__CASE(2, 4) { dest[0] = dest[1] = dest[2] = src[0], dest[3] = src[1]; } break;
+			STBI__CASE(3, 4) { dest[0] = src[0], dest[1] = src[1], dest[2] = src[2], dest[3] = 0xffff; } break;
+			STBI__CASE(3, 1) { dest[0] = stbi__compute_y_16(src[0], src[1], src[2]); } break;
+			STBI__CASE(3, 2) { dest[0] = stbi__compute_y_16(src[0], src[1], src[2]), dest[1] = 0xffff; } break;
+			STBI__CASE(4, 1) { dest[0] = stbi__compute_y_16(src[0], src[1], src[2]); } break;
+			STBI__CASE(4, 2) { dest[0] = stbi__compute_y_16(src[0], src[1], src[2]), dest[1] = src[3]; } break;
+			STBI__CASE(4, 3) { dest[0] = src[0], dest[1] = src[1], dest[2] = src[2]; } break;
+		default: STBI_ASSERT(0);
+		}
+#undef STBI__CASE
+	}
+
+	STBI_FREE(data);
+	return good;
+}
+
+#ifndef STBI_NO_LINEAR
+static float* stbi__ldr_to_hdr(stbi_uc* data, int x, int y, int comp) // converts 8-bit samples to floats; frees 'data'; returns NULL if 'data' is NULL or allocation fails
+{
+	int i, k, n;
+	float* output;
+	if (!data) return NULL;
+	output = (float*)stbi__malloc_mad4(x, y, comp, sizeof(float), 0);
+	if (output == NULL) { STBI_FREE(data); return stbi__errpf("outofmem", "Out of memory"); }
+	// compute number of non-alpha components
+	if (comp & 1) n = comp; else n = comp - 1; // an even channel count means the last channel is treated as alpha
+	for (i = 0; i < x * y; ++i) {
+		for (k = 0; k < n; ++k) {
+			output[i * comp + k] = (float)(pow(data[i * comp + k] / 255.0f, stbi__l2h_gamma) * stbi__l2h_scale); // normalise to 0..1, apply gamma, then scale
+		}
+		if (k < comp) output[i * comp + k] = data[i * comp + k] / 255.0f; // alpha is only normalised, with no gamma or scale
+	}
+	STBI_FREE(data);
+	return output;
+}
+#endif
+
+#ifndef STBI_NO_HDR
+#define stbi__float2int(x)   ((int) (x)) // plain truncating cast
+static stbi_uc* stbi__hdr_to_ldr(float* data, int x, int y, int comp) // converts float samples to 8-bit; frees 'data'; returns NULL if 'data' is NULL or allocation fails
+{
+	int i, k, n;
+	stbi_uc* output;
+	if (!data) return NULL;
+	output = (stbi_uc*)stbi__malloc_mad3(x, y, comp, 0);
+	if (output == NULL) { STBI_FREE(data); return stbi__errpuc("outofmem", "Out of memory"); }
+	// compute number of non-alpha components
+	if (comp & 1) n = comp; else n = comp - 1; // an even channel count means the last channel is treated as alpha
+	for (i = 0; i < x * y; ++i) {
+		for (k = 0; k < n; ++k) {
+			float z = (float)pow(data[i * comp + k] * stbi__h2l_scale_i, stbi__h2l_gamma_i) * 255 + 0.5f; // scale, apply inverse gamma, map to 0..255; +0.5f rounds under the truncating cast
+			if (z < 0) z = 0;
+			if (z > 255) z = 255;
+			output[i * comp + k] = (stbi_uc)stbi__float2int(z);
+		}
+		if (k < comp) { // alpha channel: mapped to 0..255 with no gamma or scale
+			float z = data[i * comp + k] * 255 + 0.5f;
+			if (z < 0) z = 0;
+			if (z > 255) z = 255;
+			output[i * comp + k] = (stbi_uc)stbi__float2int(z);
+		}
+	}
+	STBI_FREE(data);
+	return output;
+}
+#endif
+
+//////////////////////////////////////////////////////////////////////////////
+//
+//  "baseline" JPEG/JFIF decoder
+//
+//    simple implementation
+//      - doesn't support delayed output of y-dimension
+//      - simple interface (only one output format: 8-bit interleaved RGB)
+//      - doesn't try to recover corrupt jpegs
+//      - doesn't allow partial loading, loading multiple at once
+//      - still fast on x86 (copying globals into locals doesn't help x86)
+//      - allocates lots of intermediate memory (full size of all components)
+//        - non-interleaved case requires this anyway
+//        - allows good upsampling (see next)
+//    high-quality
+//      - upsampled channels are bilinearly interpolated, even across blocks
+//      - quality integer IDCT derived from IJG's 'slow'
+//    performance
+//      - fast huffman; reasonable integer IDCT
+//      - some SIMD kernels for common paths on targets with SSE2/NEON
+//      - uses a lot of intermediate memory, could cache poorly
+
+#ifndef STBI_NO_JPEG
+
+// huffman decoding acceleration
+#define FAST_BITS   9  // larger handles more cases; smaller stomps less cache
+
+typedef struct
+{
+	stbi_uc  fast[1 << FAST_BITS]; // indexed by the next FAST_BITS input bits: symbol index, or 255 when not accelerated
+	// weirdly, repacking this into AoS is a 10% speed loss, instead of a win
+	stbi__uint16 code[256]; // huffman code assigned to each symbol index
+	stbi_uc  values[256]; // value the decoder returns for each symbol index
+	stbi_uc  size[257]; // code length in bits for each symbol index; the list ends with a 0 entry
+	unsigned int maxcode[18]; // per code length: largest code + 1, preshifted to 16 bits; entry 17 is a 0xffffffff sentinel
+	int    delta[17];   // per code length: added to a code to get its symbol index (old 'firstsymbol' - old 'firstcode')
+} stbi__huffman;
+
+typedef struct
+{
+	stbi__context* s; // input stream the decoder pulls bytes from
+	stbi__huffman huff_dc[4];
+	stbi__huffman huff_ac[4];
+	stbi__uint16 dequant[4][64];
+	stbi__int16 fast_ac[4][1 << FAST_BITS]; // presumably filled by stbi__build_fast_ac, one table per huff_ac entry -- confirm at the call site
+
+	// sizes for components, interleaved MCUs
+	int img_h_max, img_v_max;
+	int img_mcu_x, img_mcu_y;
+	int img_mcu_w, img_mcu_h;
+
+	// definition of jpeg image component
+	struct
+	{
+		int id;
+		int h, v;
+		int tq;
+		int hd, ha;
+		int dc_pred; // running DC predictor: each decoded DC difference is added to it
+
+		int x, y, w2, h2;
+		stbi_uc* data;
+		void* raw_data, * raw_coeff;
+		stbi_uc* linebuf;
+		short* coeff;   // progressive only
+		int      coeff_w, coeff_h; // number of 8x8 coefficient blocks
+	} img_comp[4];
+
+	stbi__uint32   code_buffer; // jpeg entropy-coded buffer; the next bit to decode is the MSB
+	int            code_bits;   // number of valid bits
+	unsigned char  marker;      // marker seen while filling entropy buffer
+	int            nomore;      // flag if we saw a marker so must stop
+
+	int            progressive;
+	int            spec_start; // first zigzag index handled by the current progressive scan
+	int            spec_end; // last zigzag index handled by the current progressive scan
+	int            succ_high; // 0 selects the first-scan path, non-zero the refinement path
+	int            succ_low; // bit position applied to coefficients decoded in the current scan
+	int            eob_run; // progressive AC: number of following blocks still covered by an end-of-band run
+	int            jfif;
+	int            app14_color_transform; // Adobe APP14 tag
+	int            rgb;
+
+	int scan_n, order[4];
+	int restart_interval, todo; // todo is reloaded by stbi__jpeg_reset from restart_interval (0x7fffffff when that is 0)
+
+	// kernels
+	void (*idct_block_kernel)(stbi_uc* out, int out_stride, short data[64]);
+	void (*YCbCr_to_RGB_kernel)(stbi_uc* out, const stbi_uc* y, const stbi_uc* pcb, const stbi_uc* pcr, int count, int step);
+	stbi_uc* (*resample_row_hv_2_kernel)(stbi_uc* out, stbi_uc* in_near, stbi_uc* in_far, int w, int hs);
+} stbi__jpeg;
+
+static int stbi__build_huffman(stbi__huffman* h, int* count)
+{ // count[i] = number of codes of length i+1; fills size/code/delta/maxcode/fast. returns 1, or the stbi__err result for a bad table
+	int i, j, k = 0;
+	unsigned int code;
+	for (i = 0; i < 16; ++i) // build size list for each symbol (from JPEG spec)
+		for (j = 0; j < count[i]; ++j) {
+			if (k >= 256) return stbi__err("bad size list", "Corrupt JPEG"); // size[] holds 256 symbols plus the terminator, code[] only 256
+			h->size[k++] = (stbi_uc)(i + 1);
+		}
+	h->size[k] = 0; // terminator: stops the per-length scans below
+	// compute actual symbols (from jpeg spec)
+	code = 0;
+	k = 0;
+	for (j = 1; j <= 16; ++j) {
+		// compute delta to add to code to compute symbol id
+		h->delta[j] = k - code;
+		if (h->size[k] == j) {
+			while (h->size[k] == j)
+				h->code[k++] = (stbi__uint16)(code++);
+			if (code - 1 >= (1u << j)) return stbi__err("bad code lengths", "Corrupt JPEG"); // last code no longer fits in j bits
+		}
+		// compute largest code + 1 for this size, preshifted as needed later
+		h->maxcode[j] = code << (16 - j);
+		code <<= 1;
+	}
+	h->maxcode[j] = 0xffffffff; // j == 17 here: sentinel that ends the length search in the decoder
+
+	// build non-spec acceleration table; 255 is flag for not-accelerated
+	memset(h->fast, 255, 1 << FAST_BITS);
+	for (i = 0; i < k; ++i) {
+		int s = h->size[i];
+		if (s <= FAST_BITS) {
+			int c = h->code[i] << (FAST_BITS - s);
+			int m = 1 << (FAST_BITS - s);
+			for (j = 0; j < m; ++j) { // every FAST_BITS-bit pattern that starts with this code
+				h->fast[c + j] = (stbi_uc)i;
+			}
+		}
+	}
+	return 1;
+}
+
+// precompute, for every FAST_BITS-bit prefix, a packed entry giving a small
+// AC coefficient's value, zero run and total bit length in a single lookup.
+static void stbi__build_fast_ac(stbi__int16* fast_ac, stbi__huffman* h)
+{
+	int prefix;
+	for (prefix = 0; prefix < (1 << FAST_BITS); ++prefix) {
+		int sym = h->fast[prefix];
+		int rs, run, magbits, len, val, half;
+		fast_ac[prefix] = 0; // 0 tells the block decoders to take the regular huffman path
+		if (sym == 255) continue; // prefix is not covered by the fast huffman table
+		rs = h->values[sym];
+		run = (rs >> 4) & 15;
+		magbits = rs & 15;
+		len = h->size[sym];
+
+		// the code and its magnitude bits must both fit inside the prefix
+		if (magbits == 0 || len + magbits > FAST_BITS) continue;
+		// magnitude code followed by receive_extend code
+		val = ((prefix << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits);
+		half = 1 << (magbits - 1);
+		if (val < half) val += (~0U << magbits) + 1;
+		// only values that fit a signed byte can be packed into the table entry
+		if (val >= -128 && val <= 127)
+			fast_ac[prefix] = (stbi__int16)((val * 256) + (run * 16) + (len + magbits));
+	}
+}
+
+static void stbi__grow_buffer_unsafe(stbi__jpeg* j)
+{ // appends stream bytes to code_buffer until more than 24 bits are valid, or a marker is found
+	do {
+		unsigned int byte = j->nomore ? 0 : stbi__get8(j->s); // once a marker was seen, feed zero bits
+		if (byte == 0xff) {
+			int next;
+			do { next = stbi__get8(j->s); } while (next == 0xff); // consume fill bytes
+			if (next != 0) { // a real marker: remember it and stop reading the stream
+				j->nomore = 1;
+				j->marker = (unsigned char)next;
+				return;
+			}
+		}
+		j->code_buffer |= byte << (24 - j->code_bits); // an 0xff followed by 0 arrives here as a plain 0xff data byte
+		j->code_bits += 8;
+	} while (j->code_bits <= 24);
+}
+
+// (1 << n) - 1
+static const stbi__uint32 stbi__bmask[17] = { 0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535 };
+
+// decode a jpeg huffman value from the bitstream; returns the value, or -1 if no code matches or too few bits are buffered
+stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg* j, stbi__huffman* h)
+{
+	unsigned int temp;
+	int c, k;
+
+	if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
+
+	// look at the top FAST_BITS and determine what symbol ID it is,
+	// if the code is <= FAST_BITS
+	c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS) - 1);
+	k = h->fast[c];
+	if (k < 255) {
+		int s = h->size[k];
+		if (s > j->code_bits)
+			return -1;
+		j->code_buffer <<= s;
+		j->code_bits -= s;
+		return h->values[k];
+	}
+
+	// naive test is to shift the code_buffer down so k bits are
+	// valid, then test against maxcode. To speed this up, we've
+	// preshifted maxcode left so that it has (16-k) 0s at the
+	// end; in other words, regardless of the number of bits, it
+	// wants to be compared against something shifted to have 16;
+	// that way we don't need to shift inside the loop.
+	temp = j->code_buffer >> 16;
+	for (k = FAST_BITS + 1; ; ++k) // always stops by k == 17: maxcode[17] is 0xffffffff
+		if (temp < h->maxcode[k])
+			break;
+	if (k == 17) {
+		// error! code not found
+		j->code_bits -= 16;
+		return -1;
+	}
+
+	if (k > j->code_bits)
+		return -1;
+
+	// convert the huffman code to the symbol id
+	c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k];
+	if (c < 0 || c >= 256) return -1; // corrupt tables can yield an id outside size[]/code[]/values[]; refuse it
+	STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]);
+	// convert the id to a symbol
+	j->code_bits -= k;
+	j->code_buffer <<= k;
+	return h->values[c];
+}
+
+// bias[n] = (-1<<n) + 1
+static const int stbi__jbias[16] = { 0,-1,-3,-7,-15,-31,-63,-127,-255,-511,-1023,-2047,-4095,-8191,-16383,-32767 };
+
+// read n bits and apply the JPEG 'extend' step to them in one go: when
+// the first bit read is 0, stbi__jbias[n] is added to make the value negative.
+stbi_inline static int stbi__extend_receive(stbi__jpeg* j, int n)
+{
+	unsigned int rotated, bits;
+	int top_set;
+	if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
+
+	top_set = (stbi__int32)j->code_buffer >> 31; // sign bit is always in MSB
+	rotated = stbi_lrot(j->code_buffer, n); // the n wanted bits now sit in the low n positions
+	STBI_ASSERT(n >= 0 && n < (int)(sizeof(stbi__bmask) / sizeof(*stbi__bmask)));
+	bits = rotated & stbi__bmask[n];
+	j->code_buffer = rotated & ~stbi__bmask[n];
+	j->code_bits -= n;
+	return bits + (stbi__jbias[n] & ~top_set);
+}
+
+// pull the next n bits off the entropy buffer as an unsigned value
+stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg* j, int n)
+{
+	unsigned int rotated, bits;
+	if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
+	rotated = stbi_lrot(j->code_buffer, n); // the n wanted bits now sit in the low n positions
+	bits = rotated & stbi__bmask[n];
+	j->code_buffer = rotated & ~stbi__bmask[n];
+	j->code_bits -= n;
+	return bits;
+}
+
+stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg* j)
+{ // consumes one bit; the result is non-zero (bit 31 set) when that bit was 1, and 0 otherwise
+	unsigned int top;
+	if (j->code_bits < 1) stbi__grow_buffer_unsafe(j);
+	top = j->code_buffer & 0x80000000;
+	--j->code_bits;
+	j->code_buffer <<= 1;
+	return top;
+}
+
+// given a value that's at position X in the zigzag stream,
+// where does it appear in the 8x8 matrix coded as row-major?
+static const stbi_uc stbi__jpeg_dezigzag[64 + 15] =
+{
+	0,  1,  8, 16,  9,  2,  3, 10,
+   17, 24, 32, 25, 18, 11,  4,  5,
+   12, 19, 26, 33, 40, 48, 41, 34,
+   27, 20, 13,  6,  7, 14, 21, 28,
+   35, 42, 49, 56, 57, 50, 43, 36,
+   29, 22, 15, 23, 30, 37, 44, 51,
+   58, 59, 52, 45, 38, 31, 39, 46,
+   53, 60, 61, 54, 47, 55, 62, 63,
+   // let corrupt input sample past end: a zero run of up to 15 can move the index as far as 63 + 15 before it is looked up
+   63, 63, 63, 63, 63, 63, 63, 63,
+   63, 63, 63, 63, 63, 63, 63
+};
+
+// decode one 64-entry block-- DC difference first, then AC run/size pairs, each dequantized and stored de-zigzagged; returns 1, or the stbi__err result on a bad code
+static int stbi__jpeg_decode_block(stbi__jpeg* j, short data[64], stbi__huffman* hdc, stbi__huffman* hac, stbi__int16* fac, int b, stbi__uint16* dequant)
+{
+	int diff, dc, k;
+	int t;
+
+	if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
+	t = stbi__jpeg_huff_decode(j, hdc);
+	if (t < 0 || t > 15) return stbi__err("bad huffman code", "Corrupt JPEG"); // t is used to index the 16-entry stbi__jbias table
+
+	// 0 all the ac values now so we can do it 32-bits at a time
+	memset(data, 0, 64 * sizeof(data[0]));
+
+	diff = t ? stbi__extend_receive(j, t) : 0;
+	dc = j->img_comp[b].dc_pred + diff; // DC is coded as a difference from component b's previous DC
+	j->img_comp[b].dc_pred = dc;
+	data[0] = (short)(dc * dequant[0]);
+
+	// decode AC components, see JPEG spec
+	k = 1;
+	do {
+		unsigned int zig;
+		int c, r, s;
+		if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
+		c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS) - 1);
+		r = fac[c]; // packed as value*256 + run*16 + total bits; 0 means no fast entry
+		if (r) { // fast-AC path
+			k += (r >> 4) & 15; // run
+			s = r & 15; // combined length
+			j->code_buffer <<= s;
+			j->code_bits -= s;
+			// decode into unzigzag'd location
+			zig = stbi__jpeg_dezigzag[k++];
+			data[zig] = (short)((r >> 8) * dequant[zig]);
+		}
+		else {
+			int rs = stbi__jpeg_huff_decode(j, hac);
+			if (rs < 0) return stbi__err("bad huffman code", "Corrupt JPEG");
+			s = rs & 15; // number of magnitude bits that follow
+			r = rs >> 4; // zero run before the coefficient
+			if (s == 0) {
+				if (rs != 0xf0) break; // end block
+				k += 16; // 0xf0: sixteen zeros, no coefficient
+			}
+			else {
+				k += r;
+				// decode into unzigzag'd location
+				zig = stbi__jpeg_dezigzag[k++];
+				data[zig] = (short)(stbi__extend_receive(j, s) * dequant[zig]);
+			}
+		}
+	} while (k < 64);
+	return 1;
+}
+
+static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg* j, short data[64], stbi__huffman* hdc, int b)
+{ // progressive DC scan for one block of component b: the first scan decodes data[0], a refinement scan adds one bit; returns 1 or the stbi__err result
+	int diff, dc;
+	int t;
+	if (j->spec_end != 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
+
+	if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
+
+	if (j->succ_high == 0) {
+		// first scan for DC coefficient, must be first
+		memset(data, 0, 64 * sizeof(data[0])); // 0 all the ac values now
+		t = stbi__jpeg_huff_decode(j, hdc);
+		if (t < 0 || t > 15) return stbi__err("bad huffman code", "Corrupt JPEG"); // -1 is a decode failure; t indexes the 16-entry stbi__jbias table
+		diff = t ? stbi__extend_receive(j, t) : 0;
+		dc = j->img_comp[b].dc_pred + diff;
+		j->img_comp[b].dc_pred = dc;
+		data[0] = (short)(dc * (1 << j->succ_low)); // multiply rather than <<: dc may be negative
+	}
+	else {
+		// refinement scan for DC coefficient
+		if (stbi__jpeg_get_bit(j))
+			data[0] += (short)(1 << j->succ_low);
+	}
+	return 1;
+}
+
+// @OPTIMIZE: store non-zigzagged during the decode passes,
+// and only de-zigzag when dequantizing
+static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg* j, short data[64], stbi__huffman* hac, stbi__int16* fac)
+{ // progressive AC scan for one block over zigzag indices spec_start..spec_end; returns 1, or the stbi__err result on a bad code
+	int k;
+	if (j->spec_start == 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
+
+	if (j->succ_high == 0) { // first scan of this band
+		int shift = j->succ_low;
+
+		if (j->eob_run) { // inside an end-of-band run: this block gets no coefficients
+			--j->eob_run;
+			return 1;
+		}
+
+		k = j->spec_start;
+		do {
+			unsigned int zig;
+			int c, r, s;
+			if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
+			c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS) - 1);
+			r = fac[c];
+			if (r) { // fast-AC path
+				k += (r >> 4) & 15; // run
+				s = r & 15; // combined length
+				j->code_buffer <<= s;
+				j->code_bits -= s;
+				zig = stbi__jpeg_dezigzag[k++];
+				data[zig] = (short)((r >> 8) << shift);
+			}
+			else {
+				int rs = stbi__jpeg_huff_decode(j, hac);
+				if (rs < 0) return stbi__err("bad huffman code", "Corrupt JPEG");
+				s = rs & 15;
+				r = rs >> 4;
+				if (s == 0) {
+					if (r < 15) { // end-of-band run of (1 << r) plus r extra bits, counting this block
+						j->eob_run = (1 << r);
+						if (r)
+							j->eob_run += stbi__jpeg_get_bits(j, r);
+						--j->eob_run; // this block is the first one of the run
+						break;
+					}
+					k += 16; // r == 15, s == 0: sixteen zeros, no coefficient
+				}
+				else {
+					k += r;
+					zig = stbi__jpeg_dezigzag[k++];
+					data[zig] = (short)(stbi__extend_receive(j, s) << shift);
+				}
+			}
+		} while (k <= j->spec_end);
+	}
+	else {
+		// refinement scan for these AC coefficients
+
+		short bit = (short)(1 << j->succ_low); // the single bit this scan contributes
+
+		if (j->eob_run) {
+			--j->eob_run;
+			for (k = j->spec_start; k <= j->spec_end; ++k) { // only coefficients that are already non-zero read a correction bit
+				short* p = &data[stbi__jpeg_dezigzag[k]];
+				if (*p != 0)
+					if (stbi__jpeg_get_bit(j))
+						if ((*p & bit) == 0) { // leave it alone if this bit is already set
+							if (*p > 0)
+								* p += bit;
+							else
+								*p -= bit;
+						}
+			}
+		}
+		else {
+			k = j->spec_start;
+			do {
+				int r, s;
+				int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh
+				if (rs < 0) return stbi__err("bad huffman code", "Corrupt JPEG");
+				s = rs & 15;
+				r = rs >> 4;
+				if (s == 0) {
+					if (r < 15) {
+						j->eob_run = (1 << r) - 1;
+						if (r)
+							j->eob_run += stbi__jpeg_get_bits(j, r);
+						r = 64; // force end of block
+					}
+					else {
+						// r=15 s=0 should write 16 0s, so we just do
+						// a run of 15 0s and then write s (which is 0),
+						// so we don't have to do anything special here
+					}
+				}
+				else {
+					if (s != 1) return stbi__err("bad huffman code", "Corrupt JPEG");
+					// sign bit
+					if (stbi__jpeg_get_bit(j))
+						s = bit; // from here on s holds the new coefficient's value, +bit or -bit
+					else
+						s = -bit;
+				}
+
+				// advance by r
+				while (k <= j->spec_end) {
+					short* p = &data[stbi__jpeg_dezigzag[k++]];
+					if (*p != 0) { // non-zero slots are refined in passing and do not count toward the run
+						if (stbi__jpeg_get_bit(j))
+							if ((*p & bit) == 0) {
+								if (*p > 0)
+									* p += bit;
+								else
+									*p -= bit;
+							}
+					}
+					else {
+						if (r == 0) { // r zero-valued slots skipped: the new coefficient goes here
+							*p = (short)s;
+							break;
+						}
+						--r;
+					}
+				}
+			} while (k <= j->spec_end);
+		}
+	}
+	return 1;
+}
+
+// saturate an int to the 0..255 range and narrow it to stbi_uc
+stbi_inline static stbi_uc stbi__clamp(int x)
+{
+	// one unsigned compare accepts everything already inside 0..255
+	if ((unsigned int)x <= 255)
+		return (stbi_uc)x;
+	// out of range: negatives saturate to 0, everything else to 255
+	if (x < 0) return 0;
+	return 255;
+}
+
+#define stbi__f2f(x)  ((int) (((x) * 4096 + 0.5))) // constant scaled by 4096 (12 fractional bits): adds 0.5, then truncates to int
+#define stbi__fsh(x)  ((x) * 4096) // integer brought to the same 1<<12 scale
+
+// derived from jidctint -- DCT_ISLOW. one 8-point 1D pass: declares its own locals and leaves the even part in x0..x3 and the odd part in t0..t3 (scaled by 1<<12) for the caller to combine
+#define STBI__IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \
+   int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \
+   p2 = s2;                                    \
+   p3 = s6;                                    \
+   p1 = (p2+p3) * stbi__f2f(0.5411961f);       \
+   t2 = p1 + p3*stbi__f2f(-1.847759065f);      \
+   t3 = p1 + p2*stbi__f2f( 0.765366865f);      \
+   p2 = s0;                                    \
+   p3 = s4;                                    \
+   t0 = stbi__fsh(p2+p3);                      \
+   t1 = stbi__fsh(p2-p3);                      \
+   x0 = t0+t3;                                 \
+   x3 = t0-t3;                                 \
+   x1 = t1+t2;                                 \
+   x2 = t1-t2;                                 \
+   t0 = s7;                                    \
+   t1 = s5;                                    \
+   t2 = s3;                                    \
+   t3 = s1;                                    \
+   p3 = t0+t2;                                 \
+   p4 = t1+t3;                                 \
+   p1 = t0+t3;                                 \
+   p2 = t1+t2;                                 \
+   p5 = (p3+p4)*stbi__f2f( 1.175875602f);      \
+   t0 = t0*stbi__f2f( 0.298631336f);           \
+   t1 = t1*stbi__f2f( 2.053119869f);           \
+   t2 = t2*stbi__f2f( 3.072711026f);           \
+   t3 = t3*stbi__f2f( 1.501321110f);           \
+   p1 = p5 + p1*stbi__f2f(-0.899976223f);      \
+   p2 = p5 + p2*stbi__f2f(-2.562915447f);      \
+   p3 = p3*stbi__f2f(-1.961570560f);           \
+   p4 = p4*stbi__f2f(-0.390180644f);           \
+   t3 += p1+p4;                                \
+   t2 += p2+p3;                                \
+   t1 += p2+p4;                                \
+   t0 += p1+p3;
+
+static void stbi__idct_block(stbi_uc* out, int out_stride, short data[64])
+{ // 2D inverse DCT of one 8x8 block: a column pass into val[], then a row pass that writes 8 clamped bytes per row to out
+	int i, val[64], * v = val;
+	stbi_uc* o;
+	short* d = data;
+
+	// columns
+	for (i = 0; i < 8; ++i, ++d, ++v) {
+		// if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
+		if (d[8] == 0 && d[16] == 0 && d[24] == 0 && d[32] == 0
+			&& d[40] == 0 && d[48] == 0 && d[56] == 0) {
+			//    no shortcut                 0     seconds
+			//    (1|2|3|4|5|6|7)==0          0     seconds
+			//    all separate               -0.047 seconds
+			//    1 && 2|3 && 4|5 && 6|7:    -0.047 seconds
+			int dcterm = d[0] * 4; // same 2 extra bits of precision the full path keeps
+			v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
+		}
+		else {
+			STBI__IDCT_1D(d[0], d[8], d[16], d[24], d[32], d[40], d[48], d[56])
+				// constants scaled things up by 1<<12; let's bring them back
+				// down, but keep 2 extra bits of precision
+				x0 += 512; x1 += 512; x2 += 512; x3 += 512; // 512 rounds the >> 10 below
+			v[0] = (x0 + t3) >> 10;
+			v[56] = (x0 - t3) >> 10;
+			v[8] = (x1 + t2) >> 10;
+			v[48] = (x1 - t2) >> 10;
+			v[16] = (x2 + t1) >> 10;
+			v[40] = (x2 - t1) >> 10;
+			v[24] = (x3 + t0) >> 10;
+			v[32] = (x3 - t0) >> 10;
+		}
+	}
+
+	for (i = 0, v = val, o = out; i < 8; ++i, v += 8, o += out_stride) { // rows: one output scanline per iteration
+		// no fast case since the first 1D IDCT spread components out
+		STBI__IDCT_1D(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7])
+			// constants scaled things up by 1<<12, plus we had 1<<2 from first
+			// loop, plus horizontal and vertical each scale by sqrt(8) so together
+			// we've got an extra 1<<3, so 1<<17 total we need to remove.
+			// so we want to round that, which means adding 0.5 * 1<<17,
+			// aka 65536. Also, we'll end up with -128 to 127 that we want
+			// to encode as 0..255 by adding 128, so we'll add that before the shift
+			x0 += 65536 + (128 << 17);
+		x1 += 65536 + (128 << 17);
+		x2 += 65536 + (128 << 17);
+		x3 += 65536 + (128 << 17);
+		// tried computing the shifts into temps, or'ing the temps to see
+		// if any were out of range, but that was slower
+		o[0] = stbi__clamp((x0 + t3) >> 17);
+		o[7] = stbi__clamp((x0 - t3) >> 17);
+		o[1] = stbi__clamp((x1 + t2) >> 17);
+		o[6] = stbi__clamp((x1 - t2) >> 17);
+		o[2] = stbi__clamp((x2 + t1) >> 17);
+		o[5] = stbi__clamp((x2 - t1) >> 17);
+		o[3] = stbi__clamp((x3 + t0) >> 17);
+		o[4] = stbi__clamp((x3 - t0) >> 17);
+	}
+}
+
+#ifdef STBI_SSE2
+// sse2 integer IDCT. not the fastest possible implementation but it
+// produces bit-identical results to the generic C version so it's
+// fully "transparent".
+static void stbi__idct_simd(stbi_uc* out, int out_stride, short data[64])
+{
+	// This is constructed to match our regular (generic) integer IDCT exactly.
+	__m128i row0, row1, row2, row3, row4, row5, row6, row7;
+	__m128i tmp; // scratch used by the interleave macros
+
+	// dot product constant: even elems=x, odd elems=y
+#define dct_const(x,y)  _mm_setr_epi16((x),(y),(x),(y),(x),(y),(x),(y))
+
+// out(0) = c0[even]*x + c0[odd]*y   (c0, x, y 16-bit, out 32-bit)
+// out(1) = c1[even]*x + c1[odd]*y
+#define dct_rot(out0,out1, x,y,c0,c1) \
+	  __m128i c0##lo = _mm_unpacklo_epi16((x),(y)); \
+	  __m128i c0##hi = _mm_unpackhi_epi16((x),(y)); \
+	  __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \
+	  __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \
+	  __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \
+	  __m128i out1##_h = _mm_madd_epi16(c0##hi, c1)
+
+   // out = in << 12  (in 16-bit, out 32-bit): interleaving below zeros gives in << 16, then an arithmetic shift right by 4
+#define dct_widen(out, in) \
+	  __m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \
+	  __m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4)
+
+   // wide add
+#define dct_wadd(out, a, b) \
+	  __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \
+	  __m128i out##_h = _mm_add_epi32(a##_h, b##_h)
+
+   // wide sub
+#define dct_wsub(out, a, b) \
+	  __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \
+	  __m128i out##_h = _mm_sub_epi32(a##_h, b##_h)
+
+   // butterfly a/b, add bias, then shift by "s" and pack
+#define dct_bfly32o(out0, out1, a,b,bias,s) \
+	  { \
+		 __m128i abiased_l = _mm_add_epi32(a##_l, bias); \
+		 __m128i abiased_h = _mm_add_epi32(a##_h, bias); \
+		 dct_wadd(sum, abiased, b); \
+		 dct_wsub(dif, abiased, b); \
+		 out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s)); \
+		 out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s)); \
+	  }
+
+   // 8-bit interleave step (for transposes)
+#define dct_interleave8(a, b) \
+	  tmp = a; \
+	  a = _mm_unpacklo_epi8(a, b); \
+	  b = _mm_unpackhi_epi8(tmp, b)
+
+   // 16-bit interleave step (for transposes)
+#define dct_interleave16(a, b) \
+	  tmp = a; \
+	  a = _mm_unpacklo_epi16(a, b); \
+	  b = _mm_unpackhi_epi16(tmp, b)
+
+#define dct_pass(bias,shift) \
+	  { \
+		 /* even part */ \
+		 dct_rot(t2e,t3e, row2,row6, rot0_0,rot0_1); \
+		 __m128i sum04 = _mm_add_epi16(row0, row4); \
+		 __m128i dif04 = _mm_sub_epi16(row0, row4); \
+		 dct_widen(t0e, sum04); \
+		 dct_widen(t1e, dif04); \
+		 dct_wadd(x0, t0e, t3e); \
+		 dct_wsub(x3, t0e, t3e); \
+		 dct_wadd(x1, t1e, t2e); \
+		 dct_wsub(x2, t1e, t2e); \
+		 /* odd part */ \
+		 dct_rot(y0o,y2o, row7,row3, rot2_0,rot2_1); \
+		 dct_rot(y1o,y3o, row5,row1, rot3_0,rot3_1); \
+		 __m128i sum17 = _mm_add_epi16(row1, row7); \
+		 __m128i sum35 = _mm_add_epi16(row3, row5); \
+		 dct_rot(y4o,y5o, sum17,sum35, rot1_0,rot1_1); \
+		 dct_wadd(x4, y0o, y4o); \
+		 dct_wadd(x5, y1o, y5o); \
+		 dct_wadd(x6, y2o, y5o); \
+		 dct_wadd(x7, y3o, y4o); \
+		 dct_bfly32o(row0,row7, x0,x7,bias,shift); \
+		 dct_bfly32o(row1,row6, x1,x6,bias,shift); \
+		 dct_bfly32o(row2,row5, x2,x5,bias,shift); \
+		 dct_bfly32o(row3,row4, x3,x4,bias,shift); \
+	  }
+
+	__m128i rot0_0 = dct_const(stbi__f2f(0.5411961f), stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f));
+	__m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f(0.765366865f), stbi__f2f(0.5411961f));
+	__m128i rot1_0 = dct_const(stbi__f2f(1.175875602f) + stbi__f2f(-0.899976223f), stbi__f2f(1.175875602f));
+	__m128i rot1_1 = dct_const(stbi__f2f(1.175875602f), stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f));
+	__m128i rot2_0 = dct_const(stbi__f2f(-1.961570560f) + stbi__f2f(0.298631336f), stbi__f2f(-1.961570560f));
+	__m128i rot2_1 = dct_const(stbi__f2f(-1.961570560f), stbi__f2f(-1.961570560f) + stbi__f2f(3.072711026f));
+	__m128i rot3_0 = dct_const(stbi__f2f(-0.390180644f) + stbi__f2f(2.053119869f), stbi__f2f(-0.390180644f));
+	__m128i rot3_1 = dct_const(stbi__f2f(-0.390180644f), stbi__f2f(-0.390180644f) + stbi__f2f(1.501321110f));
+
+	// rounding biases in column/row passes, see stbi__idct_block for explanation.
+	__m128i bias_0 = _mm_set1_epi32(512);
+	__m128i bias_1 = _mm_set1_epi32(65536 + (128 << 17));
+
+	// load (aligned 128-bit loads: data must be 16-byte aligned)
+	row0 = _mm_load_si128((const __m128i*) (data + 0 * 8));
+	row1 = _mm_load_si128((const __m128i*) (data + 1 * 8));
+	row2 = _mm_load_si128((const __m128i*) (data + 2 * 8));
+	row3 = _mm_load_si128((const __m128i*) (data + 3 * 8));
+	row4 = _mm_load_si128((const __m128i*) (data + 4 * 8));
+	row5 = _mm_load_si128((const __m128i*) (data + 5 * 8));
+	row6 = _mm_load_si128((const __m128i*) (data + 6 * 8));
+	row7 = _mm_load_si128((const __m128i*) (data + 7 * 8));
+
+	// column pass
+	dct_pass(bias_0, 10);
+
+	{
+		// 16bit 8x8 transpose pass 1
+		dct_interleave16(row0, row4);
+		dct_interleave16(row1, row5);
+		dct_interleave16(row2, row6);
+		dct_interleave16(row3, row7);
+
+		// transpose pass 2
+		dct_interleave16(row0, row2);
+		dct_interleave16(row1, row3);
+		dct_interleave16(row4, row6);
+		dct_interleave16(row5, row7);
+
+		// transpose pass 3
+		dct_interleave16(row0, row1);
+		dct_interleave16(row2, row3);
+		dct_interleave16(row4, row5);
+		dct_interleave16(row6, row7);
+	}
+
+	// row pass
+	dct_pass(bias_1, 17);
+
+	{
+		// pack
+		__m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7
+		__m128i p1 = _mm_packus_epi16(row2, row3);
+		__m128i p2 = _mm_packus_epi16(row4, row5);
+		__m128i p3 = _mm_packus_epi16(row6, row7);
+
+		// 8bit 8x8 transpose pass 1
+		dct_interleave8(p0, p2); // a0e0a1e1...
+		dct_interleave8(p1, p3); // c0g0c1g1...
+
+		// transpose pass 2
+		dct_interleave8(p0, p1); // a0c0e0g0...
+		dct_interleave8(p2, p3); // b0d0f0h0...
+
+		// transpose pass 3
+		dct_interleave8(p0, p2); // a0b0c0d0...
+		dct_interleave8(p1, p3); // a4b4c4d4...
+
+		// store: each register holds two output rows; the 0x4e shuffle swaps the 64-bit halves to reach the second one
+		_mm_storel_epi64((__m128i*) out, p0); out += out_stride;
+		_mm_storel_epi64((__m128i*) out, _mm_shuffle_epi32(p0, 0x4e)); out += out_stride;
+		_mm_storel_epi64((__m128i*) out, p2); out += out_stride;
+		_mm_storel_epi64((__m128i*) out, _mm_shuffle_epi32(p2, 0x4e)); out += out_stride;
+		_mm_storel_epi64((__m128i*) out, p1); out += out_stride;
+		_mm_storel_epi64((__m128i*) out, _mm_shuffle_epi32(p1, 0x4e)); out += out_stride;
+		_mm_storel_epi64((__m128i*) out, p3); out += out_stride;
+		_mm_storel_epi64((__m128i*) out, _mm_shuffle_epi32(p3, 0x4e));
+	}
+
+#undef dct_const
+#undef dct_rot
+#undef dct_widen
+#undef dct_wadd
+#undef dct_wsub
+#undef dct_bfly32o
+#undef dct_interleave8
+#undef dct_interleave16
+#undef dct_pass
+}
+
+#endif // STBI_SSE2
+
+#ifdef STBI_NEON
+
+// NEON integer IDCT. should produce bit-identical
+// results to the generic C version.
+static void stbi__idct_simd(stbi_uc* out, int out_stride, short data[64])
+{
+	int16x8_t row0, row1, row2, row3, row4, row5, row6, row7;
+
+	int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f));
+	int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f));
+	int16x4_t rot0_2 = vdup_n_s16(stbi__f2f(0.765366865f));
+	int16x4_t rot1_0 = vdup_n_s16(stbi__f2f(1.175875602f));
+	int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f));
+	int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f));
+	int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f));
+	int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f));
+	int16x4_t rot3_0 = vdup_n_s16(stbi__f2f(0.298631336f));
+	int16x4_t rot3_1 = vdup_n_s16(stbi__f2f(2.053119869f));
+	int16x4_t rot3_2 = vdup_n_s16(stbi__f2f(3.072711026f));
+	int16x4_t rot3_3 = vdup_n_s16(stbi__f2f(1.501321110f));
+
+#define dct_long_mul(out, inq, coeff) \
+   int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff); \
+   int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff)
+
+#define dct_long_mac(out, acc, inq, coeff) \
+   int32x4_t out##_l = vmlal_s16(acc##_l, vget_low_s16(inq), coeff); \
+   int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff)
+
+#define dct_widen(out, inq) \
+   int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12); \
+   int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12)
+
+	// wide add
+#define dct_wadd(out, a, b) \
+   int32x4_t out##_l = vaddq_s32(a##_l, b##_l); \
+   int32x4_t out##_h = vaddq_s32(a##_h, b##_h)
+
+// wide sub
+#define dct_wsub(out, a, b) \
+   int32x4_t out##_l = vsubq_s32(a##_l, b##_l); \
+   int32x4_t out##_h = vsubq_s32(a##_h, b##_h)
+
+// butterfly a/b, then shift using "shiftop" by "s" and pack
+#define dct_bfly32o(out0,out1, a,b,shiftop,s) \
+   { \
+	  dct_wadd(sum, a, b); \
+	  dct_wsub(dif, a, b); \
+	  out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \
+	  out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \
+   }
+
+#define dct_pass(shiftop, shift) \
+   { \
+	  /* even part */ \
+	  int16x8_t sum26 = vaddq_s16(row2, row6); \
+	  dct_long_mul(p1e, sum26, rot0_0); \
+	  dct_long_mac(t2e, p1e, row6, rot0_1); \
+	  dct_long_mac(t3e, p1e, row2, rot0_2); \
+	  int16x8_t sum04 = vaddq_s16(row0, row4); \
+	  int16x8_t dif04 = vsubq_s16(row0, row4); \
+	  dct_widen(t0e, sum04); \
+	  dct_widen(t1e, dif04); \
+	  dct_wadd(x0, t0e, t3e); \
+	  dct_wsub(x3, t0e, t3e); \
+	  dct_wadd(x1, t1e, t2e); \
+	  dct_wsub(x2, t1e, t2e); \
+	  /* odd part */ \
+	  int16x8_t sum15 = vaddq_s16(row1, row5); \
+	  int16x8_t sum17 = vaddq_s16(row1, row7); \
+	  int16x8_t sum35 = vaddq_s16(row3, row5); \
+	  int16x8_t sum37 = vaddq_s16(row3, row7); \
+	  int16x8_t sumodd = vaddq_s16(sum17, sum35); \
+	  dct_long_mul(p5o, sumodd, rot1_0); \
+	  dct_long_mac(p1o, p5o, sum17, rot1_1); \
+	  dct_long_mac(p2o, p5o, sum35, rot1_2); \
+	  dct_long_mul(p3o, sum37, rot2_0); \
+	  dct_long_mul(p4o, sum15, rot2_1); \
+	  dct_wadd(sump13o, p1o, p3o); \
+	  dct_wadd(sump24o, p2o, p4o); \
+	  dct_wadd(sump23o, p2o, p3o); \
+	  dct_wadd(sump14o, p1o, p4o); \
+	  dct_long_mac(x4, sump13o, row7, rot3_0); \
+	  dct_long_mac(x5, sump24o, row5, rot3_1); \
+	  dct_long_mac(x6, sump23o, row3, rot3_2); \
+	  dct_long_mac(x7, sump14o, row1, rot3_3); \
+	  dct_bfly32o(row0,row7, x0,x7,shiftop,shift); \
+	  dct_bfly32o(row1,row6, x1,x6,shiftop,shift); \
+	  dct_bfly32o(row2,row5, x2,x5,shiftop,shift); \
+	  dct_bfly32o(row3,row4, x3,x4,shiftop,shift); \
+   }
+
+   // load
+	row0 = vld1q_s16(data + 0 * 8);
+	row1 = vld1q_s16(data + 1 * 8);
+	row2 = vld1q_s16(data + 2 * 8);
+	row3 = vld1q_s16(data + 3 * 8);
+	row4 = vld1q_s16(data + 4 * 8);
+	row5 = vld1q_s16(data + 5 * 8);
+	row6 = vld1q_s16(data + 6 * 8);
+	row7 = vld1q_s16(data + 7 * 8);
+
+	// add DC bias: 1024 on the DC coefficient only; after both passes that is +128 on every output, standing in for the C version's 128 << 17
+	row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0));
+
+	// column pass (the rounding narrowing shift takes the place of the C version's +512)
+	dct_pass(vrshrn_n_s32, 10);
+
+	// 16bit 8x8 transpose
+	{
+		// these three map to a single VTRN.16, VTRN.32, and VSWP, respectively.
+		// whether compilers actually get this is another story, sadly.
+#define dct_trn16(x, y) { int16x8x2_t t = vtrnq_s16(x, y); x = t.val[0]; y = t.val[1]; }
+#define dct_trn32(x, y) { int32x4x2_t t = vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); x = vreinterpretq_s16_s32(t.val[0]); y = vreinterpretq_s16_s32(t.val[1]); }
+#define dct_trn64(x, y) { int16x8_t x0 = x; int16x8_t y0 = y; x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0)); y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); }
+
+	  // pass 1
+		dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6
+		dct_trn16(row2, row3);
+		dct_trn16(row4, row5);
+		dct_trn16(row6, row7);
+
+		// pass 2
+		dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4
+		dct_trn32(row1, row3);
+		dct_trn32(row4, row6);
+		dct_trn32(row5, row7);
+
+		// pass 3
+		dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0
+		dct_trn64(row1, row5);
+		dct_trn64(row2, row6);
+		dct_trn64(row3, row7);
+
+#undef dct_trn16
+#undef dct_trn32
+#undef dct_trn64
+	}
+
+	// row pass
+	// vrshrn_n_s32 only supports shifts up to 16, we need
+	// 17. so do a non-rounding shift of 16 first then follow
+	// up with a rounding shift by 1.
+	dct_pass(vshrn_n_s32, 16);
+
+	{
+		// pack and round: the rounding shift by 1 completes the >> 17, and the unsigned saturating narrow clamps to 0..255
+		uint8x8_t p0 = vqrshrun_n_s16(row0, 1);
+		uint8x8_t p1 = vqrshrun_n_s16(row1, 1);
+		uint8x8_t p2 = vqrshrun_n_s16(row2, 1);
+		uint8x8_t p3 = vqrshrun_n_s16(row3, 1);
+		uint8x8_t p4 = vqrshrun_n_s16(row4, 1);
+		uint8x8_t p5 = vqrshrun_n_s16(row5, 1);
+		uint8x8_t p6 = vqrshrun_n_s16(row6, 1);
+		uint8x8_t p7 = vqrshrun_n_s16(row7, 1);
+
+		// again, these can translate into one instruction, but often don't.
+#define dct_trn8_8(x, y) { uint8x8x2_t t = vtrn_u8(x, y); x = t.val[0]; y = t.val[1]; }
+#define dct_trn8_16(x, y) { uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); x = vreinterpret_u8_u16(t.val[0]); y = vreinterpret_u8_u16(t.val[1]); }
+#define dct_trn8_32(x, y) { uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); x = vreinterpret_u8_u32(t.val[0]); y = vreinterpret_u8_u32(t.val[1]); }
+
+	  // sadly can't use interleaved stores here since we only write
+	  // 8 bytes to each scan line!
+
+	  // 8x8 8-bit transpose pass 1
+		dct_trn8_8(p0, p1);
+		dct_trn8_8(p2, p3);
+		dct_trn8_8(p4, p5);
+		dct_trn8_8(p6, p7);
+
+		// pass 2
+		dct_trn8_16(p0, p2);
+		dct_trn8_16(p1, p3);
+		dct_trn8_16(p4, p6);
+		dct_trn8_16(p5, p7);
+
+		// pass 3
+		dct_trn8_32(p0, p4);
+		dct_trn8_32(p1, p5);
+		dct_trn8_32(p2, p6);
+		dct_trn8_32(p3, p7);
+
+		// store: one 8-byte row per scanline, out_stride bytes apart
+		vst1_u8(out, p0); out += out_stride;
+		vst1_u8(out, p1); out += out_stride;
+		vst1_u8(out, p2); out += out_stride;
+		vst1_u8(out, p3); out += out_stride;
+		vst1_u8(out, p4); out += out_stride;
+		vst1_u8(out, p5); out += out_stride;
+		vst1_u8(out, p6); out += out_stride;
+		vst1_u8(out, p7);
+
+#undef dct_trn8_8
+#undef dct_trn8_16
+#undef dct_trn8_32
+	}
+
+#undef dct_long_mul
+#undef dct_long_mac
+#undef dct_widen
+#undef dct_wadd
+#undef dct_wsub
+#undef dct_bfly32o
+#undef dct_pass
+}
+
+#endif // STBI_NEON
+
+#define STBI__MARKER_none  0xff
+// Return the next marker code.
+// A marker left pending by the entropy decoder (j->marker) is handed back
+// first and the pending slot is cleared. Otherwise one byte is read from the
+// stream: if it is not 0xff there is no marker and STBI__MARKER_none (0xff,
+// never a valid marker value) is returned; if it is 0xff, any further 0xff
+// fill bytes are skipped and the first non-0xff byte is returned.
+static stbi_uc stbi__get_marker(stbi__jpeg * j)
+{
+	stbi_uc code = j->marker;
+	if (code != STBI__MARKER_none) {
+		// consume the cached marker
+		j->marker = STBI__MARKER_none;
+		return code;
+	}
+	code = stbi__get8(j->s);
+	if (code != 0xff)
+		return STBI__MARKER_none;
+	// consume repeated 0xff fill bytes; stop at the marker code itself
+	do {
+		code = stbi__get8(j->s);
+	} while (code == 0xff);
+	return code;
+}
+
+// in each scan, we'll have scan_n components, and the order
+// of the components is specified by order[]
+//
+// STBI__RESTART(x): nonzero when marker code x lies in 0xd0..0xd7, the eight
+// restart marker codes. x is evaluated twice, so pass a side-effect-free
+// expression.
+#define STBI__RESTART(x)     ((x) >= 0xd0 && (x) <= 0xd7)
+
+// Put the entropy decoder back into its start-of-interval state: empty bit
+// buffer, no pending marker, zero DC prediction for all four component
+// slots, no end-of-band run, and a fresh MCU countdown in j->todo.
+// Called at the start of entropy-coded data and after each restart marker.
+static void stbi__jpeg_reset(stbi__jpeg* j)
+{
+	int c;
+
+	// discard buffered bits and the "no more data" flag
+	j->code_buffer = 0;
+	j->code_bits = 0;
+	j->nomore = 0;
+
+	// DC prediction starts over for every component
+	for (c = 0; c < 4; ++c)
+		j->img_comp[c].dc_pred = 0;
+
+	j->marker = STBI__MARKER_none;
+	j->eob_run = 0;
+
+	// no more than 1<<31 MCUs if no restart_interval? that's plenty safe,
+	// since we don't even allow 1<<30 pixels
+	if (j->restart_interval)
+		j->todo = j->restart_interval;
+	else
+		j->todo = 0x7fffffff;
+}
+
+// Decode the entropy-coded data of one scan (the bytes following an SOS header).
+//
+// Four cases, selected by z->progressive and z->scan_n:
+//   - baseline, single component:  decode + IDCT each 8x8 block in scanline order
+//   - baseline, interleaved:       decode + IDCT MCU by MCU, h*v blocks per component
+//   - progressive, single component: accumulate DC or AC coefficients into coeff[]
+//   - progressive, interleaved:    accumulate DC coefficients into coeff[]
+// In the progressive cases no IDCT is run here; coefficients are only stored.
+//
+// After every MCU the restart countdown z->todo is decremented; when it reaches
+// zero the decoder expects a restart marker in z->marker.
+//
+// Returns 0 if a block decoder reports failure. Returns 1 otherwise, including
+// the case where a restart marker was expected but not found (decoding stops
+// early so the caller gets partial/corrupt data rather than none).
+static int stbi__parse_entropy_coded_data(stbi__jpeg* z)
+{
+	stbi__jpeg_reset(z);
+	if (!z->progressive) {
+		if (z->scan_n == 1) {
+			int i, j;
+			STBI_SIMD_ALIGN(short, data[64]);
+			int n = z->order[0];
+			// non-interleaved data, we just need to process one block at a time,
+			// in trivial scanline order
+			// number of blocks to do just depends on how many actual "pixels" this
+			// component has, independent of interleaved MCU blocking and such
+			int w = (z->img_comp[n].x + 7) >> 3;
+			int h = (z->img_comp[n].y + 7) >> 3;
+			for (j = 0; j < h; ++j) {
+				for (i = 0; i < w; ++i) {
+					int ha = z->img_comp[n].ha;
+					if (!stbi__jpeg_decode_block(z, data, z->huff_dc + z->img_comp[n].hd, z->huff_ac + ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
+					// write the 8x8 result at block (i,j) of this component's plane; w2 is the row stride
+					z->idct_block_kernel(z->img_comp[n].data + z->img_comp[n].w2 * j * 8 + i * 8, z->img_comp[n].w2, data);
+					// every data block is an MCU, so countdown the restart interval
+					if (--z->todo <= 0) {
+						// NOTE(review): presumably refilling the bit buffer is what surfaces a
+						// pending marker into z->marker -- confirm in stbi__grow_buffer_unsafe
+						if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
+						// if it's NOT a restart, then just bail, so we get corrupt data
+						// rather than no data
+						if (!STBI__RESTART(z->marker)) return 1;
+						stbi__jpeg_reset(z);
+					}
+				}
+			}
+			return 1;
+		}
+		else { // interleaved
+			int i, j, k, x, y;
+			STBI_SIMD_ALIGN(short, data[64]);
+			for (j = 0; j < z->img_mcu_y; ++j) {
+				for (i = 0; i < z->img_mcu_x; ++i) {
+					// scan an interleaved mcu... process scan_n components in order
+					for (k = 0; k < z->scan_n; ++k) {
+						int n = z->order[k];
+						// scan out an mcu's worth of this component; that's just determined
+						// by the basic H and V specified for the component
+						for (y = 0; y < z->img_comp[n].v; ++y) {
+							for (x = 0; x < z->img_comp[n].h; ++x) {
+								// pixel coordinates of this block's top-left corner in the component plane
+								int x2 = (i * z->img_comp[n].h + x) * 8;
+								int y2 = (j * z->img_comp[n].v + y) * 8;
+								int ha = z->img_comp[n].ha;
+								if (!stbi__jpeg_decode_block(z, data, z->huff_dc + z->img_comp[n].hd, z->huff_ac + ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
+								z->idct_block_kernel(z->img_comp[n].data + z->img_comp[n].w2 * y2 + x2, z->img_comp[n].w2, data);
+							}
+						}
+					}
+					// after all interleaved components, that's an interleaved MCU,
+					// so now count down the restart interval
+					if (--z->todo <= 0) {
+						if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
+						// missing restart marker: stop early but report success (partial data)
+						if (!STBI__RESTART(z->marker)) return 1;
+						stbi__jpeg_reset(z);
+					}
+				}
+			}
+			return 1;
+		}
+	}
+	else {
+		if (z->scan_n == 1) {
+			int i, j;
+			int n = z->order[0];
+			// non-interleaved data, we just need to process one block at a time,
+			// in trivial scanline order
+			// number of blocks to do just depends on how many actual "pixels" this
+			// component has, independent of interleaved MCU blocking and such
+			int w = (z->img_comp[n].x + 7) >> 3;
+			int h = (z->img_comp[n].y + 7) >> 3;
+			for (j = 0; j < h; ++j) {
+				for (i = 0; i < w; ++i) {
+					// 64 coefficients per block; coeff_w is the number of blocks per coefficient row
+					short* data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w);
+					// spec_start == 0 selects the DC decoder, anything else the AC decoder
+					if (z->spec_start == 0) {
+						if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
+							return 0;
+					}
+					else {
+						int ha = z->img_comp[n].ha;
+						if (!stbi__jpeg_decode_block_prog_ac(z, data, &z->huff_ac[ha], z->fast_ac[ha]))
+							return 0;
+					}
+					// every data block is an MCU, so countdown the restart interval
+					if (--z->todo <= 0) {
+						if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
+						// missing restart marker: stop early but report success (partial data)
+						if (!STBI__RESTART(z->marker)) return 1;
+						stbi__jpeg_reset(z);
+					}
+				}
+			}
+			return 1;
+		}
+		else { // interleaved
+			int i, j, k, x, y;
+			for (j = 0; j < z->img_mcu_y; ++j) {
+				for (i = 0; i < z->img_mcu_x; ++i) {
+					// scan an interleaved mcu... process scan_n components in order
+					for (k = 0; k < z->scan_n; ++k) {
+						int n = z->order[k];
+						// scan out an mcu's worth of this component; that's just determined
+						// by the basic H and V specified for the component
+						for (y = 0; y < z->img_comp[n].v; ++y) {
+							for (x = 0; x < z->img_comp[n].h; ++x) {
+								// block (not pixel) coordinates within the coefficient array
+								int x2 = (i * z->img_comp[n].h + x);
+								int y2 = (j * z->img_comp[n].v + y);
+								short* data = z->img_comp[n].coeff + 64 * (x2 + y2 * z->img_comp[n].coeff_w);
+								// only the DC decoder is invoked on this path
+								if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
+									return 0;
+							}
+						}
+					}
+					// after all interleaved components, that's an interleaved MCU,
+					// so now count down the restart interval
+					if (--z->todo <= 0) {
+						if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
+						// missing restart marker: stop early but report success (partial data)
+						if (!STBI__RESTART(z->marker)) return 1;
+						stbi__jpeg_reset(z);
+					}
+				}
+			}
+			return 1;
+		}
+	}
+}
+
+// Scale the 64 coefficients of one block in place: data[k] *= dequant[k].
+static void stbi__jpeg_dequantize(short* data, stbi__uint16* dequant)
+{
+	short* coef = data;
+	short* const end = data + 64;
+	stbi__uint16* scale = dequant;
+	for (; coef != end; ++coef, ++scale)
+		*coef *= *scale;
+}
+
+// Final pass for progressive images: every stored coefficient block of every
+// component is dequantized in place and run through the IDCT kernel into the
+// component's pixel plane. Does nothing for non-progressive images.
+static void stbi__jpeg_finish(stbi__jpeg* z)
+{
+	int comp, bx, by;
+
+	if (!z->progressive)
+		return;
+
+	for (comp = 0; comp < z->s->img_n; ++comp) {
+		// block counts cover the component's effective size, rounded up to 8
+		int blocks_w = (z->img_comp[comp].x + 7) >> 3;
+		int blocks_h = (z->img_comp[comp].y + 7) >> 3;
+		for (by = 0; by < blocks_h; ++by) {
+			for (bx = 0; bx < blocks_w; ++bx) {
+				short* coef = z->img_comp[comp].coeff + 64 * (bx + by * z->img_comp[comp].coeff_w);
+				stbi_uc* dst = z->img_comp[comp].data + z->img_comp[comp].w2 * by * 8 + bx * 8;
+				stbi__jpeg_dequantize(coef, z->dequant[z->img_comp[comp].tq]);
+				z->idct_block_kernel(dst, z->img_comp[comp].w2, coef);
+			}
+		}
+	}
+}
+
+// Process one marker segment that is not a frame header, scan header or EOI.
+//
+//   m  marker code (the byte after 0xff), or STBI__MARKER_none.
+//
+// Handles:
+//   0xDD  DRI   - stores the restart interval
+//   0xDB  DQT   - loads one or more quantization tables (8- or 16-bit entries)
+//   0xC4  DHT   - loads one or more Huffman tables (and the fast AC table)
+//   0xE0..0xEF, 0xFE  APPn / COM - detects the JFIF and Adobe tags, skips the rest
+//
+// Returns nonzero on success and 0 on failure. Most failures are reported
+// through stbi__err; a DQT/DHT segment whose declared length does not match
+// its contents returns 0 via the final `L == 0` test.
+static int stbi__process_marker(stbi__jpeg* z, int m)
+{
+	int L;
+	switch (m) {
+	case STBI__MARKER_none: // no marker found
+		return stbi__err("expected marker", "Corrupt JPEG");
+
+	case 0xDD: // DRI - specify restart interval
+		if (stbi__get16be(z->s) != 4) return stbi__err("bad DRI len", "Corrupt JPEG");
+		z->restart_interval = stbi__get16be(z->s);
+		return 1;
+
+	case 0xDB: // DQT - define quantization table
+		L = stbi__get16be(z->s) - 2;
+		while (L > 0) {
+			int q = stbi__get8(z->s);
+			// high nibble: entry precision (0 = 8-bit, 1 = 16-bit); low nibble: table index
+			int p = q >> 4, sixteen = (p != 0);
+			int t = q & 15, i;
+			if (p != 0 && p != 1) return stbi__err("bad DQT type", "Corrupt JPEG");
+			if (t > 3) return stbi__err("bad DQT table", "Corrupt JPEG");
+
+			// entries arrive in zigzag order; store them de-zigzagged
+			for (i = 0; i < 64; ++i)
+				z->dequant[t][stbi__jpeg_dezigzag[i]] = (stbi__uint16)(sixteen ? stbi__get16be(z->s) : stbi__get8(z->s));
+			L -= (sixteen ? 129 : 65);
+		}
+		return L == 0;
+
+	case 0xC4: // DHT - define huffman table
+		L = stbi__get16be(z->s) - 2;
+		while (L > 0) {
+			stbi_uc* v;
+			int sizes[16], i, n = 0;
+			int q = stbi__get8(z->s);
+			// high nibble: table class (0 = DC, 1 = AC); low nibble: table index
+			int tc = q >> 4;
+			int th = q & 15;
+			if (tc > 1 || th > 3) return stbi__err("bad DHT header", "Corrupt JPEG");
+			// sizes[i] = number of codes of length i+1; n = total number of symbols
+			for (i = 0; i < 16; ++i) {
+				sizes[i] = stbi__get8(z->s);
+				n += sizes[i];
+			}
+			// Symbols are 8-bit, so a table cannot legitimately define more than 256
+			// of them. The counts come from the file (16 bytes, so n can reach 4080);
+			// reject oversized tables before the copy loop below writes n bytes into v.
+			if (n > 256) return stbi__err("bad DHT header", "Corrupt JPEG");
+			L -= 17;
+			if (tc == 0) {
+				if (!stbi__build_huffman(z->huff_dc + th, sizes)) return 0;
+				v = z->huff_dc[th].values;
+			}
+			else {
+				if (!stbi__build_huffman(z->huff_ac + th, sizes)) return 0;
+				v = z->huff_ac[th].values;
+			}
+			for (i = 0; i < n; ++i)
+				v[i] = stbi__get8(z->s);
+			// the fast AC lookup is derived from the finished AC table
+			if (tc != 0)
+				stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th);
+			L -= n;
+		}
+		return L == 0;
+	}
+
+	// check for comment block or APP blocks
+	if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) {
+		L = stbi__get16be(z->s);
+		if (L < 2) {
+			if (m == 0xFE)
+				return stbi__err("bad COM len", "Corrupt JPEG");
+			else
+				return stbi__err("bad APP len", "Corrupt JPEG");
+		}
+		L -= 2;
+
+		if (m == 0xE0 && L >= 5) { // JFIF APP0 segment
+			static const unsigned char tag[5] = { 'J','F','I','F','\0' };
+			int ok = 1;
+			int i;
+			// always consume all 5 bytes so L stays in sync even on a mismatch
+			for (i = 0; i < 5; ++i)
+				if (stbi__get8(z->s) != tag[i])
+					ok = 0;
+			L -= 5;
+			if (ok)
+				z->jfif = 1;
+		}
+		else if (m == 0xEE && L >= 12) { // Adobe APP14 segment
+			static const unsigned char tag[6] = { 'A','d','o','b','e','\0' };
+			int ok = 1;
+			int i;
+			for (i = 0; i < 6; ++i)
+				if (stbi__get8(z->s) != tag[i])
+					ok = 0;
+			L -= 6;
+			if (ok) {
+				stbi__get8(z->s); // version
+				stbi__get16be(z->s); // flags0
+				stbi__get16be(z->s); // flags1
+				z->app14_color_transform = stbi__get8(z->s); // color transform
+				L -= 6;
+			}
+		}
+
+		// skip whatever remains of the segment payload
+		stbi__skip(z->s, L);
+		return 1;
+	}
+
+	return stbi__err("unknown marker", "Corrupt JPEG");
+}
+
+// after we see SOS
+//
+// Parse a scan header: the component count, then for each scan component its
+// id and DC/AC Huffman table selectors, then the spectral selection range and
+// successive-approximation bits.
+//
+// On success fills z->scan_n, z->order[], the hd/ha selectors of the matched
+// components, z->spec_start, z->spec_end, z->succ_high and z->succ_low, and
+// returns 1. Every failure path reports through stbi__err.
+static int stbi__process_scan_header(stbi__jpeg* z)
+{
+	int i;
+	int Ls = stbi__get16be(z->s);
+	z->scan_n = stbi__get8(z->s);
+	if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int)z->s->img_n) return stbi__err("bad SOS component count", "Corrupt JPEG");
+	// header length is fixed by the component count: 6 bytes plus 2 per component
+	if (Ls != 6 + 2 * z->scan_n) return stbi__err("bad SOS len", "Corrupt JPEG");
+	for (i = 0; i < z->scan_n; ++i) {
+		int id = stbi__get8(z->s), which;
+		int q = stbi__get8(z->s);
+		// map the component id from the scan header to its index in img_comp[]
+		for (which = 0; which < z->s->img_n; ++which)
+			if (z->img_comp[which].id == id)
+				break;
+		// no match: report it so the caller does not see a stale or missing failure reason
+		if (which == z->s->img_n) return stbi__err("bad SOS component id", "Corrupt JPEG");
+		// high nibble selects the DC table, low nibble the AC table
+		z->img_comp[which].hd = q >> 4;   if (z->img_comp[which].hd > 3) return stbi__err("bad DC huff", "Corrupt JPEG");
+		z->img_comp[which].ha = q & 15;   if (z->img_comp[which].ha > 3) return stbi__err("bad AC huff", "Corrupt JPEG");
+		z->order[i] = which;
+	}
+
+	{
+		int aa;
+		z->spec_start = stbi__get8(z->s);
+		z->spec_end = stbi__get8(z->s); // should be 63, but might be 0
+		aa = stbi__get8(z->s);
+		z->succ_high = (aa >> 4);
+		z->succ_low = (aa & 15);
+		if (z->progressive) {
+			if (z->spec_start > 63 || z->spec_end > 63 || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13)
+				return stbi__err("bad SOS", "Corrupt JPEG");
+		}
+		else {
+			// non-progressive scans must cover the full spectrum with no successive approximation
+			if (z->spec_start != 0) return stbi__err("bad SOS", "Corrupt JPEG");
+			if (z->succ_high != 0 || z->succ_low != 0) return stbi__err("bad SOS", "Corrupt JPEG");
+			z->spec_end = 63;
+		}
+	}
+
+	return 1;
+}
+
+// Release the per-component buffers (pixel plane, coefficient storage and
+// line buffer) of the first `ncomp` components and clear the pointers that
+// referred to them. Returns `why` unchanged so an error path can write
+// `return stbi__free_jpeg_components(z, n, stbi__err(...));`.
+static int stbi__free_jpeg_components(stbi__jpeg* z, int ncomp, int why)
+{
+	int c = 0;
+	while (c < ncomp) {
+		// data is an aligned view into raw_data, so both are cleared together
+		if (z->img_comp[c].raw_data != NULL) {
+			STBI_FREE(z->img_comp[c].raw_data);
+			z->img_comp[c].data = NULL;
+			z->img_comp[c].raw_data = NULL;
+		}
+		// same relationship between coeff and raw_coeff
+		if (z->img_comp[c].raw_coeff != 0) {
+			STBI_FREE(z->img_comp[c].raw_coeff);
+			z->img_comp[c].coeff = 0;
+			z->img_comp[c].raw_coeff = 0;
+		}
+		if (z->img_comp[c].linebuf != NULL) {
+			STBI_FREE(z->img_comp[c].linebuf);
+			z->img_comp[c].linebuf = NULL;
+		}
+		++c;
+	}
+	return why;
+}
+
+// Parse a frame header (SOF) and, when scan == STBI__SCAN_load, allocate the
+// per-component decode buffers.
+//
+//   z     decoder state; z->s receives img_x, img_y and img_n
+//   scan  if not STBI__SCAN_load, return right after the header fields have
+//         been parsed and validated, without allocating anything
+//
+// Returns 1 on success. On failure returns the result of stbi__err; if an
+// allocation fails, buffers allocated so far are released first.
+static int stbi__process_frame_header(stbi__jpeg* z, int scan)
+{
+	stbi__context* s = z->s;
+	int Lf, p, i, q, h_max = 1, v_max = 1, c;
+	Lf = stbi__get16be(s);         if (Lf < 11) return stbi__err("bad SOF len", "Corrupt JPEG"); // JPEG
+	p = stbi__get8(s);            if (p != 8) return stbi__err("only 8-bit", "JPEG format not supported: 8-bit only"); // JPEG baseline
+	s->img_y = stbi__get16be(s);   if (s->img_y == 0) return stbi__err("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG
+	s->img_x = stbi__get16be(s);   if (s->img_x == 0) return stbi__err("0 width", "Corrupt JPEG"); // JPEG requires
+	c = stbi__get8(s);
+	if (c != 3 && c != 1 && c != 4) return stbi__err("bad component count", "Corrupt JPEG");
+	s->img_n = c;
+	for (i = 0; i < c; ++i) {
+		z->img_comp[i].data = NULL;
+		z->img_comp[i].linebuf = NULL;
+	}
+
+	// header length is fixed by the component count: 8 bytes plus 3 per component
+	if (Lf != 8 + 3 * s->img_n) return stbi__err("bad SOF len", "Corrupt JPEG");
+
+	z->rgb = 0;
+	for (i = 0; i < s->img_n; ++i) {
+		static const unsigned char rgb[3] = { 'R', 'G', 'B' };
+		z->img_comp[i].id = stbi__get8(s);
+		// count components whose ids spell 'R','G','B' in order (3-component images only)
+		if (s->img_n == 3 && z->img_comp[i].id == rgb[i])
+			++z->rgb;
+		q = stbi__get8(s);
+		// high nibble: horizontal sampling factor; low nibble: vertical; both must be 1..4
+		z->img_comp[i].h = (q >> 4);  if (!z->img_comp[i].h || z->img_comp[i].h > 4) return stbi__err("bad H", "Corrupt JPEG");
+		z->img_comp[i].v = q & 15;    if (!z->img_comp[i].v || z->img_comp[i].v > 4) return stbi__err("bad V", "Corrupt JPEG");
+		z->img_comp[i].tq = stbi__get8(s);  if (z->img_comp[i].tq > 3) return stbi__err("bad TQ", "Corrupt JPEG");
+	}
+
+	if (scan != STBI__SCAN_load) return 1;
+
+	if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0)) return stbi__err("too large", "Image too large to decode");
+
+	for (i = 0; i < s->img_n; ++i) {
+		if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h;
+		if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v;
+	}
+
+	// Every component's sampling factor must divide the maximum evenly.
+	// NOTE(review): the row resamplers presumably take integer expansion
+	// factors (h_max / h, v_max / v -- confirm in load_jpeg_image), so a
+	// fractional ratio such as 3:2 would leave a plane short of the full
+	// image width. Treat such headers as corrupt.
+	for (i = 0; i < s->img_n; ++i) {
+		if (h_max % z->img_comp[i].h != 0) return stbi__err("bad H", "Corrupt JPEG");
+		if (v_max % z->img_comp[i].v != 0) return stbi__err("bad V", "Corrupt JPEG");
+	}
+
+	// compute interleaved mcu info
+	z->img_h_max = h_max;
+	z->img_v_max = v_max;
+	z->img_mcu_w = h_max * 8;
+	z->img_mcu_h = v_max * 8;
+	// these sizes can't be more than 17 bits
+	z->img_mcu_x = (s->img_x + z->img_mcu_w - 1) / z->img_mcu_w;
+	z->img_mcu_y = (s->img_y + z->img_mcu_h - 1) / z->img_mcu_h;
+
+	for (i = 0; i < s->img_n; ++i) {
+		// number of effective pixels (e.g. for non-interleaved MCU)
+		z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max - 1) / h_max;
+		z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max - 1) / v_max;
+		// to simplify generation, we'll allocate enough memory to decode
+		// the bogus oversized data from using interleaved MCUs and their
+		// big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't
+		// discard the extra data until colorspace conversion
+		//
+		// img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked earlier)
+		// so these muls can't overflow with 32-bit ints (which we require)
+		z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8;
+		z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8;
+		z->img_comp[i].coeff = 0;
+		z->img_comp[i].raw_coeff = 0;
+		z->img_comp[i].linebuf = NULL;
+		// 15 extra bytes leave room to round the pointer up to a 16-byte boundary
+		z->img_comp[i].raw_data = stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15);
+		if (z->img_comp[i].raw_data == NULL)
+			return stbi__free_jpeg_components(z, i + 1, stbi__err("outofmem", "Out of memory"));
+		// align blocks for idct using mmx/sse
+		z->img_comp[i].data = (stbi_uc*)(((size_t)z->img_comp[i].raw_data + 15) & ~15);
+		if (z->progressive) {
+			// w2, h2 are multiples of 8 (see above)
+			z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8;
+			z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8;
+			// one short per sample, again with 15 bytes of alignment slack
+			z->img_comp[i].raw_coeff = stbi__malloc_mad3(z->img_comp[i].w2, z->img_comp[i].h2, sizeof(short), 15);
+			if (z->img_comp[i].raw_coeff == NULL)
+				return stbi__free_jpeg_components(z, i + 1, stbi__err("outofmem", "Out of memory"));
+			z->img_comp[i].coeff = (short*)(((size_t)z->img_comp[i].raw_coeff + 15) & ~15);
+		}
+	}
+
+	return 1;
+}
+
+// use comparisons since in some cases we handle more than one case (e.g. SOF)
+//
+// Each macro tests a marker code (as returned by stbi__get_marker) and yields
+// nonzero on a match. stbi__SOF accepts three codes (0xc0, 0xc1, 0xc2), so x
+// may be evaluated more than once: pass a side-effect-free expression.
+#define stbi__DNL(x)         ((x) == 0xdc)
+#define stbi__SOI(x)         ((x) == 0xd8)
+#define stbi__EOI(x)         ((x) == 0xd9)
+#define stbi__SOF(x)         ((x) == 0xc0 || (x) == 0xc1 || (x) == 0xc2)
+#define stbi__SOS(x)         ((x) == 0xda)
+
+// of the accepted SOF codes, only 0xc2 marks the frame as progressive
+#define stbi__SOF_progressive(x)   ((x) == 0xc2)
+
+// Read the start of a JPEG stream: require SOI, then process marker segments
+// until a frame header (SOF) is reached and parse it.
+//
+//   scan  STBI__SCAN_type: only check for SOI and return.
+//         Any other value is passed on to stbi__process_frame_header.
+//
+// Returns 1 on success, 0 on failure.
+static int stbi__decode_jpeg_header(stbi__jpeg* z, int scan)
+{
+	int marker;
+
+	z->marker = STBI__MARKER_none; // initialize cached marker to empty
+	z->app14_color_transform = -1; // valid values are 0,1,2
+	z->jfif = 0;
+
+	marker = stbi__get_marker(z);
+	if (!stbi__SOI(marker))
+		return stbi__err("no SOI", "Corrupt JPEG");
+	if (scan == STBI__SCAN_type)
+		return 1;
+
+	for (marker = stbi__get_marker(z); !stbi__SOF(marker); ) {
+		if (!stbi__process_marker(z, marker))
+			return 0;
+		// some files have extra padding after their blocks, so keep scanning
+		// for the next marker until one turns up or the stream ends
+		for (;;) {
+			marker = stbi__get_marker(z);
+			if (marker != STBI__MARKER_none)
+				break;
+			if (stbi__at_eof(z->s))
+				return stbi__err("no SOF", "Corrupt JPEG");
+		}
+	}
+
+	z->progressive = stbi__SOF_progressive(marker);
+	return stbi__process_frame_header(z, scan) ? 1 : 0;
+}
+
+// Decode a whole JPEG stream into per-component planes (still in the file's
+// own color space, e.g. YCbCr): header first, then marker segments and scans
+// until EOI. Progressive images get their final dequantize + IDCT pass at the
+// end. Returns 1 on success, 0 on failure.
+static int stbi__decode_jpeg_image(stbi__jpeg* j)
+{
+	int m;
+
+	// start with no buffers so cleanup after an early failure is safe
+	for (m = 0; m < 4; m++) {
+		j->img_comp[m].raw_coeff = NULL;
+		j->img_comp[m].raw_data = NULL;
+	}
+	j->restart_interval = 0;
+
+	if (!stbi__decode_jpeg_header(j, STBI__SCAN_load))
+		return 0;
+
+	for (m = stbi__get_marker(j); !stbi__EOI(m); m = stbi__get_marker(j)) {
+		if (stbi__DNL(m)) {
+			// both fields are read before either is validated
+			int Ld = stbi__get16be(j->s);
+			stbi__uint32 NL = stbi__get16be(j->s);
+			if (Ld != 4) return stbi__err("bad DNL len", "Corrupt JPEG");
+			if (NL != j->s->img_y) return stbi__err("bad DNL height", "Corrupt JPEG");
+		}
+		else if (!stbi__SOS(m)) {
+			if (!stbi__process_marker(j, m))
+				return 0;
+		}
+		else {
+			if (!stbi__process_scan_header(j))
+				return 0;
+			if (!stbi__parse_entropy_coded_data(j))
+				return 0;
+			if (j->marker == STBI__MARKER_none) {
+				// handle 0s at the end of image data from IP Kamera 9060:
+				// skip bytes until a 0xff, then take the byte after it as the marker
+				while (!stbi__at_eof(j->s)) {
+					if (stbi__get8(j->s) != 255)
+						continue;
+					j->marker = stbi__get8(j->s);
+					break;
+				}
+				// if we reach eof without hitting a marker, the stbi__get_marker()
+				// in the loop header will fail and we'll eventually return 0
+			}
+		}
+	}
+
+	if (j->progressive)
+		stbi__jpeg_finish(j);
+	return 1;
+}
+
+// static jfif-centered resampling (across block boundaries)
+
+// Signature shared by all row resamplers below.
+//   out      destination buffer for the expanded row
+//   in0/in1  the two input rows (named in_near / in_far by the implementations)
+//   w        number of input samples in the row
+//   hs       horizontal expansion factor (used only by the generic resampler)
+// Returns the row to use: `out`, or an input row when no expansion is needed.
+typedef stbi_uc* (*resample_row_func)(stbi_uc* out, stbi_uc* in0, stbi_uc* in1,
+	int w, int hs);
+
+// x / 4 via a right shift, truncated to a byte; callers add the rounding bias (+2) themselves
+#define stbi__div4(x) ((stbi_uc) ((x) >> 2))
+
+// No-op resampler: writes nothing and returns the near input row unchanged.
+static stbi_uc* resample_row_1(stbi_uc* out, stbi_uc* in_near, stbi_uc* in_far, int w, int hs)
+{
+	// every argument except in_near is deliberately ignored
+	STBI_NOTUSED(hs);
+	STBI_NOTUSED(w);
+	STBI_NOTUSED(in_far);
+	STBI_NOTUSED(out);
+	return in_near;
+}
+
+// Vertical 2x resampler: each output sample is the rounded blend
+// (3*near + far + 2) / 4 of the two input rows. Writes w samples to `out`
+// and returns it.
+static stbi_uc* stbi__resample_row_v_2(stbi_uc* out, stbi_uc* in_near, stbi_uc* in_far, int w, int hs)
+{
+	int col = 0;
+	STBI_NOTUSED(hs);
+	while (col < w) {
+		int blended = 3 * in_near[col] + in_far[col] + 2;
+		out[col] = stbi__div4(blended);
+		++col;
+	}
+	return out;
+}
+
+// Horizontal 2x resampler: produces two output samples per input sample of
+// in_near (2*w in total). Interior outputs are (3*center + neighbor + 2) / 4;
+// the first and last outputs copy the edge samples. in_far is not used.
+static stbi_uc* stbi__resample_row_h_2(stbi_uc* out, stbi_uc* in_near, stbi_uc* in_far, int w, int hs)
+{
+	int k = 1;
+
+	STBI_NOTUSED(in_far);
+	STBI_NOTUSED(hs);
+
+	if (w == 1) {
+		// if only one sample, can't do any interpolation
+		out[1] = in_near[0];
+		out[0] = out[1];
+		return out;
+	}
+
+	// left edge
+	out[0] = in_near[0];
+	out[1] = stbi__div4(in_near[0] * 3 + in_near[1] + 2);
+
+	// interior samples blend toward the left and right neighbors
+	while (k < w - 1) {
+		int center = 3 * in_near[k] + 2;
+		out[k * 2 + 0] = stbi__div4(center + in_near[k - 1]);
+		out[k * 2 + 1] = stbi__div4(center + in_near[k + 1]);
+		++k;
+	}
+
+	// right edge (k is the index where the loop stopped)
+	out[k * 2 + 0] = stbi__div4(in_near[w - 2] * 3 + in_near[w - 1] + 2);
+	out[k * 2 + 1] = in_near[w - 1];
+
+	return out;
+}
+
+#define stbi__div16(x) ((stbi_uc) ((x) >> 4))
+
+// 2x2 resampler: for each input column the two rows are first blended
+// vertically (3*near + far), then adjacent columns are blended horizontally
+// with the same 3:1 weights. Writes 2*w samples to `out` and returns it.
+static stbi_uc* stbi__resample_row_hv_2(stbi_uc* out, stbi_uc* in_near, stbi_uc* in_far, int w, int hs)
+{
+	int col, prev, cur;
+
+	STBI_NOTUSED(hs);
+
+	if (w == 1) {
+		// a single column: only the vertical blend applies
+		out[1] = stbi__div4(3 * in_near[0] + in_far[0] + 2);
+		out[0] = out[1];
+		return out;
+	}
+
+	// cur / prev hold the vertical blends of the current and previous column
+	cur = 3 * in_near[0] + in_far[0];
+	out[0] = stbi__div4(cur + 2);
+	col = 1;
+	while (col < w) {
+		prev = cur;
+		cur = 3 * in_near[col] + in_far[col];
+		out[col * 2 - 1] = stbi__div16(3 * prev + cur + 8);
+		out[col * 2] = stbi__div16(3 * cur + prev + 8);
+		++col;
+	}
+	out[w * 2 - 1] = stbi__div4(cur + 2);
+
+	return out;
+}
+
+#if defined(STBI_SSE2) || defined(STBI_NEON)
+// SIMD variant of the 2x2 resampler (SSE2 or NEON, chosen at compile time).
+//
+//   out      receives 2*w samples
+//   in_near  input row weighted 3 in the vertical blend
+//   in_far   input row weighted 1 in the vertical blend
+//   w        number of input samples per row
+//   hs       unused
+//
+// Groups of 8 input samples are processed with vector code while at least one
+// more sample follows the group; the remaining samples (always at least one)
+// go through the scalar tail, which also applies the right-edge rule.
+// Returns `out`.
+static stbi_uc* stbi__resample_row_hv_2_simd(stbi_uc* out, stbi_uc* in_near, stbi_uc* in_far, int w, int hs)
+{
+	// need to generate 2x2 samples for every one in input
+	int i = 0, t0, t1;
+
+	if (w == 1) {
+		out[0] = out[1] = stbi__div4(3 * in_near[0] + in_far[0] + 2);
+		return out;
+	}
+
+	// t1 carries the vertical blend of the column just before the current group
+	t1 = 3 * in_near[0] + in_far[0];
+	// process groups of 8 pixels for as long as we can.
+	// note we can't handle the last pixel in a row in this loop
+	// because we need to handle the filter boundary conditions.
+	// (the bound (w - 1) & ~7 also keeps the in_near[i + 8] / in_far[i + 8]
+	// reads below inside the row: i + 8 <= w - 1)
+	for (; i < ((w - 1) & ~7); i += 8) {
+#if defined(STBI_SSE2)
+		// load and perform the vertical filtering pass
+		// this uses 3*x + y = 4*x + (y - x)
+		__m128i zero = _mm_setzero_si128();
+		__m128i farb = _mm_loadl_epi64((__m128i*) (in_far + i));
+		__m128i nearb = _mm_loadl_epi64((__m128i*) (in_near + i));
+		__m128i farw = _mm_unpacklo_epi8(farb, zero);
+		__m128i nearw = _mm_unpacklo_epi8(nearb, zero);
+		__m128i diff = _mm_sub_epi16(farw, nearw);
+		__m128i nears = _mm_slli_epi16(nearw, 2);
+		__m128i curr = _mm_add_epi16(nears, diff); // current row
+
+		// horizontal filter works the same based on shifted vers of current
+		// row. "prev" is current row shifted right by 1 pixel; we need to
+		// insert the previous pixel value (from t1).
+		// "next" is current row shifted left by 1 pixel, with first pixel
+		// of next block of 8 pixels added in.
+		__m128i prv0 = _mm_slli_si128(curr, 2);
+		__m128i nxt0 = _mm_srli_si128(curr, 2);
+		__m128i prev = _mm_insert_epi16(prv0, t1, 0);
+		__m128i next = _mm_insert_epi16(nxt0, 3 * in_near[i + 8] + in_far[i + 8], 7);
+
+		// horizontal filter, polyphase implementation since it's convenient:
+		// even pixels = 3*cur + prev = cur*4 + (prev - cur)
+		// odd  pixels = 3*cur + next = cur*4 + (next - cur)
+		// note the shared term.
+		__m128i bias = _mm_set1_epi16(8);
+		__m128i curs = _mm_slli_epi16(curr, 2);
+		__m128i prvd = _mm_sub_epi16(prev, curr);
+		__m128i nxtd = _mm_sub_epi16(next, curr);
+		__m128i curb = _mm_add_epi16(curs, bias);
+		__m128i even = _mm_add_epi16(prvd, curb);
+		__m128i odd = _mm_add_epi16(nxtd, curb);
+
+		// interleave even and odd pixels, then undo scaling.
+		__m128i int0 = _mm_unpacklo_epi16(even, odd);
+		__m128i int1 = _mm_unpackhi_epi16(even, odd);
+		__m128i de0 = _mm_srli_epi16(int0, 4);
+		__m128i de1 = _mm_srli_epi16(int1, 4);
+
+		// pack and write output
+		__m128i outv = _mm_packus_epi16(de0, de1);
+		_mm_storeu_si128((__m128i*) (out + i * 2), outv);
+#elif defined(STBI_NEON)
+		// load and perform the vertical filtering pass
+		// this uses 3*x + y = 4*x + (y - x)
+		uint8x8_t farb = vld1_u8(in_far + i);
+		uint8x8_t nearb = vld1_u8(in_near + i);
+		int16x8_t diff = vreinterpretq_s16_u16(vsubl_u8(farb, nearb));
+		int16x8_t nears = vreinterpretq_s16_u16(vshll_n_u8(nearb, 2));
+		int16x8_t curr = vaddq_s16(nears, diff); // current row
+
+		// horizontal filter works the same based on shifted vers of current
+		// row. "prev" is current row shifted right by 1 pixel; we need to
+		// insert the previous pixel value (from t1).
+		// "next" is current row shifted left by 1 pixel, with first pixel
+		// of next block of 8 pixels added in.
+		int16x8_t prv0 = vextq_s16(curr, curr, 7);
+		int16x8_t nxt0 = vextq_s16(curr, curr, 1);
+		int16x8_t prev = vsetq_lane_s16(t1, prv0, 0);
+		int16x8_t next = vsetq_lane_s16(3 * in_near[i + 8] + in_far[i + 8], nxt0, 7);
+
+		// horizontal filter, polyphase implementation since it's convenient:
+		// even pixels = 3*cur + prev = cur*4 + (prev - cur)
+		// odd  pixels = 3*cur + next = cur*4 + (next - cur)
+		// note the shared term.
+		int16x8_t curs = vshlq_n_s16(curr, 2);
+		int16x8_t prvd = vsubq_s16(prev, curr);
+		int16x8_t nxtd = vsubq_s16(next, curr);
+		int16x8_t even = vaddq_s16(curs, prvd);
+		int16x8_t odd = vaddq_s16(curs, nxtd);
+
+		// undo scaling and round, then store with even/odd phases interleaved
+		uint8x8x2_t o;
+		o.val[0] = vqrshrun_n_s16(even, 4);
+		o.val[1] = vqrshrun_n_s16(odd, 4);
+		vst2_u8(out + i * 2, o);
+#endif
+
+		// "previous" value for next iter
+		t1 = 3 * in_near[i + 7] + in_far[i + 7];
+	}
+
+	// scalar tail, first column: only its even output is written here; the
+	// odd output of the preceding column was already produced by the vector
+	// loop. If the loop never ran, t0 == t1 and this reduces to (t1 + 2) >> 2.
+	t0 = t1;
+	t1 = 3 * in_near[i] + in_far[i];
+	out[i * 2] = stbi__div16(3 * t1 + t0 + 8);
+
+	// remaining columns: same recurrence as the scalar resampler
+	for (++i; i < w; ++i) {
+		t0 = t1;
+		t1 = 3 * in_near[i] + in_far[i];
+		out[i * 2 - 1] = stbi__div16(3 * t0 + t1 + 8);
+		out[i * 2] = stbi__div16(3 * t1 + t0 + 8);
+	}
+	// right edge: no next column to blend with
+	out[w * 2 - 1] = stbi__div4(t1 + 2);
+
+	STBI_NOTUSED(hs);
+
+	return out;
+}
+#endif
+
+// Nearest-neighbor horizontal resampler: every input sample of in_near is
+// repeated hs times in `out` (w*hs samples written). in_far is not used.
+// Returns `out`.
+static stbi_uc* stbi__resample_row_generic(stbi_uc* out, stbi_uc* in_near, stbi_uc* in_far, int w, int hs)
+{
+	int src;
+	stbi_uc* dst = out;
+	STBI_NOTUSED(in_far);
+	for (src = 0; src < w; ++src) {
+		int rep;
+		for (rep = 0; rep < hs; ++rep)
+			*dst++ = in_near[src];
+	}
+	return out;
+}
+
+// this is a reduced-precision calculation of YCbCr-to-RGB introduced
+// to make sure the code produces the same results in both SIMD and scalar
+#define stbi__float2fixed(x)  (((int) ((x) * 4096.0f + 0.5f)) << 8)
+// Convert `count` pixels from separate Y / Cb / Cr rows to interleaved output.
+// Each pixel writes four bytes (R, G, B, 255) at `out`, then `out` advances by
+// `step` bytes, so with step < 4 the next pixel overwrites the trailing bytes.
+static void stbi__YCbCr_to_RGB_row(stbi_uc* out, const stbi_uc* y, const stbi_uc* pcb, const stbi_uc* pcr, int count, int step)
+{
+	int px;
+	stbi_uc* dst = out;
+	for (px = 0; px < count; ++px, dst += step) {
+		// 20 fractional bits; adding half an LSB makes the final shift round
+		int luma = (y[px] << 20) + (1 << 19);
+		int cb = pcb[px] - 128;
+		int cr = pcr[px] - 128;
+		int r = luma + cr * stbi__float2fixed(1.40200f);
+		int g = luma + (cr * -stbi__float2fixed(0.71414f)) + ((cb * -stbi__float2fixed(0.34414f)) & 0xffff0000);
+		int b = luma + cb * stbi__float2fixed(1.77200f);
+
+		r >>= 20;
+		g >>= 20;
+		b >>= 20;
+
+		// clamp each channel to 0..255
+		if (r < 0) r = 0; else if (r > 255) r = 255;
+		if (g < 0) g = 0; else if (g > 255) g = 255;
+		if (b < 0) b = 0; else if (b > 255) b = 255;
+
+		dst[0] = (stbi_uc)r;
+		dst[1] = (stbi_uc)g;
+		dst[2] = (stbi_uc)b;
+		dst[3] = 255;
+	}
+}
+
+#if defined(STBI_SSE2) || defined(STBI_NEON)
+// SIMD variant of the YCbCr -> RGB row conversion (SSE2 or NEON).
+//
+//   out    destination; each pixel writes R, G, B, 255
+//   y      luma row
+//   pcb    Cb row
+//   pcr    Cr row
+//   count  number of pixels to convert
+//   step   bytes between consecutive output pixels
+//
+// The vector paths run only when step == 4 and handle 8 pixels per iteration;
+// whatever is left (everything, when step != 4) goes through the scalar loop
+// at the end, which uses the same arithmetic as the scalar row converter.
+static void stbi__YCbCr_to_RGB_simd(stbi_uc* out, stbi_uc const* y, stbi_uc const* pcb, stbi_uc const* pcr, int count, int step)
+{
+	int i = 0;
+
+#ifdef STBI_SSE2
+	// step == 3 is pretty ugly on the final interleave, and i'm not convinced
+	// it's useful in practice (you wouldn't use it for textures, for example).
+	// so just accelerate step == 4 case.
+	if (step == 4) {
+		// this is a fairly straightforward implementation and not super-optimized.
+		__m128i signflip = _mm_set1_epi8(-0x80);
+		__m128i cr_const0 = _mm_set1_epi16((short)(1.40200f * 4096.0f + 0.5f));
+		__m128i cr_const1 = _mm_set1_epi16(-(short)(0.71414f * 4096.0f + 0.5f));
+		__m128i cb_const0 = _mm_set1_epi16(-(short)(0.34414f * 4096.0f + 0.5f));
+		__m128i cb_const1 = _mm_set1_epi16((short)(1.77200f * 4096.0f + 0.5f));
+		__m128i y_bias = _mm_set1_epi8((char)(unsigned char)128);
+		__m128i xw = _mm_set1_epi16(255); // alpha channel
+
+		// 8 pixels per iteration; stops when fewer than 8 remain
+		for (; i + 7 < count; i += 8) {
+			// load
+			__m128i y_bytes = _mm_loadl_epi64((__m128i*) (y + i));
+			__m128i cr_bytes = _mm_loadl_epi64((__m128i*) (pcr + i));
+			__m128i cb_bytes = _mm_loadl_epi64((__m128i*) (pcb + i));
+			__m128i cr_biased = _mm_xor_si128(cr_bytes, signflip); // -128
+			__m128i cb_biased = _mm_xor_si128(cb_bytes, signflip); // -128
+
+			// unpack to short (and left-shift cr, cb by 8)
+			// (y lands in the high byte too; the low byte holds the 128 bias that
+			// acts as the rounding term once shifted down)
+			__m128i yw = _mm_unpacklo_epi8(y_bias, y_bytes);
+			__m128i crw = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_biased);
+			__m128i cbw = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_biased);
+
+			// color transform
+			__m128i yws = _mm_srli_epi16(yw, 4);
+			__m128i cr0 = _mm_mulhi_epi16(cr_const0, crw);
+			__m128i cb0 = _mm_mulhi_epi16(cb_const0, cbw);
+			__m128i cb1 = _mm_mulhi_epi16(cbw, cb_const1);
+			__m128i cr1 = _mm_mulhi_epi16(crw, cr_const1);
+			__m128i rws = _mm_add_epi16(cr0, yws);
+			__m128i gwt = _mm_add_epi16(cb0, yws);
+			__m128i bws = _mm_add_epi16(yws, cb1);
+			__m128i gws = _mm_add_epi16(gwt, cr1);
+
+			// descale
+			__m128i rw = _mm_srai_epi16(rws, 4);
+			__m128i bw = _mm_srai_epi16(bws, 4);
+			__m128i gw = _mm_srai_epi16(gws, 4);
+
+			// back to byte, set up for transpose
+			__m128i brb = _mm_packus_epi16(rw, bw);
+			__m128i gxb = _mm_packus_epi16(gw, xw);
+
+			// transpose to interleave channels
+			__m128i t0 = _mm_unpacklo_epi8(brb, gxb);
+			__m128i t1 = _mm_unpackhi_epi8(brb, gxb);
+			__m128i o0 = _mm_unpacklo_epi16(t0, t1);
+			__m128i o1 = _mm_unpackhi_epi16(t0, t1);
+
+			// store
+			_mm_storeu_si128((__m128i*) (out + 0), o0);
+			_mm_storeu_si128((__m128i*) (out + 16), o1);
+			out += 32;
+		}
+	}
+#endif
+
+#ifdef STBI_NEON
+	// in this version, step=3 support would be easy to add. but is there demand?
+	if (step == 4) {
+		// this is a fairly straightforward implementation and not super-optimized.
+		uint8x8_t signflip = vdup_n_u8(0x80);
+		int16x8_t cr_const0 = vdupq_n_s16((short)(1.40200f * 4096.0f + 0.5f));
+		int16x8_t cr_const1 = vdupq_n_s16(-(short)(0.71414f * 4096.0f + 0.5f));
+		int16x8_t cb_const0 = vdupq_n_s16(-(short)(0.34414f * 4096.0f + 0.5f));
+		int16x8_t cb_const1 = vdupq_n_s16((short)(1.77200f * 4096.0f + 0.5f));
+
+		// 8 pixels per iteration; stops when fewer than 8 remain
+		for (; i + 7 < count; i += 8) {
+			// load
+			uint8x8_t y_bytes = vld1_u8(y + i);
+			uint8x8_t cr_bytes = vld1_u8(pcr + i);
+			uint8x8_t cb_bytes = vld1_u8(pcb + i);
+			// subtracting 0x80 and reinterpreting as signed recenters chroma around 0
+			int8x8_t cr_biased = vreinterpret_s8_u8(vsub_u8(cr_bytes, signflip));
+			int8x8_t cb_biased = vreinterpret_s8_u8(vsub_u8(cb_bytes, signflip));
+
+			// expand to s16
+			int16x8_t yws = vreinterpretq_s16_u16(vshll_n_u8(y_bytes, 4));
+			int16x8_t crw = vshll_n_s8(cr_biased, 7);
+			int16x8_t cbw = vshll_n_s8(cb_biased, 7);
+
+			// color transform
+			int16x8_t cr0 = vqdmulhq_s16(crw, cr_const0);
+			int16x8_t cb0 = vqdmulhq_s16(cbw, cb_const0);
+			int16x8_t cr1 = vqdmulhq_s16(crw, cr_const1);
+			int16x8_t cb1 = vqdmulhq_s16(cbw, cb_const1);
+			int16x8_t rws = vaddq_s16(yws, cr0);
+			int16x8_t gws = vaddq_s16(vaddq_s16(yws, cb0), cr1);
+			int16x8_t bws = vaddq_s16(yws, cb1);
+
+			// undo scaling, round, convert to byte
+			uint8x8x4_t o;
+			o.val[0] = vqrshrun_n_s16(rws, 4);
+			o.val[1] = vqrshrun_n_s16(gws, 4);
+			o.val[2] = vqrshrun_n_s16(bws, 4);
+			o.val[3] = vdup_n_u8(255);
+
+			// store, interleaving r/g/b/a
+			vst4_u8(out, o);
+			out += 8 * 4;
+		}
+	}
+#endif
+
+	// scalar tail: pixels the vector loops did not cover
+	for (; i < count; ++i) {
+		int y_fixed = (y[i] << 20) + (1 << 19); // rounding
+		int r, g, b;
+		int cr = pcr[i] - 128;
+		int cb = pcb[i] - 128;
+		r = y_fixed + cr * stbi__float2fixed(1.40200f);
+		g = y_fixed + cr * -stbi__float2fixed(0.71414f) + ((cb * -stbi__float2fixed(0.34414f)) & 0xffff0000);
+		b = y_fixed + cb * stbi__float2fixed(1.77200f);
+		r >>= 20;
+		g >>= 20;
+		b >>= 20;
+		// clamp to 0..255; the unsigned compare catches both negative and >255 in one test
+		if ((unsigned)r > 255) { if (r < 0) r = 0; else r = 255; }
+		if ((unsigned)g > 255) { if (g < 0) g = 0; else g = 255; }
+		if ((unsigned)b > 255) { if (b < 0) b = 0; else b = 255; }
+		out[0] = (stbi_uc)r;
+		out[1] = (stbi_uc)g;
+		out[2] = (stbi_uc)b;
+		out[3] = 255;
+		out += step;
+	}
+}
+#endif
+
+// Choose the IDCT, color-conversion and 2x2-resampling kernels for this
+// decoder: the portable C versions by default, replaced by the SIMD versions
+// when SSE2 is compiled in and detected at run time, or when NEON is compiled in.
+static void stbi__setup_jpeg(stbi__jpeg* j)
+{
+	// portable defaults
+	j->resample_row_hv_2_kernel = stbi__resample_row_hv_2;
+	j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_row;
+	j->idct_block_kernel = stbi__idct_block;
+
+#if defined(STBI_SSE2)
+	// SSE2 needs a run-time capability check
+	if (stbi__sse2_available()) {
+		j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
+		j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
+		j->idct_block_kernel = stbi__idct_simd;
+	}
+#endif
+
+#if defined(STBI_NEON)
+	// NEON kernels are installed unconditionally when compiled in
+	j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
+	j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
+	j->idct_block_kernel = stbi__idct_simd;
+#endif
+}
+
+// Release the temporary per-component buffers held by the decoder.
+static void stbi__cleanup_jpeg(stbi__jpeg* j)
+{
+	int ncomp = j->s->img_n;
+	stbi__free_jpeg_components(j, ncomp, 0);
+}
+
+// Per-component upsampling state used by load_jpeg_image while it turns the
+// decoded planes into full-resolution output rows.
+typedef struct
+{
+	resample_row_func resample; // row upsampler selected from hs/vs
+	stbi_uc* line0, * line1;    // the two source rows handed to 'resample'
+	int hs, vs;   // expansion factor in each axis
+	int w_lores; // horizontal pixels pre-expansion
+	int ystep;   // how far through vertical expansion we are
+	int ypos;    // which pre-expansion row we're on
+} stbi__resample;
+
+// Rounded multiplication of two 0..255 values giving a 0..255 result,
+// computed with shifts and adds only.
+static stbi_uc stbi__blinn_8x8(stbi_uc x, stbi_uc y)
+{
+	unsigned int prod = x * y + 128;
+	prod += prod >> 8;
+	return (stbi_uc)(prod >> 8);
+}
+
+// Decode the JPEG attached to z and convert it to interleaved 8-bit pixels.
+//   out_x / out_y receive the image dimensions.
+//   comp (optional) receives 3 when the file has three or more components and
+//   1 otherwise, independent of req_comp.
+//   req_comp selects the number of output channels (1..4); 0 means "use the
+//   same 3-or-1 default that is reported through comp".
+// Returns the pixel buffer on success. On failure it returns NULL or the
+// value produced by stbi__errpuc; once decoding has been attempted the
+// component buffers are released through stbi__cleanup_jpeg first.
+static stbi_uc* load_jpeg_image(stbi__jpeg* z, int* out_x, int* out_y, int* comp, int req_comp)
+{
+	int n, decode_n, is_rgb;
+	z->s->img_n = 0; // make stbi__cleanup_jpeg safe
+
+	// validate req_comp
+	if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");
+
+	// load a jpeg image from whichever source, but leave in YCbCr format
+	if (!stbi__decode_jpeg_image(z)) { stbi__cleanup_jpeg(z); return NULL; }
+
+	// determine actual number of components to generate
+	n = req_comp ? req_comp : z->s->img_n >= 3 ? 3 : 1;
+
+	// three-component data is taken as already-RGB when the decoder flagged it
+	// (z->rgb == 3), or when there is no JFIF marker and the app14 color
+	// transform is 0
+	is_rgb = z->s->img_n == 3 && (z->rgb == 3 || (z->app14_color_transform == 0 && !z->jfif));
+
+	// a 3-component non-RGB image reduced to 1 or 2 output channels only needs
+	// its first plane, so the other planes are not resampled
+	if (z->s->img_n == 3 && n < 3 && !is_rgb)
+		decode_n = 1;
+	else
+		decode_n = z->s->img_n;
+
+	// resample and color-convert
+	{
+		int k;
+		unsigned int i, j;
+		stbi_uc* output;
+		stbi_uc* coutput[4];
+
+		stbi__resample res_comp[4];
+
+		for (k = 0; k < decode_n; ++k) {
+			stbi__resample* r = &res_comp[k];
+
+			// allocate line buffer big enough for upsampling off the edges
+			// with upsample factor of 4
+			z->img_comp[k].linebuf = (stbi_uc*)stbi__malloc(z->s->img_x + 3);
+			if (!z->img_comp[k].linebuf) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }
+
+			r->hs = z->img_h_max / z->img_comp[k].h;
+			r->vs = z->img_v_max / z->img_comp[k].v;
+			r->ystep = r->vs >> 1;
+			r->w_lores = (z->s->img_x + r->hs - 1) / r->hs;
+			r->ypos = 0;
+			r->line0 = r->line1 = z->img_comp[k].data;
+
+			// pick the cheapest upsampler that handles this hs/vs combination
+			if (r->hs == 1 && r->vs == 1) r->resample = resample_row_1;
+			else if (r->hs == 1 && r->vs == 2) r->resample = stbi__resample_row_v_2;
+			else if (r->hs == 2 && r->vs == 1) r->resample = stbi__resample_row_h_2;
+			else if (r->hs == 2 && r->vs == 2) r->resample = z->resample_row_hv_2_kernel;
+			else                               r->resample = stbi__resample_row_generic;
+		}
+
+		// can't error after this so, this is safe
+		// NOTE(review): the trailing 1 presumably requests one spare byte, which
+		// the unconditional out[3] / out[1] stores below appear to rely on for
+		// the last pixel -- confirm against stbi__malloc_mad3
+		output = (stbi_uc*)stbi__malloc_mad3(n, z->s->img_x, z->s->img_y, 1);
+		if (!output) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }
+
+		// now go ahead and resample
+		for (j = 0; j < z->s->img_y; ++j) {
+			stbi_uc* out = output + n * z->s->img_x * j;
+			for (k = 0; k < decode_n; ++k) {
+				stbi__resample* r = &res_comp[k];
+				// in the second half of a vertical step line1 is passed first,
+				// otherwise line0 is
+				int y_bot = r->ystep >= (r->vs >> 1);
+				coutput[k] = r->resample(z->img_comp[k].linebuf,
+					y_bot ? r->line1 : r->line0,
+					y_bot ? r->line0 : r->line1,
+					r->w_lores, r->hs);
+				// advance to the next source row once vs output rows were made;
+				// line1 stops moving at the last source row
+				if (++r->ystep >= r->vs) {
+					r->ystep = 0;
+					r->line0 = r->line1;
+					if (++r->ypos < z->img_comp[k].y)
+						r->line1 += z->img_comp[k].w2;
+				}
+			}
+			if (n >= 3) {
+				// color output (3 or 4 channels)
+				stbi_uc* y = coutput[0];
+				if (z->s->img_n == 3) {
+					if (is_rgb) {
+						for (i = 0; i < z->s->img_x; ++i) {
+							out[0] = y[i];
+							out[1] = coutput[1][i];
+							out[2] = coutput[2][i];
+							out[3] = 255;
+							out += n;
+						}
+					}
+					else {
+						z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
+					}
+				}
+				else if (z->s->img_n == 4) {
+					if (z->app14_color_transform == 0) { // CMYK
+						for (i = 0; i < z->s->img_x; ++i) {
+							stbi_uc m = coutput[3][i];
+							out[0] = stbi__blinn_8x8(coutput[0][i], m);
+							out[1] = stbi__blinn_8x8(coutput[1][i], m);
+							out[2] = stbi__blinn_8x8(coutput[2][i], m);
+							out[3] = 255;
+							out += n;
+						}
+					}
+					else if (z->app14_color_transform == 2) { // YCCK
+						// convert to RGB first, then invert and scale by the fourth channel
+						z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
+						for (i = 0; i < z->s->img_x; ++i) {
+							stbi_uc m = coutput[3][i];
+							out[0] = stbi__blinn_8x8(255 - out[0], m);
+							out[1] = stbi__blinn_8x8(255 - out[1], m);
+							out[2] = stbi__blinn_8x8(255 - out[2], m);
+							out += n;
+						}
+					}
+					else { // YCbCr + alpha?  Ignore the fourth channel for now
+						z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
+					}
+				}
+				else
+					// any other component count: replicate the first plane as grey
+					for (i = 0; i < z->s->img_x; ++i) {
+						out[0] = out[1] = out[2] = y[i];
+						out[3] = 255; // not used if n==3
+						out += n;
+					}
+			}
+			else {
+				// grey output (1 or 2 channels)
+				if (is_rgb) {
+					if (n == 1)
+						for (i = 0; i < z->s->img_x; ++i)
+							* out++ = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
+					else {
+						for (i = 0; i < z->s->img_x; ++i, out += 2) {
+							out[0] = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
+							out[1] = 255;
+						}
+					}
+				}
+				else if (z->s->img_n == 4 && z->app14_color_transform == 0) {
+					// CMYK: scale each channel by the fourth one, then reduce to grey
+					for (i = 0; i < z->s->img_x; ++i) {
+						stbi_uc m = coutput[3][i];
+						stbi_uc r = stbi__blinn_8x8(coutput[0][i], m);
+						stbi_uc g = stbi__blinn_8x8(coutput[1][i], m);
+						stbi_uc b = stbi__blinn_8x8(coutput[2][i], m);
+						out[0] = stbi__compute_y(r, g, b);
+						out[1] = 255;
+						out += n;
+					}
+				}
+				else if (z->s->img_n == 4 && z->app14_color_transform == 2) {
+					// YCCK: only the first plane and the fourth channel are used
+					for (i = 0; i < z->s->img_x; ++i) {
+						out[0] = stbi__blinn_8x8(255 - coutput[0][i], coutput[3][i]);
+						out[1] = 255;
+						out += n;
+					}
+				}
+				else {
+					stbi_uc* y = coutput[0];
+					if (n == 1)
+						for (i = 0; i < z->s->img_x; ++i) out[i] = y[i];
+					else
+						for (i = 0; i < z->s->img_x; ++i)* out++ = y[i], * out++ = 255;
+				}
+			}
+		}
+		stbi__cleanup_jpeg(z);
+		*out_x = z->s->img_x;
+		*out_y = z->s->img_y;
+		if (comp)* comp = z->s->img_n >= 3 ? 3 : 1; // report original components, not output
+		return output;
+	}
+}
+
+// Allocate a temporary JPEG decoder state, decode the image read from s and
+// return the pixel data produced by load_jpeg_image.
+//   x, y, comp and req_comp are forwarded to load_jpeg_image unchanged.
+//   ri is not used by this loader.
+// Fails with "outofmem" when the decoder state cannot be allocated.
+static void* stbi__jpeg_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri)
+{
+	unsigned char* result;
+	stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg));
+	STBI_NOTUSED(ri);
+	// the allocation can fail; report it instead of dereferencing NULL
+	if (!j) return stbi__errpuc("outofmem", "Out of memory");
+	j->s = s;
+	stbi__setup_jpeg(j);
+	result = load_jpeg_image(j, x, y, comp, req_comp);
+	STBI_FREE(j);
+	return result;
+}
+
+// Check whether the data in s starts like a JPEG by running the header
+// decoder in STBI__SCAN_type mode, then rewind s so it can be read again.
+// Returns the header decoder's result, or the stbi__err value for
+// "outofmem" when the temporary decoder state cannot be allocated.
+static int stbi__jpeg_test(stbi__context* s)
+{
+	int r;
+	stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg));
+	// the allocation can fail; report it instead of dereferencing NULL
+	if (!j) return stbi__err("outofmem", "Out of memory");
+	j->s = s;
+	stbi__setup_jpeg(j);
+	r = stbi__decode_jpeg_header(j, STBI__SCAN_type);
+	stbi__rewind(s);
+	STBI_FREE(j);
+	return r;
+}
+
+// Read just the JPEG header through j and report the image properties.
+// Each of x, y and comp may be NULL. comp is 3 for files with three or more
+// components and 1 otherwise. Returns 1 on success; on a header failure the
+// stream is rewound and 0 is returned.
+static int stbi__jpeg_info_raw(stbi__jpeg* j, int* x, int* y, int* comp)
+{
+	if (stbi__decode_jpeg_header(j, STBI__SCAN_header)) {
+		if (x) {
+			*x = j->s->img_x;
+		}
+		if (y) {
+			*y = j->s->img_y;
+		}
+		if (comp) {
+			*comp = j->s->img_n >= 3 ? 3 : 1;
+		}
+		return 1;
+	}
+	stbi__rewind(j->s);
+	return 0;
+}
+
+// Report width, height and component count of the JPEG in s without decoding
+// the pixels, using a temporary decoder state.
+// Returns the result of stbi__jpeg_info_raw, or the stbi__err value for
+// "outofmem" when the temporary decoder state cannot be allocated.
+static int stbi__jpeg_info(stbi__context* s, int* x, int* y, int* comp)
+{
+	int result;
+	stbi__jpeg* j = (stbi__jpeg*)(stbi__malloc(sizeof(stbi__jpeg)));
+	// the allocation can fail; report it instead of dereferencing NULL
+	if (!j) return stbi__err("outofmem", "Out of memory");
+	j->s = s;
+	result = stbi__jpeg_info_raw(j, x, y, comp);
+	STBI_FREE(j);
+	return result;
+}
+#endif
+
+// public domain zlib decode    v0.2  Sean Barrett 2006-11-18
+//    simple implementation
+//      - all input must be provided in an upfront buffer
+//      - all output is written to a single output buffer (can malloc/realloc)
+//    performance
+//      - fast huffman
+
+#ifndef STBI_NO_ZLIB
+
+// fast-way is faster to check than jpeg huffman, but slow way is slower
+// Codes of at most STBI__ZFAST_BITS bits are resolved with a single lookup in
+// the 'fast' table of stbi__zhuffman, which has 1 << STBI__ZFAST_BITS entries.
+#define STBI__ZFAST_BITS  9 // accelerate all cases in default tables
+// selects the low STBI__ZFAST_BITS bits of the bit buffer as the table index
+#define STBI__ZFAST_MASK  ((1 << STBI__ZFAST_BITS) - 1)
+
+// zlib-style huffman encoding
+// (jpeg packs from left, zlib from right, so can't share code)
+// Filled in by stbi__zbuild_huffman; arrays indexed by [s] are indexed by
+// code length in bits.
+typedef struct
+{
+	stbi__uint16 fast[1 << STBI__ZFAST_BITS]; // (length << 9) | symbol for short codes, 0 if not resolvable here
+	stbi__uint16 firstcode[16];   // first code value of each length
+	int maxcode[17];              // end of each length's code range, pre-shifted to 16 bits; [16] is a sentinel
+	stbi__uint16 firstsymbol[16]; // index into size/value of the first symbol of each length
+	stbi_uc  size[288];           // code length of each entry, in code order
+	stbi__uint16 value[288];      // symbol value of each entry, in code order
+} stbi__zhuffman;
+
+// Reverse the order of the low 16 bits of n; higher bits are discarded.
+stbi_inline static int stbi__bitreverse16(int n)
+{
+	int hi, lo;
+
+	// swap neighbouring bits
+	hi = n & 0xAAAA;
+	lo = n & 0x5555;
+	n = (hi >> 1) | (lo << 1);
+
+	// swap neighbouring bit pairs
+	hi = n & 0xCCCC;
+	lo = n & 0x3333;
+	n = (hi >> 2) | (lo << 2);
+
+	// swap neighbouring nibbles
+	hi = n & 0xF0F0;
+	lo = n & 0x0F0F;
+	n = (hi >> 4) | (lo << 4);
+
+	// swap the two bytes
+	hi = n & 0xFF00;
+	lo = n & 0x00FF;
+	return (hi >> 8) | (lo << 8);
+}
+
+// Reverse the low 'bits' bits of v (bits must not exceed 16).
+stbi_inline static int stbi__bit_reverse(int v, int bits)
+{
+	int reversed16;
+	STBI_ASSERT(bits <= 16);
+	// reverse all 16 bits, then drop the (16 - bits) positions that are not
+	// wanted: for 11 bits, reverse and shift away 5
+	reversed16 = stbi__bitreverse16(v);
+	return reversed16 >> (16 - bits);
+}
+
+// Build the decoding tables in z from a list of per-symbol code lengths.
+//   sizelist[i] is the code length of symbol i (0 = symbol not used);
+//   num is the number of entries in sizelist.
+// Returns 1 on success, or the value of stbi__err when the lengths cannot
+// form a valid set of codes.
+static int stbi__zbuild_huffman(stbi__zhuffman* z, const stbi_uc* sizelist, int num)
+{
+	int i, k = 0;
+	int code, next_code[16], sizes[17];
+
+	// DEFLATE spec for generating codes
+	memset(sizes, 0, sizeof(sizes));
+	memset(z->fast, 0, sizeof(z->fast));
+	// count how many symbols use each code length
+	for (i = 0; i < num; ++i)
+		++sizes[sizelist[i]];
+	sizes[0] = 0; // length 0 means "unused", so it takes part in no code
+	// a length of i bits cannot hold more than 2^i codes
+	for (i = 1; i < 16; ++i)
+		if (sizes[i] > (1 << i))
+			return stbi__err("bad sizes", "Corrupt PNG");
+	// assign consecutive code ranges, shortest length first
+	code = 0;
+	for (i = 1; i < 16; ++i) {
+		next_code[i] = code;
+		z->firstcode[i] = (stbi__uint16)code;
+		z->firstsymbol[i] = (stbi__uint16)k;
+		code = (code + sizes[i]);
+		if (sizes[i])
+			if (code - 1 >= (1 << i)) return stbi__err("bad codelengths", "Corrupt PNG");
+		z->maxcode[i] = code << (16 - i); // preshift for inner loop
+		code <<= 1;
+		k += sizes[i];
+	}
+	z->maxcode[16] = 0x10000; // sentinel
+	// record every used symbol in code order and fill the fast table
+	for (i = 0; i < num; ++i) {
+		int s = sizelist[i];
+		if (s) {
+			int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s];
+			stbi__uint16 fastv = (stbi__uint16)((s << 9) | i);
+			z->size[c] = (stbi_uc)s;
+			z->value[c] = (stbi__uint16)i;
+			if (s <= STBI__ZFAST_BITS) {
+				// the stream delivers codes bit-reversed; every table index whose
+				// low s bits equal the reversed code maps to this symbol
+				int j = stbi__bit_reverse(next_code[s], s);
+				while (j < (1 << STBI__ZFAST_BITS)) {
+					z->fast[j] = fastv;
+					j += (1 << s);
+				}
+			}
+			++next_code[s];
+		}
+	}
+	return 1;
+}
+
+// zlib-from-memory implementation for PNG reading
+//    because PNG allows splitting the zlib stream arbitrarily,
+//    and it's annoying structurally to have PNG call ZLIB call PNG,
+//    we require PNG read all the IDATs and combine them into a single
+//    memory buffer
+
+// State of one zlib/DEFLATE decode: input cursor, bit buffer, output buffer
+// and the two Huffman tables of the block being decoded.
+typedef struct
+{
+	stbi_uc* zbuffer, * zbuffer_end; // next input byte and end of the input
+	int num_bits;                    // number of bits currently held in code_buffer
+	stbi__uint32 code_buffer;        // pending input bits, consumed from the low end
+
+	char* zout;         // next output byte to write
+	char* zout_start;   // start of the output buffer
+	char* zout_end;     // end of the output buffer
+	int   z_expandable; // nonzero if stbi__zexpand may reallocate the output buffer
+
+	stbi__zhuffman z_length, z_distance; // literal/length and distance code tables
+} stbi__zbuf;
+
+// Fetch the next input byte; once the input is exhausted this yields 0
+// without advancing.
+stbi_inline static stbi_uc stbi__zget8(stbi__zbuf* z)
+{
+	return (z->zbuffer < z->zbuffer_end) ? *z->zbuffer++ : 0;
+}
+
+// Top up the bit buffer one input byte at a time until it holds more than
+// 24 bits. At least one byte is always added.
+static void stbi__fill_bits(stbi__zbuf* z)
+{
+	for (;;) {
+		// no stale bits may sit above the num_bits valid ones
+		STBI_ASSERT(z->code_buffer < (1U << z->num_bits));
+		z->code_buffer |= (unsigned int)stbi__zget8(z) << z->num_bits;
+		z->num_bits += 8;
+		if (z->num_bits > 24) break;
+	}
+}
+
+// Remove the next n bits from the bit buffer and return them, refilling the
+// buffer first when it holds fewer than n bits.
+stbi_inline static unsigned int stbi__zreceive(stbi__zbuf* z, int n)
+{
+	unsigned int bits;
+	if (n > z->num_bits) stbi__fill_bits(z);
+	bits = z->code_buffer & ((1 << n) - 1);
+	z->num_bits -= n;
+	z->code_buffer >>= n;
+	return bits;
+}
+
+// Decode one symbol whose code was not resolved by the fast table.
+// The next 16 bits of the stream are bit-reversed so that the code sits in
+// the most significant bits, then compared against the pre-shifted maxcode
+// table to find the code length.
+// Returns the symbol value, or -1 when the bits do not form a valid code.
+static int stbi__zhuffman_decode_slowpath(stbi__zbuf* a, stbi__zhuffman* z)
+{
+	int b, s, k;
+	// not resolved by fast table, so compute it the slow way
+	// use jpeg approach, which requires MSbits at top
+	k = stbi__bit_reverse(a->code_buffer, 16);
+	// maxcode[16] is a sentinel above every 16-bit value, so this terminates
+	for (s = STBI__ZFAST_BITS + 1; ; ++s)
+		if (k < z->maxcode[s])
+			break;
+	if (s >= 16) return -1; // invalid code!
+	// code size is s, so:
+	b = (k >> (16 - s)) - z->firstcode[s] + z->firstsymbol[s];
+	// corrupt data must be reported even when asserts are compiled out:
+	// never index the tables out of range or return a mismatched symbol
+	if (b < 0 || b >= (int)(sizeof(z->size) / sizeof(z->size[0]))) return -1;
+	if (z->size[b] != s) return -1;
+	a->code_buffer >>= s;
+	a->num_bits -= s;
+	return z->value[b];
+}
+
+// Decode the next symbol from the stream using table z. Short codes are
+// resolved through the fast table; everything else goes to the slow path.
+stbi_inline static int stbi__zhuffman_decode(stbi__zbuf* a, stbi__zhuffman* z)
+{
+	int entry, len;
+	if (a->num_bits < 16) stbi__fill_bits(a);
+	entry = z->fast[a->code_buffer & STBI__ZFAST_MASK];
+	if (!entry)
+		return stbi__zhuffman_decode_slowpath(a, z);
+	// a fast entry stores the code length above bit 9 and the symbol below it
+	len = entry >> 9;
+	a->code_buffer >>= len;
+	a->num_bits -= len;
+	return entry & 511;
+}
+
+// Grow the output buffer so that at least n more bytes fit after zout.
+//   zout is the caller's current write position; it is stored into z->zout
+//   before anything else happens.
+// The buffer size is doubled until it is large enough. On success the three
+// output pointers of z are updated to the reallocated buffer and 1 is
+// returned. Otherwise the value of stbi__err is returned: the buffer is not
+// expandable, the required size does not fit in an int, or the reallocation
+// failed.
+static int stbi__zexpand(stbi__zbuf* z, char* zout, int n)  // need to make room for n bytes
+{
+	const int int_max = (int)(~0u >> 1); // largest int value, spelled without <limits.h>
+	char* q;
+	int cur, limit, old_limit;
+	z->zout = zout;
+	if (!z->z_expandable) return stbi__err("output buffer limit", "Corrupt PNG");
+	cur = (int)(z->zout - z->zout_start);
+	limit = old_limit = (int)(z->zout_end - z->zout_start);
+	// reject requests whose total size cannot be represented in an int
+	if (n < 0 || n > int_max - cur) return stbi__err("outofmem", "Out of memory");
+	// doubling a zero-sized buffer would never reach the required size
+	if (limit < 1) limit = 1;
+	while (cur + n > limit) {
+		// stop before the doubling itself overflows
+		if (limit > int_max / 2) return stbi__err("outofmem", "Out of memory");
+		limit *= 2;
+	}
+	q = (char*)STBI_REALLOC_SIZED(z->zout_start, old_limit, limit);
+	STBI_NOTUSED(old_limit);
+	if (q == NULL) return stbi__err("outofmem", "Out of memory");
+	z->zout_start = q;
+	z->zout = q + cur;
+	z->zout_end = q + limit;
+	return 1;
+}
+
+// Base match length for each length symbol, indexed by (symbol - 257).
+// The trailing zero entries pad the table for symbols past the last real one.
+static const int stbi__zlength_base[31] = {
+   3,4,5,6,7,8,9,10,11,13,
+   15,17,19,23,27,31,35,43,51,59,
+   67,83,99,115,131,163,195,227,258,0,0 };
+
+// Number of extra bits read and added to the base length, same indexing.
+static const int stbi__zlength_extra[31] =
+{ 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 };
+
+// Base match distance for each distance symbol.
+static const int stbi__zdist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,
+257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0 };
+
+// Number of extra bits read and added to the base distance. Only 30 values
+// are listed; the last two entries are zero-initialized.
+static const int stbi__zdist_extra[32] =
+{ 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13 };
+
+// Decode one Huffman-compressed block using the tables in a->z_length and
+// a->z_distance, appending the output at a->zout.
+// Returns 1 when the end-of-block symbol (256) is reached; a->zout then
+// points past the last byte written. Returns 0 (through stbi__err, or from a
+// failed stbi__zexpand) on a bad code, a bad distance, or when the output
+// buffer cannot be grown.
+static int stbi__parse_huffman_block(stbi__zbuf* a)
+{
+	// work on a local write pointer; a->zout is only synchronized around
+	// stbi__zexpand calls and at end of block
+	char* zout = a->zout;
+	for (;;) {
+		int z = stbi__zhuffman_decode(a, &a->z_length);
+		if (z < 256) {
+			// literal byte (negative values signal a decode failure)
+			if (z < 0) return stbi__err("bad huffman code", "Corrupt PNG"); // error in huffman codes
+			if (zout >= a->zout_end) {
+				if (!stbi__zexpand(a, zout, 1)) return 0;
+				zout = a->zout;
+			}
+			*zout++ = (char)z;
+		}
+		else {
+			stbi_uc* p;
+			int len, dist;
+			if (z == 256) {
+				// end of block
+				a->zout = zout;
+				return 1;
+			}
+			// length symbol: base value plus optional extra bits
+			z -= 257;
+			len = stbi__zlength_base[z];
+			if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]);
+			// distance symbol: base value plus optional extra bits
+			z = stbi__zhuffman_decode(a, &a->z_distance);
+			if (z < 0) return stbi__err("bad huffman code", "Corrupt PNG");
+			dist = stbi__zdist_base[z];
+			if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]);
+			// the match may not start before the beginning of the output
+			if (zout - a->zout_start < dist) return stbi__err("bad dist", "Corrupt PNG");
+			if (zout + len > a->zout_end) {
+				if (!stbi__zexpand(a, zout, len)) return 0;
+				zout = a->zout;
+			}
+			p = (stbi_uc*)(zout - dist);
+			if (dist == 1) { // run of one byte; common in images.
+				stbi_uc v = *p;
+				if (len) { do *zout++ = v; while (--len); }
+			}
+			else {
+				// byte-by-byte copy: source and destination may overlap
+				if (len) { do *zout++ = *p++; while (--len); }
+			}
+		}
+	}
+}
+
+// Read the code-length description at the start of a dynamic Huffman block
+// and build a->z_length and a->z_distance from it.
+// Returns 1 on success and 0 (through stbi__err or a failed
+// stbi__zbuild_huffman) when the description is inconsistent.
+static int stbi__compute_huffman_codes(stbi__zbuf* a)
+{
+	// order in which the code-length code lengths are stored in the stream
+	static const stbi_uc length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 };
+	stbi__zhuffman z_codelength;
+	stbi_uc lencodes[286 + 32 + 137];//padding for maximum single op
+	stbi_uc codelength_sizes[19];
+	int i, n;
+
+	int hlit = stbi__zreceive(a, 5) + 257;  // number of literal/length code lengths
+	int hdist = stbi__zreceive(a, 5) + 1;   // number of distance code lengths
+	int hclen = stbi__zreceive(a, 4) + 4;   // number of code-length code lengths
+	int ntot = hlit + hdist;
+
+	// lengths that are not transmitted stay 0
+	memset(codelength_sizes, 0, sizeof(codelength_sizes));
+	for (i = 0; i < hclen; ++i) {
+		int s = stbi__zreceive(a, 3);
+		codelength_sizes[length_dezigzag[i]] = (stbi_uc)s;
+	}
+	if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0;
+
+	// decode the literal/length and distance code lengths as one sequence
+	n = 0;
+	while (n < ntot) {
+		int c = stbi__zhuffman_decode(a, &z_codelength);
+		if (c < 0 || c >= 19) return stbi__err("bad codelengths", "Corrupt PNG");
+		if (c < 16)
+			lencodes[n++] = (stbi_uc)c; // a literal code length
+		else {
+			// symbols 16..18 are run-length codes
+			stbi_uc fill = 0;
+			if (c == 16) {
+				// repeat the previous length 3..6 times
+				c = stbi__zreceive(a, 2) + 3;
+				if (n == 0) return stbi__err("bad codelengths", "Corrupt PNG");
+				fill = lencodes[n - 1];
+			}
+			else if (c == 17)
+				c = stbi__zreceive(a, 3) + 3; // 3..10 zeros
+			else {
+				STBI_ASSERT(c == 18);
+				c = stbi__zreceive(a, 7) + 11; // 11..138 zeros
+			}
+			// a run may not extend past the announced number of lengths
+			if (ntot - n < c) return stbi__err("bad codelengths", "Corrupt PNG");
+			memset(lencodes + n, fill, c);
+			n += c;
+		}
+	}
+	if (n != ntot) return stbi__err("bad codelengths", "Corrupt PNG");
+	if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit)) return 0;
+	if (!stbi__zbuild_huffman(&a->z_distance, lencodes + hlit, hdist)) return 0;
+	return 1;
+}
+
+// Copy one stored (uncompressed) block from the input to the output.
+// The block starts at the next byte boundary with a 4-byte header holding
+// the length and its one's complement, both little-endian.
+// Returns 1 on success and 0 (through stbi__err or a failed stbi__zexpand)
+// when the header is inconsistent, the input is too short, or the output
+// buffer cannot be grown.
+static int stbi__parse_uncompressed_block(stbi__zbuf* a)
+{
+	stbi_uc header[4];
+	int len, nlen, k;
+	// skip the bits left over in the current partial byte
+	if (a->num_bits & 7)
+		stbi__zreceive(a, a->num_bits & 7); // discard
+	// drain the bit-packed data into header
+	k = 0;
+	while (a->num_bits > 0) {
+		header[k++] = (stbi_uc)(a->code_buffer & 255); // suppress MSVC run-time check
+		a->code_buffer >>= 8;
+		a->num_bits -= 8;
+	}
+	STBI_ASSERT(a->num_bits == 0);
+	// now fill header the normal way
+	while (k < 4)
+		header[k++] = stbi__zget8(a);
+	len = header[1] * 256 + header[0];
+	nlen = header[3] * 256 + header[2];
+	// the second field must be the bitwise complement of the first
+	if (nlen != (len ^ 0xffff)) return stbi__err("zlib corrupt", "Corrupt PNG");
+	if (a->zbuffer + len > a->zbuffer_end) return stbi__err("read past buffer", "Corrupt PNG");
+	if (a->zout + len > a->zout_end)
+		if (!stbi__zexpand(a, a->zout, len)) return 0;
+	memcpy(a->zout, a->zbuffer, len);
+	a->zbuffer += len;
+	a->zout += len;
+	return 1;
+}
+
+// Consume and validate the two-byte zlib stream header.
+// Returns 1 when it is acceptable, otherwise the value of stbi__err.
+static int stbi__parse_zlib_header(stbi__zbuf* a)
+{
+	int cmf = stbi__zget8(a);
+	int flg = stbi__zget8(a);
+	int method = cmf & 15;
+	/* the window size, cmf >> 4, is not needed: output is fully buffered */
+
+	// zlib spec: the two bytes read as a 16-bit value are a multiple of 31
+	if ((cmf * 256 + flg) % 31 != 0)
+		return stbi__err("bad zlib header", "Corrupt PNG");
+	// preset dictionary not allowed in png
+	if (flg & 32)
+		return stbi__err("no preset dict", "Corrupt PNG");
+	// DEFLATE required for png
+	if (method != 8)
+		return stbi__err("bad compression", "Corrupt PNG");
+	return 1;
+}
+
+// Code lengths of the fixed literal/length code used by blocks of type 1:
+// 8 bits for symbols 0..143, 9 for 144..255, 7 for 256..279, 8 for 280..287.
+static const stbi_uc stbi__zdefault_length[288] =
+{
+   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8
+};
+// Code lengths of the fixed distance code used by blocks of type 1: all 5 bits.
+static const stbi_uc stbi__zdefault_distance[32] =
+{
+   5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5
+};
+/*
+The two tables above are the precomputed result of this init algorithm:
+{
+   int i;   // use <= to match clearly with spec
+   for (i=0; i <= 143; ++i)     stbi__zdefault_length[i]   = 8;
+   for (   ; i <= 255; ++i)     stbi__zdefault_length[i]   = 9;
+   for (   ; i <= 279; ++i)     stbi__zdefault_length[i]   = 7;
+   for (   ; i <= 287; ++i)     stbi__zdefault_length[i]   = 8;
+
+   for (i=0; i <=  31; ++i)     stbi__zdefault_distance[i] = 5;
+}
+*/
+
+// Decode a complete DEFLATE stream from a, optionally preceded by a zlib
+// header (parse_header != 0), block by block until the block flagged as
+// final has been processed.
+// Returns 1 on success and 0 on any error.
+static int stbi__parse_zlib(stbi__zbuf* a, int parse_header)
+{
+	int is_last, block_type;
+	if (parse_header && !stbi__parse_zlib_header(a)) return 0;
+	a->num_bits = 0;
+	a->code_buffer = 0;
+	do {
+		is_last = stbi__zreceive(a, 1);
+		block_type = stbi__zreceive(a, 2);
+		switch (block_type) {
+		case 0: // stored block
+			if (!stbi__parse_uncompressed_block(a)) return 0;
+			break;
+		case 3: // not a valid block type
+			return 0;
+		case 1: // use fixed code lengths
+			if (!stbi__zbuild_huffman(&a->z_length, stbi__zdefault_length, 288)) return 0;
+			if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance, 32)) return 0;
+			if (!stbi__parse_huffman_block(a)) return 0;
+			break;
+		default: // code lengths are transmitted in the block
+			if (!stbi__compute_huffman_codes(a)) return 0;
+			if (!stbi__parse_huffman_block(a)) return 0;
+			break;
+		}
+	} while (!is_last);
+	return 1;
+}
+
+// Point a at the output buffer obuf of olen bytes, record whether that
+// buffer may be grown (exp), and run the decoder.
+// Returns the result of stbi__parse_zlib.
+static int stbi__do_zlib(stbi__zbuf* a, char* obuf, int olen, int exp, int parse_header)
+{
+	a->z_expandable = exp;
+	a->zout_end = obuf + olen;
+	a->zout_start = a->zout = obuf;
+	return stbi__parse_zlib(a, parse_header);
+}
+
+// Decode a zlib stream (with header) from buffer/len into a newly allocated
+// output buffer that starts at initial_size bytes and grows as needed.
+// On success returns the output buffer and, if outlen is non-NULL, stores
+// the number of decoded bytes there. Returns NULL on failure.
+STBIDEF char* stbi_zlib_decode_malloc_guesssize(const char* buffer, int len, int initial_size, int* outlen)
+{
+	stbi__zbuf a;
+	char* p = (char*)stbi__malloc(initial_size);
+	if (p == NULL) return NULL;
+	a.zbuffer = (stbi_uc*)buffer;
+	a.zbuffer_end = (stbi_uc*)buffer + len;
+	if (!stbi__do_zlib(&a, p, initial_size, 1, 1)) {
+		// the buffer may have been reallocated, so free the current one
+		STBI_FREE(a.zout_start);
+		return NULL;
+	}
+	if (outlen) {
+		*outlen = (int)(a.zout - a.zout_start);
+	}
+	return a.zout_start;
+}
+
+// Same as stbi_zlib_decode_malloc_guesssize with an initial output size of
+// 16384 bytes.
+STBIDEF char* stbi_zlib_decode_malloc(char const* buffer, int len, int* outlen)
+{
+	const int default_guess = 16384;
+	return stbi_zlib_decode_malloc_guesssize(buffer, len, default_guess, outlen);
+}
+
+// Decode a DEFLATE stream from buffer/len into a newly allocated output
+// buffer that starts at initial_size bytes and grows as needed. parse_header
+// says whether a zlib header precedes the data.
+// On success returns the output buffer and, if outlen is non-NULL, stores
+// the number of decoded bytes there. Returns NULL on failure.
+STBIDEF char* stbi_zlib_decode_malloc_guesssize_headerflag(const char* buffer, int len, int initial_size, int* outlen, int parse_header)
+{
+	stbi__zbuf a;
+	char* p = (char*)stbi__malloc(initial_size);
+	if (p == NULL) return NULL;
+	a.zbuffer = (stbi_uc*)buffer;
+	a.zbuffer_end = (stbi_uc*)buffer + len;
+	if (!stbi__do_zlib(&a, p, initial_size, 1, parse_header)) {
+		// the buffer may have been reallocated, so free the current one
+		STBI_FREE(a.zout_start);
+		return NULL;
+	}
+	if (outlen) {
+		*outlen = (int)(a.zout - a.zout_start);
+	}
+	return a.zout_start;
+}
+
+// Decode a zlib stream (with header) from ibuffer/ilen into the caller's
+// fixed-size buffer obuffer of olen bytes.
+// Returns the number of bytes written, or -1 on failure.
+STBIDEF int stbi_zlib_decode_buffer(char* obuffer, int olen, char const* ibuffer, int ilen)
+{
+	stbi__zbuf a;
+	a.zbuffer_end = (stbi_uc*)ibuffer + ilen;
+	a.zbuffer = (stbi_uc*)ibuffer;
+	if (!stbi__do_zlib(&a, obuffer, olen, 0, 1))
+		return -1;
+	return (int)(a.zout - a.zout_start);
+}
+
+// Decode a raw DEFLATE stream (no zlib header) from buffer/len into a newly
+// allocated output buffer that starts at 16384 bytes and grows as needed.
+// On success returns the output buffer and, if outlen is non-NULL, stores
+// the number of decoded bytes there. Returns NULL on failure.
+STBIDEF char* stbi_zlib_decode_noheader_malloc(char const* buffer, int len, int* outlen)
+{
+	stbi__zbuf a;
+	char* p = (char*)stbi__malloc(16384);
+	if (p == NULL) return NULL;
+	a.zbuffer = (stbi_uc*)buffer;
+	a.zbuffer_end = (stbi_uc*)buffer + len;
+	if (!stbi__do_zlib(&a, p, 16384, 1, 0)) {
+		// the buffer may have been reallocated, so free the current one
+		STBI_FREE(a.zout_start);
+		return NULL;
+	}
+	if (outlen) {
+		*outlen = (int)(a.zout - a.zout_start);
+	}
+	return a.zout_start;
+}
+
+// Decode a raw DEFLATE stream (no zlib header) from ibuffer/ilen into the
+// caller's fixed-size buffer obuffer of olen bytes.
+// Returns the number of bytes written, or -1 on failure.
+STBIDEF int stbi_zlib_decode_noheader_buffer(char* obuffer, int olen, const char* ibuffer, int ilen)
+{
+	stbi__zbuf a;
+	a.zbuffer_end = (stbi_uc*)ibuffer + ilen;
+	a.zbuffer = (stbi_uc*)ibuffer;
+	if (!stbi__do_zlib(&a, obuffer, olen, 0, 0))
+		return -1;
+	return (int)(a.zout - a.zout_start);
+}
+#endif
+
+// public domain "baseline" PNG decoder   v0.10  Sean Barrett 2006-11-18
+//    simple implementation
+//      - only 8-bit samples
+//      - no CRC checking
+//      - allocates lots of intermediate memory
+//        - avoids problem of streaming data between subsystems
+//        - avoids explicit window management
+//    performance
+//      - uses stb_zlib, a PD zlib implementation with fast huffman decoding
+
+#ifndef STBI_NO_PNG
+// Header of one PNG chunk as read by stbi__get_chunk_header.
+typedef struct
+{
+	stbi__uint32 length; // first 32-bit big-endian field of the chunk header
+	stbi__uint32 type;   // second 32-bit big-endian field of the chunk header
+} stbi__pngchunk;
+
+// Read the next chunk header from s: two big-endian 32-bit values, the
+// length first and the type second.
+static stbi__pngchunk stbi__get_chunk_header(stbi__context* s)
+{
+	stbi__pngchunk header;
+	header.length = stbi__get32be(s);
+	header.type = stbi__get32be(s);
+	return header;
+}
+
+// Consume the 8-byte PNG signature from s.
+// Returns 1 if it matches; otherwise stops at the first wrong byte and
+// returns the value of stbi__err.
+static int stbi__check_png_header(stbi__context* s)
+{
+	static const stbi_uc png_sig[8] = { 137,80,78,71,13,10,26,10 };
+	const stbi_uc* expected = png_sig;
+	const stbi_uc* const sig_end = png_sig + 8;
+	while (expected != sig_end) {
+		if (stbi__get8(s) != *expected) return stbi__err("bad png sig", "Not a PNG");
+		++expected;
+	}
+	return 1;
+}
+
+// State of one PNG decode.
+typedef struct
+{
+	stbi__context* s; // source context the image is read from
+	// out is the pixel buffer allocated by stbi__create_png_image_raw.
+	// NOTE(review): idata and expanded are not used in the code visible here;
+	// presumably the collected IDAT data and its inflated form -- confirm.
+	stbi_uc* idata, * expanded, * out;
+	int depth; // NOTE(review): presumably the bit depth per sample -- confirm
+} stbi__png;
+
+
+// Scanline filter types. Values 0..4 are the ones that can appear in the
+// filter byte at the start of each scanline; larger values are rejected when
+// read from the data.
+enum {
+	STBI__F_none = 0,
+	STBI__F_sub = 1,
+	STBI__F_up = 2,
+	STBI__F_avg = 3,
+	STBI__F_paeth = 4,
+	// synthetic filters used for first scanline to avoid needing a dummy row of 0s
+	STBI__F_avg_first,
+	STBI__F_paeth_first
+};
+
+// Maps the stored filter type (index 0..4) to the filter actually applied on
+// the first scanline, where there is no previous row: "up" becomes "none",
+// "avg" and "paeth" become their first-row variants.
+static stbi_uc first_row_filter[5] =
+{
+   STBI__F_none,
+   STBI__F_sub,
+   STBI__F_none,
+   STBI__F_avg_first,
+   STBI__F_paeth_first
+};
+
+// Paeth predictor: of a, b and c, return the one closest to a + b - c.
+// Ties are resolved in the order a, then b, then c.
+static int stbi__paeth(int a, int b, int c)
+{
+	int estimate = a + b - c;
+	int dist_a = abs(estimate - a);
+	int dist_b = abs(estimate - b);
+	int dist_c = abs(estimate - c);
+	if (dist_a > dist_b || dist_a > dist_c)
+		return (dist_b <= dist_c) ? b : c;
+	return a;
+}
+
+// Indexed by bit depth: factor that stretches a 1/2/4/8-bit sample to the
+// 0..255 range (applied to sub-8-bit data when color == 0).
+static const stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 };
+
+// create the png data from post-deflated data
+//   Undoes the per-scanline filters of 'raw' (raw_len bytes; each of the y
+//   rows is one filter byte followed by the packed samples of x pixels) and
+//   stores the result in a freshly allocated a->out with out_n channels.
+//   out_n is either s->img_n or s->img_n + 1; in the second case an alpha
+//   channel of 255 is added. depth is the number of bits per sample. color is
+//   only used to decide whether sub-8-bit samples get scaled to 0..255.
+//   Returns 1 on success, otherwise the value of stbi__err.
+//   NOTE(review): a->out stays allocated on the error returns after the
+//   allocation; presumably the caller releases it -- confirm.
+static int stbi__create_png_image_raw(stbi__png* a, stbi_uc* raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color)
+{
+	int bytes = (depth == 16 ? 2 : 1);
+	stbi__context* s = a->s;
+	stbi__uint32 i, j, stride = x * out_n * bytes;
+	stbi__uint32 img_len, img_width_bytes;
+	int k;
+	int img_n = s->img_n; // copy it into a local for later
+
+	int output_bytes = out_n * bytes;
+	int filter_bytes = img_n * bytes;
+	int width = x;
+
+	STBI_ASSERT(out_n == s->img_n || out_n == s->img_n + 1);
+	a->out = (stbi_uc*)stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into
+	if (!a->out) return stbi__err("outofmem", "Out of memory");
+
+	if (!stbi__mad3sizes_valid(img_n, x, depth, 7)) return stbi__err("too large", "Corrupt PNG");
+	// bytes per packed input row, rounded up to whole bytes; each row is
+	// preceded by one filter byte
+	img_width_bytes = (((img_n * x * depth) + 7) >> 3);
+	img_len = (img_width_bytes + 1) * y;
+
+	// we used to check for exact match between raw_len and img_len on non-interlaced PNGs,
+	// but issue #276 reported a PNG in the wild that had extra data at the end (all zeros),
+	// so just check for raw_len < img_len always.
+	if (raw_len < img_len) return stbi__err("not enough pixels", "Corrupt PNG");
+
+	for (j = 0; j < y; ++j) {
+		stbi_uc* cur = a->out + stride * j;
+		stbi_uc* prior;
+		int filter = *raw++;
+
+		if (filter > 4)
+			return stbi__err("invalid filter", "Corrupt PNG");
+
+		if (depth < 8) {
+			// sub-byte samples are filtered as packed bytes, one byte at a time
+			STBI_ASSERT(img_width_bytes <= x);
+			cur += x * out_n - img_width_bytes; // store output to the rightmost img_len bytes, so we can decode in place
+			filter_bytes = 1;
+			width = img_width_bytes;
+		}
+		prior = cur - stride; // bugfix: need to compute this after 'cur +=' computation above
+
+		// if first row, use special filter that doesn't sample previous row
+		if (j == 0) filter = first_row_filter[filter];
+
+		// handle first byte explicitly
+		// (the first pixel has no left neighbour, so only the row above contributes)
+		for (k = 0; k < filter_bytes; ++k) {
+			switch (filter) {
+			case STBI__F_none: cur[k] = raw[k]; break;
+			case STBI__F_sub: cur[k] = raw[k]; break;
+			case STBI__F_up: cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break;
+			case STBI__F_avg: cur[k] = STBI__BYTECAST(raw[k] + (prior[k] >> 1)); break;
+			case STBI__F_paeth: cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0, prior[k], 0)); break;
+			case STBI__F_avg_first: cur[k] = raw[k]; break;
+			case STBI__F_paeth_first: cur[k] = raw[k]; break;
+			}
+		}
+
+		// step past the first pixel, writing its alpha when one is being added
+		if (depth == 8) {
+			if (img_n != out_n)
+				cur[img_n] = 255; // first pixel
+			raw += img_n;
+			cur += out_n;
+			prior += out_n;
+		}
+		else if (depth == 16) {
+			if (img_n != out_n) {
+				cur[filter_bytes] = 255; // first pixel top byte
+				cur[filter_bytes + 1] = 255; // first pixel bottom byte
+			}
+			raw += filter_bytes;
+			cur += output_bytes;
+			prior += output_bytes;
+		}
+		else {
+			raw += 1;
+			cur += 1;
+			prior += 1;
+		}
+
+		// this is a little gross, so that we don't switch per-pixel or per-component
+		if (depth < 8 || img_n == out_n) {
+			// input and output have the same byte layout: filter the rest of
+			// the row as one flat run of nk bytes
+			int nk = (width - 1) * filter_bytes;
+#define STBI__CASE(f) \
+			 case f:     \
+				for (k=0; k < nk; ++k)
+			switch (filter) {
+				// "none" filter turns into a memcpy here; make that explicit.
+			case STBI__F_none:         memcpy(cur, raw, nk); break;
+				STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k - filter_bytes]); } break;
+				STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break;
+				STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k - filter_bytes]) >> 1)); } break;
+				STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k - filter_bytes], prior[k], prior[k - filter_bytes])); } break;
+				STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k - filter_bytes] >> 1)); } break;
+				STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k - filter_bytes], 0, 0)); } break;
+			}
+#undef STBI__CASE
+			raw += nk;
+		}
+		else {
+			// an alpha channel is being inserted: go pixel by pixel, because the
+			// output pixel (output_bytes) is wider than the input pixel
+			// (filter_bytes); the loop increment also writes alpha = 255
+			STBI_ASSERT(img_n + 1 == out_n);
+#define STBI__CASE(f) \
+			 case f:     \
+				for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \
+				   for (k=0; k < filter_bytes; ++k)
+			switch (filter) {
+				STBI__CASE(STBI__F_none) { cur[k] = raw[k]; } break;
+				STBI__CASE(STBI__F_sub) { cur[k] = STBI__BYTECAST(raw[k] + cur[k - output_bytes]); } break;
+				STBI__CASE(STBI__F_up) { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break;
+				STBI__CASE(STBI__F_avg) { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k - output_bytes]) >> 1)); } break;
+				STBI__CASE(STBI__F_paeth) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k - output_bytes], prior[k], prior[k - output_bytes])); } break;
+				STBI__CASE(STBI__F_avg_first) { cur[k] = STBI__BYTECAST(raw[k] + (cur[k - output_bytes] >> 1)); } break;
+				STBI__CASE(STBI__F_paeth_first) { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k - output_bytes], 0, 0)); } break;
+			}
+#undef STBI__CASE
+
+			// the loop above sets the high byte of the pixels' alpha, but for
+			// 16 bit png files we also need the low byte set. we'll do that here.
+			if (depth == 16) {
+				cur = a->out + stride * j; // start at the beginning of the row again
+				for (i = 0; i < x; ++i, cur += output_bytes) {
+					cur[filter_bytes + 1] = 255;
+				}
+			}
+		}
+	}
+
+	// we make a separate pass to expand bits to pixels; for performance,
+	// this could run two scanlines behind the above code, so it won't
+	// interfere with filtering but will still be in the cache.
+	if (depth < 8) {
+		for (j = 0; j < y; ++j) {
+			stbi_uc* cur = a->out + stride * j;
+			// the packed bytes were stored right-aligned in the row above
+			stbi_uc* in = a->out + stride * j + x * out_n - img_width_bytes;
+			// unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the common 8-bit path optimal at minimal cost for 1/2/4-bit
+			// png guarantees byte alignment, if width is not multiple of 8/4/2 we'll decode dummy trailing data that will be skipped in the later loop
+			stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range
+
+			// note that the final byte might overshoot and write more data than desired.
+			// we can allocate enough data that this never writes out of memory, but it
+			// could also overwrite the next scanline. can it overwrite non-empty data
+			// on the next scanline? yes, consider 1-pixel-wide scanlines with 1-bit-per-pixel.
+			// so we need to explicitly clamp the final ones
+
+			if (depth == 4) {
+				for (k = x * img_n; k >= 2; k -= 2, ++in) {
+					*cur++ = scale * ((*in >> 4));
+					*cur++ = scale * ((*in) & 0x0f);
+				}
+				if (k > 0)* cur++ = scale * ((*in >> 4));
+			}
+			else if (depth == 2) {
+				for (k = x * img_n; k >= 4; k -= 4, ++in) {
+					*cur++ = scale * ((*in >> 6));
+					*cur++ = scale * ((*in >> 4) & 0x03);
+					*cur++ = scale * ((*in >> 2) & 0x03);
+					*cur++ = scale * ((*in) & 0x03);
+				}
+				if (k > 0)* cur++ = scale * ((*in >> 6));
+				if (k > 1)* cur++ = scale * ((*in >> 4) & 0x03);
+				if (k > 2)* cur++ = scale * ((*in >> 2) & 0x03);
+			}
+			else if (depth == 1) {
+				for (k = x * img_n; k >= 8; k -= 8, ++in) {
+					*cur++ = scale * ((*in >> 7));
+					*cur++ = scale * ((*in >> 6) & 0x01);
+					*cur++ = scale * ((*in >> 5) & 0x01);
+					*cur++ = scale * ((*in >> 4) & 0x01);
+					*cur++ = scale * ((*in >> 3) & 0x01);
+					*cur++ = scale * ((*in >> 2) & 0x01);
+					*cur++ = scale * ((*in >> 1) & 0x01);
+					*cur++ = scale * ((*in) & 0x01);
+				}
+				if (k > 0)* cur++ = scale * ((*in >> 7));
+				if (k > 1)* cur++ = scale * ((*in >> 6) & 0x01);
+				if (k > 2)* cur++ = scale * ((*in >> 5) & 0x01);
+				if (k > 3)* cur++ = scale * ((*in >> 4) & 0x01);
+				if (k > 4)* cur++ = scale * ((*in >> 3) & 0x01);
+				if (k > 5)* cur++ = scale * ((*in >> 2) & 0x01);
+				if (k > 6)* cur++ = scale * ((*in >> 1) & 0x01);
+			}
+			if (img_n != out_n) {
+				int q;
+				// insert alpha = 255
+				// (pixels are spread out from the last one backwards so that no
+				// source sample is overwritten before it has been moved)
+				cur = a->out + stride * j;
+				if (img_n == 1) {
+					for (q = x - 1; q >= 0; --q) {
+						cur[q * 2 + 1] = 255;
+						cur[q * 2 + 0] = cur[q];
+					}
+				}
+				else {
+					STBI_ASSERT(img_n == 3);
+					for (q = x - 1; q >= 0; --q) {
+						cur[q * 4 + 3] = 255;
+						cur[q * 4 + 2] = cur[q * 3 + 2];
+						cur[q * 4 + 1] = cur[q * 3 + 1];
+						cur[q * 4 + 0] = cur[q * 3 + 0];
+					}
+				}
+			}
+		}
+	}
+	else if (depth == 16) {
+		// force the image data from big-endian to platform-native.
+		// this is done in a separate pass due to the decoding relying
+		// on the data being untouched, but could probably be done
+		// per-line during decode if care is taken.
+		stbi_uc* cur = a->out;
+		stbi__uint16* cur16 = (stbi__uint16*)cur;
+
+		for (i = 0; i < x * y * out_n; ++i, cur16++, cur += 2) {
+			*cur16 = (cur[0] << 8) | cur[1];
+		}
+	}
+
+	return 1;
+}
+
+static int stbi__create_png_image(stbi__png* a, stbi_uc* image_data, stbi__uint32 image_data_len, int out_n, int depth, int color, int interlaced)
+{ // Decodes image_data into a->out; interlaced data is decoded as 7 reduced passes that are merged into one buffer. Returns 1 on success, a zero/stbi__err result otherwise.
+	int out_bytes = out_n * (depth == 16 ? 2 : 1); // bytes per output pixel
+	stbi_uc* final;
+	int p;
+	if (!interlaced)
+		return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color);
+
+	// de-interlacing: allocate the full-size image, then scatter each pass into it
+	final = (stbi_uc*)stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0);
+	if (final == NULL) return stbi__err("outofmem", "Out of memory"); // the memcpy below must never target NULL
+	for (p = 0; p < 7; ++p) {
+		int xorig[] = { 0,4,0,2,0,1,0 };
+		int yorig[] = { 0,0,4,0,2,0,1 };
+		int xspc[] = { 8,8,4,4,2,2,1 };
+		int yspc[] = { 8,8,8,4,4,2,2 };
+		int i, j, x, y;
+		// size of this pass: number of columns/rows that start at (xorig, yorig) with spacing (xspc, yspc)
+		x = (a->s->img_x - xorig[p] + xspc[p] - 1) / xspc[p];
+		y = (a->s->img_y - yorig[p] + yspc[p] - 1) / yspc[p];
+		if (x && y) {
+			stbi__uint32 img_len = ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y; // packed row bytes plus one extra byte per row, times rows
+			if (!stbi__create_png_image_raw(a, image_data, image_data_len, out_n, x, y, depth, color)) {
+				STBI_FREE(final);
+				return 0;
+			}
+			for (j = 0; j < y; ++j) {
+				for (i = 0; i < x; ++i) {
+					int out_y = j * yspc[p] + yorig[p];
+					int out_x = i * xspc[p] + xorig[p];
+					memcpy(final + out_y * a->s->img_x * out_bytes + out_x * out_bytes,
+						a->out + (j * x + i) * out_bytes, out_bytes);
+				}
+			}
+			STBI_FREE(a->out); // the per-pass buffer has been merged
+			image_data += img_len;
+			image_data_len -= img_len;
+		}
+	}
+	a->out = final;
+
+	return 1;
+}
+
+static int stbi__compute_transparency(stbi__png* z, stbi_uc tc[3], int out_n)
+{
+	// Colour-key transparency for 8-bit output: every pixel equal to tc gets
+	// alpha 0. Alpha is expected to already be 255 on entry. Always returns 1.
+	stbi__uint32 remaining = z->s->img_x * z->s->img_y;
+	stbi_uc* px = z->out;
+
+	STBI_ASSERT(out_n == 2 || out_n == 4);
+	if (out_n != 2) {
+		// RGBA: clear alpha only on an exact three-channel match
+		for (; remaining > 0; --remaining, px += 4) {
+			if (px[0] != tc[0] || px[1] != tc[1] || px[2] != tc[2])
+				continue;
+			px[3] = 0;
+		}
+	}
+	else {
+		// grey+alpha: alpha is rewritten for every pixel
+		for (; remaining > 0; --remaining, px += 2) {
+			if (px[0] == tc[0]) px[1] = 0;
+			else px[1] = 255;
+		}
+	}
+	return 1;
+}
+
+static int stbi__compute_transparency16(stbi__png* z, stbi__uint16 tc[3], int out_n)
+{
+	// 16-bit variant of the colour-key pass: samples are stbi__uint16 and
+	// opaque alpha is 65535. Pixels equal to tc get alpha 0. Always returns 1.
+	stbi__uint32 remaining = z->s->img_x * z->s->img_y;
+	stbi__uint16* px = (stbi__uint16*)z->out;
+
+	STBI_ASSERT(out_n == 2 || out_n == 4);
+	if (out_n != 2) {
+		// RGBA: clear alpha only on an exact three-channel match
+		for (; remaining > 0; --remaining, px += 4) {
+			if (px[0] != tc[0] || px[1] != tc[1] || px[2] != tc[2])
+				continue;
+			px[3] = 0;
+		}
+	}
+	else {
+		// grey+alpha: alpha is rewritten for every pixel
+		for (; remaining > 0; --remaining, px += 2) {
+			if (px[0] == tc[0]) px[1] = 0;
+			else px[1] = 65535;
+		}
+	}
+	return 1;
+}
+
+static int stbi__expand_png_palette(stbi__png* a, stbi_uc* palette, int len, int pal_img_n)
+{
+	// Replaces the one-byte-per-pixel index image in a->out with a newly
+	// allocated pal_img_n-channel image looked up in palette (4 bytes per entry).
+	// Returns 1 on success, or the stbi__err result if the allocation fails.
+	stbi__uint32 idx, pixel_count = a->s->img_x * a->s->img_y;
+	stbi_uc* indices = a->out;
+	stbi_uc* expanded, * dst;
+
+	STBI_NOTUSED(len);
+
+	expanded = (stbi_uc*)stbi__malloc_mad2(pixel_count, pal_img_n, 0);
+	if (expanded == NULL) return stbi__err("outofmem", "Out of memory");
+	dst = expanded;
+	if (pal_img_n == 3) {
+		// RGB: the fourth byte of each palette entry is not copied
+		for (idx = 0; idx < pixel_count; ++idx, dst += 3) {
+			const stbi_uc* entry = palette + indices[idx] * 4;
+			dst[0] = entry[0];
+			dst[1] = entry[1];
+			dst[2] = entry[2];
+		}
+	}
+	else {
+		// RGBA: copy all four bytes of each palette entry
+		for (idx = 0; idx < pixel_count; ++idx, dst += 4) {
+			const stbi_uc* entry = palette + indices[idx] * 4;
+			dst[0] = entry[0];
+			dst[1] = entry[1];
+			dst[2] = entry[2];
+			dst[3] = entry[3];
+		}
+	}
+	STBI_FREE(a->out);
+	a->out = expanded;
+	return 1;
+}
+
+static int stbi__unpremultiply_on_load = 0; // nonzero: stbi__de_iphone also divides the colour channels of 4-channel output by alpha
+static int stbi__de_iphone_flag = 0; // nonzero: PNGs that contained a CgBI chunk are passed through stbi__de_iphone after decoding
+
+STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply)
+{
+	stbi__unpremultiply_on_load = flag_true_if_should_unpremultiply; // kept in a file-level static, so it affects every later decode
+}
+
+STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert)
+{
+	stbi__de_iphone_flag = flag_true_if_should_convert; // kept in a file-level static, so it affects every later decode
+}
+
+static void stbi__de_iphone(stbi__png* z)
+{
+	// Rewrites the decoded pixels in z->out in place from BGR(A) to RGB(A)
+	// byte order. With 4 output channels and stbi__unpremultiply_on_load set,
+	// the colour channels are also divided by alpha (rounded to nearest).
+	stbi__context* ctx = z->s;
+	stbi__uint32 left = ctx->img_x * ctx->img_y; // pixels still to process
+	stbi_uc* px = z->out;
+	int stride = 3; // bytes per pixel
+
+	if (ctx->img_out_n != 3) {
+		// anything that is not 3-channel must be 4-channel output
+		STBI_ASSERT(ctx->img_out_n == 4);
+		stride = 4;
+		if (stbi__unpremultiply_on_load) {
+			// convert bgr to rgb and unpremultiply
+			for (; left > 0; --left, px += 4) {
+				stbi_uc alpha = px[3];
+				stbi_uc blue = px[0];
+				if (alpha == 0) {
+					// nothing to divide by: swap only
+					px[0] = px[2];
+					px[2] = blue;
+				}
+				else {
+					stbi_uc half = alpha / 2; // rounding term
+					px[0] = (px[2] * 255 + half) / alpha;
+					px[1] = (px[1] * 255 + half) / alpha;
+					px[2] = (blue * 255 + half) / alpha;
+				}
+			}
+			// the unpremultiply pass already swapped the channels
+			return;
+		}
+	}
+
+	// convert bgr to rgb (same swap for 3- and 4-byte pixels)
+	while (left > 0) {
+		stbi_uc blue = px[0];
+		px[0] = px[2];
+		px[2] = blue;
+		px += stride;
+		--left;
+	}
+}
+
+#define STBI__PNG_TYPE(a,b,c,d)  (((unsigned) (a) << 24) + ((unsigned) (b) << 16) + ((unsigned) (c) << 8) + (unsigned) (d)) // packs four chunk-name characters into one unsigned value, first character in the highest byte
+
+static int stbi__parse_png_file(stbi__png* z, int scan, int req_comp)
+{ // Walks the PNG chunk stream of z->s. scan selects how far to go (STBI__SCAN_type / STBI__SCAN_header / STBI__SCAN_load); on a full load the decoded image ends up in z->out. Returns 1 on success, a zero/stbi__err result otherwise.
+	stbi_uc palette[1024], pal_img_n = 0; // palette: 4 bytes per entry; pal_img_n is 0 for non-paletted images, otherwise 3 or 4
+	stbi_uc has_trans = 0, tc[3]; // has_trans: a tRNS colour key was read for a non-paletted image; tc: that key for depths below 16
+	stbi__uint16 tc16[3]; // colour key for 16-bit images
+	stbi__uint32 ioff = 0, idata_limit = 0, i, pal_len = 0; // ioff: IDAT bytes gathered so far; idata_limit: capacity of z->idata
+	int first = 1, k, interlace = 0, color = 0, is_iphone = 0; // first stays 1 until IHDR has been seen
+	stbi__context* s = z->s;
+
+	z->expanded = NULL;
+	z->idata = NULL;
+	z->out = NULL;
+
+	if (!stbi__check_png_header(s)) return 0;
+
+	if (scan == STBI__SCAN_type) return 1;
+
+	for (;;) {
+		stbi__pngchunk c = stbi__get_chunk_header(s);
+		switch (c.type) {
+		case STBI__PNG_TYPE('C', 'g', 'B', 'I'):
+			is_iphone = 1; // remembered for the zlib header flag and the optional stbi__de_iphone pass at IEND
+			stbi__skip(s, c.length);
+			break;
+		case STBI__PNG_TYPE('I', 'H', 'D', 'R'): {
+			int comp, filter;
+			if (!first) return stbi__err("multiple IHDR", "Corrupt PNG");
+			first = 0;
+			if (c.length != 13) return stbi__err("bad IHDR len", "Corrupt PNG");
+			s->img_x = stbi__get32be(s); if (s->img_x > (1 << 24)) return stbi__err("too large", "Very large image (corrupt?)");
+			s->img_y = stbi__get32be(s); if (s->img_y > (1 << 24)) return stbi__err("too large", "Very large image (corrupt?)");
+			z->depth = stbi__get8(s);  if (z->depth != 1 && z->depth != 2 && z->depth != 4 && z->depth != 8 && z->depth != 16)  return stbi__err("1/2/4/8/16-bit only", "PNG not supported: 1/2/4/8/16-bit only");
+			color = stbi__get8(s);  if (color > 6)         return stbi__err("bad ctype", "Corrupt PNG");
+			if (color == 3 && z->depth == 16)                  return stbi__err("bad ctype", "Corrupt PNG");
+			if (color == 3) pal_img_n = 3; else if (color & 1) return stbi__err("bad ctype", "Corrupt PNG");
+			comp = stbi__get8(s);  if (comp) return stbi__err("bad comp method", "Corrupt PNG");
+			filter = stbi__get8(s);  if (filter) return stbi__err("bad filter method", "Corrupt PNG");
+			interlace = stbi__get8(s); if (interlace > 1) return stbi__err("bad interlace method", "Corrupt PNG");
+			if (!s->img_x || !s->img_y) return stbi__err("0-pixel image", "Corrupt PNG");
+			if (!pal_img_n) {
+				s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0); // bit 2 of color selects RGB over grey, bit 4 adds an alpha channel
+				if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode");
+				if (scan == STBI__SCAN_header) return 1;
+			}
+			else {
+				// if paletted, then pal_n is our final components, and
+				// img_n is # components to decompress/filter.
+				s->img_n = 1;
+				if ((1 << 30) / s->img_x / 4 < s->img_y) return stbi__err("too large", "Corrupt PNG");
+				// if SCAN_header, have to scan to see if we have a tRNS
+			}
+			break;
+		}
+
+		case STBI__PNG_TYPE('P', 'L', 'T', 'E'): {
+			if (first) return stbi__err("first not IHDR", "Corrupt PNG");
+			if (c.length > 256 * 3) return stbi__err("invalid PLTE", "Corrupt PNG");
+			pal_len = c.length / 3;
+			if (pal_len * 3 != c.length) return stbi__err("invalid PLTE", "Corrupt PNG");
+			for (i = 0; i < pal_len; ++i) {
+				palette[i * 4 + 0] = stbi__get8(s);
+				palette[i * 4 + 1] = stbi__get8(s);
+				palette[i * 4 + 2] = stbi__get8(s);
+				palette[i * 4 + 3] = 255; // opaque unless a later tRNS chunk overrides it
+			}
+			break;
+		}
+
+		case STBI__PNG_TYPE('t', 'R', 'N', 'S'): {
+			if (first) return stbi__err("first not IHDR", "Corrupt PNG");
+			if (z->idata) return stbi__err("tRNS after IDAT", "Corrupt PNG");
+			if (pal_img_n) {
+				if (scan == STBI__SCAN_header) { s->img_n = 4; return 1; }
+				if (pal_len == 0) return stbi__err("tRNS before PLTE", "Corrupt PNG");
+				if (c.length > pal_len) return stbi__err("bad tRNS len", "Corrupt PNG");
+				pal_img_n = 4;
+				for (i = 0; i < c.length; ++i)
+					palette[i * 4 + 3] = stbi__get8(s); // one alpha byte per palette entry
+			}
+			else {
+				if (!(s->img_n & 1)) return stbi__err("tRNS with alpha", "Corrupt PNG");
+				if (c.length != (stbi__uint32)s->img_n * 2) return stbi__err("bad tRNS len", "Corrupt PNG");
+				has_trans = 1;
+				if (z->depth == 16) {
+					for (k = 0; k < s->img_n; ++k) tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is
+				}
+				else {
+					for (k = 0; k < s->img_n; ++k) tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger
+				}
+			}
+			break;
+		}
+
+		case STBI__PNG_TYPE('I', 'D', 'A', 'T'): {
+			if (first) return stbi__err("first not IHDR", "Corrupt PNG");
+			if (pal_img_n && !pal_len) return stbi__err("no PLTE", "Corrupt PNG");
+			if (scan == STBI__SCAN_header) { s->img_n = pal_img_n; return 1; }
+			if ((int)(ioff + c.length) < (int)ioff) return 0; // NOTE(review): returns 0 without calling stbi__err, so no failure reason is recorded on this path
+			if (ioff + c.length > idata_limit) {
+				stbi__uint32 idata_limit_old = idata_limit;
+				stbi_uc* p;
+				if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096;
+				while (ioff + c.length > idata_limit)
+					idata_limit *= 2;
+				STBI_NOTUSED(idata_limit_old);
+				p = (stbi_uc*)STBI_REALLOC_SIZED(z->idata, idata_limit_old, idata_limit); if (p == NULL) return stbi__err("outofmem", "Out of memory");
+				z->idata = p;
+			}
+			if (!stbi__getn(s, z->idata + ioff, c.length)) return stbi__err("outofdata", "Corrupt PNG");
+			ioff += c.length;
+			break;
+		}
+
+		case STBI__PNG_TYPE('I', 'E', 'N', 'D'): {
+			stbi__uint32 raw_len, bpl;
+			if (first) return stbi__err("first not IHDR", "Corrupt PNG");
+			if (scan != STBI__SCAN_load) return 1;
+			if (z->idata == NULL) return stbi__err("no IDAT", "Corrupt PNG");
+			// initial guess for decoded data size to avoid unnecessary reallocs
+			bpl = (s->img_x * z->depth + 7) / 8; // bytes per line, per component
+			raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */;
+			z->expanded = (stbi_uc*)stbi_zlib_decode_malloc_guesssize_headerflag((char*)z->idata, ioff, raw_len, (int*)& raw_len, !is_iphone);
+			if (z->expanded == NULL) return 0; // zlib should set error
+			STBI_FREE(z->idata); z->idata = NULL;
+			if ((req_comp == s->img_n + 1 && req_comp != 3 && !pal_img_n) || has_trans)
+				s->img_out_n = s->img_n + 1;
+			else
+				s->img_out_n = s->img_n;
+			if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n, z->depth, color, interlace)) return 0;
+			if (has_trans) {
+				if (z->depth == 16) {
+					if (!stbi__compute_transparency16(z, tc16, s->img_out_n)) return 0;
+				}
+				else {
+					if (!stbi__compute_transparency(z, tc, s->img_out_n)) return 0;
+				}
+			}
+			if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2)
+				stbi__de_iphone(z);
+			if (pal_img_n) {
+				// pal_img_n == 3 or 4
+				s->img_n = pal_img_n; // record the actual colors we had
+				s->img_out_n = pal_img_n;
+				if (req_comp >= 3) s->img_out_n = req_comp;
+				if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n))
+					return 0;
+			}
+			else if (has_trans) {
+				// non-paletted image with tRNS -> source image has (constant) alpha
+				++s->img_n;
+			}
+			STBI_FREE(z->expanded); z->expanded = NULL;
+			return 1;
+		}
+
+		default:
+			// if critical, fail
+			if (first) return stbi__err("first not IHDR", "Corrupt PNG");
+			if ((c.type & (1 << 29)) == 0) {
+#ifndef STBI_NO_FAILURE_STRINGS
+				// not threadsafe
+				static char invalid_chunk[] = "XXXX PNG chunk not known";
+				invalid_chunk[0] = STBI__BYTECAST(c.type >> 24);
+				invalid_chunk[1] = STBI__BYTECAST(c.type >> 16);
+				invalid_chunk[2] = STBI__BYTECAST(c.type >> 8);
+				invalid_chunk[3] = STBI__BYTECAST(c.type >> 0);
+#endif
+				return stbi__err(invalid_chunk, "PNG not supported: unknown PNG chunk type");
+			}
+			stbi__skip(s, c.length); // bit 29 set: the chunk is skipped instead of failing
+			break;
+		}
+		// end of PNG chunk, read and skip CRC
+		stbi__get32be(s);
+	}
+}
+
+static void* stbi__do_png(stbi__png* p, int* x, int* y, int* n, int req_comp, stbi__result_info* ri)
+{
+	// Runs a full PNG decode and returns the pixel buffer detached from p,
+	// converted to req_comp channels when req_comp is nonzero. Returns NULL if parsing or conversion fails.
+	void* pixels = NULL;
+	if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");
+	if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) {
+		stbi__context* ctx = p->s;
+		ri->bits_per_channel = (p->depth < 8) ? 8 : p->depth; // depths below 8 are reported as 8
+		pixels = p->out; // detach the buffer so the cleanup below does not free it
+		p->out = NULL;
+		if (req_comp && req_comp != ctx->img_out_n) {
+			pixels = (ri->bits_per_channel == 8)
+				? (void*)stbi__convert_format((unsigned char*)pixels, ctx->img_out_n, req_comp, ctx->img_x, ctx->img_y)
+				: (void*)stbi__convert_format16((stbi__uint16*)pixels, ctx->img_out_n, req_comp, ctx->img_x, ctx->img_y);
+			ctx->img_out_n = req_comp;
+			if (pixels == NULL) return pixels;
+		}
+		*x = ctx->img_x;
+		*y = ctx->img_y;
+		if (n) *n = ctx->img_n;
+	}
+	// release whatever buffers p still holds
+	STBI_FREE(p->idata);    p->idata = NULL;
+	STBI_FREE(p->expanded); p->expanded = NULL;
+	STBI_FREE(p->out);      p->out = NULL;
+
+	return pixels;
+}
+
+static void* stbi__png_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri)
+{ // PNG load entry: wraps s in a stack-allocated stbi__png and returns whatever stbi__do_png returns.
+	stbi__png p;
+	p.s = s; // only field set here; stbi__parse_png_file resets expanded/idata/out itself
+	return stbi__do_png(&p, x, y, comp, req_comp, ri);
+}
+
+static int stbi__png_test(stbi__context* s)
+{
+	// Returns the result of the PNG header check; the stream is rewound
+	// afterwards whether or not the header matched.
+	int is_png = stbi__check_png_header(s);
+	stbi__rewind(s); return is_png;
+}
+
+static int stbi__png_info_raw(stbi__png* p, int* x, int* y, int* comp)
+{
+	if (stbi__parse_png_file(p, STBI__SCAN_header, 0)) { // header-only scan
+		// each output pointer is optional
+		if (x) *x = p->s->img_x;
+		if (y) *y = p->s->img_y;
+		if (comp) *comp = p->s->img_n;
+		return 1;
+	}
+	stbi__rewind(p->s); return 0; // scan failed: rewind and report 0
+}
+
+static int stbi__png_info(stbi__context* s, int* x, int* y, int* comp)
+{ // PNG info entry: wraps s in a stack-allocated stbi__png and returns whatever stbi__png_info_raw returns.
+	stbi__png p;
+	p.s = s; // only field set here; stbi__parse_png_file resets expanded/idata/out itself
+	return stbi__png_info_raw(&p, x, y, comp);
+}
+
+static int stbi__png_is16(stbi__context* s)
+{
+	// Returns 1 when the header scan succeeds and the PNG depth is 16, else 0.
+	// The stream is rewound whenever 0 is returned.
+	stbi__png p;
+	int parsed;
+	p.s = s;
+	parsed = stbi__png_info_raw(&p, NULL, NULL, NULL); // rewinds by itself on failure
+	if (parsed && p.depth == 16) return 1;
+	if (parsed) stbi__rewind(p.s);
+	return 0;
+}
+#endif
+
+// Microsoft/Windows BMP image
+
+#ifndef STBI_NO_BMP
+static int stbi__bmp_test_raw(stbi__context* s)
+{
+	// Checks for the 'B','M' signature and an accepted header size
+	// (12, 40, 56, 108 or 124). Does not rewind the stream.
+	int hsz;
+	if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M') return 0;
+	stbi__get32le(s); // discard filesize
+	stbi__get16le(s); // discard reserved
+	stbi__get16le(s); // discard reserved
+	stbi__get32le(s); // discard data offset
+	hsz = stbi__get32le(s);
+	switch (hsz) { case 12: case 40: case 56: case 108: case 124: return 1; }
+	return 0;
+}
+
+static int stbi__bmp_test(stbi__context* s)
+{
+	int is_bmp = stbi__bmp_test_raw(s); // the probe reads header bytes from s
+	stbi__rewind(s); // so the stream is always rewound before returning
+	return is_bmp;
+}
+
+
+// returns the index (0..31) of the highest set bit of z, or -1 when z is 0
+static int stbi__high_bit(unsigned int z)
+{
+	static const int steps[5] = { 16, 8, 4, 2, 1 };
+	int k, bit = 0;
+	if (z == 0) return -1;
+	// binary search: whenever the upper part is non-empty, move it down and count the shift
+	for (k = 0; k < 5; ++k) {
+		if ((z >> steps[k]) != 0) { bit += steps[k]; z >>= steps[k]; }
+	}
+	return bit;
+}
+
+static int stbi__bitcount(unsigned int a)
+{
+	int count = 0; // number of set bits found so far
+	while (a != 0) {
+		a &= a - 1; // clears the lowest set bit
+		++count;
+	}
+	return count;
+}
+
+// extract an arbitrarily-aligned N-bit value (N=bits) from v: `shift` is the right shift
+// that puts the field's top bit at bit 7 (negative means shift left); the N bits are then
+// fractionally extended so the result covers the full 8-bit range.
+static int stbi__shiftsigned(int v, int shift, int bits)
+{
+	static const unsigned int mul_table[9] = {
+	   0,
+	   0xff/*0b11111111*/, 0x55/*0b01010101*/, 0x49/*0b01001001*/, 0x11/*0b00010001*/,
+	   0x21/*0b00100001*/, 0x41/*0b01000001*/, 0x81/*0b10000001*/, 0x01/*0b00000001*/,
+	};
+	static const unsigned int shift_table[9] = {
+	   0, 0,0,1,0,2,4,6,0,
+	};
+	STBI_ASSERT(bits >= 0 && bits <= 8); // checked before bits is used as a shift count and table index
+	if (shift < 0)
+		v <<= -shift;
+	else
+		v >>= shift;
+	STBI_ASSERT(v >= 0 && v < 256);
+	v >>= (8 - bits); // keep only the N significant bits
+	return (int)((unsigned)v * mul_table[bits]) >> shift_table[bits]; // replicate the bit pattern, then trim back to 8 bits
+}
+
+typedef struct
+{ // header values that stbi__bmp_parse_header hands to stbi__bmp_load
+	int bpp, offset, hsz; // bits per pixel, the data-offset field of the file header, and the info-header size
+	unsigned int mr, mg, mb, ma, all_a; // red/green/blue/alpha bit masks, plus the starting value for stbi__bmp_load's OR of all alpha values
+} stbi__bmp_data;
+
+static void* stbi__bmp_parse_header(stbi__context* s, stbi__bmp_data* info)
+{ // Reads the BMP file and info headers from s, filling s->img_x / s->img_y and *info. Returns (void*)1 on success, otherwise the stbi__errpuc result.
+	int hsz;
+	if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M') return stbi__errpuc("not BMP", "Corrupt BMP");
+	stbi__get32le(s); // discard filesize
+	stbi__get16le(s); // discard reserved
+	stbi__get16le(s); // discard reserved
+	info->offset = stbi__get32le(s);
+	info->hsz = hsz = stbi__get32le(s);
+	info->mr = info->mg = info->mb = info->ma = 0; // masks stay 0 unless one of the branches below sets them
+
+	if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108 && hsz != 124) return stbi__errpuc("unknown BMP", "BMP type not supported: unknown");
+	if (hsz == 12) {
+		s->img_x = stbi__get16le(s); // the 12-byte header stores 16-bit dimensions
+		s->img_y = stbi__get16le(s);
+	}
+	else {
+		s->img_x = stbi__get32le(s);
+		s->img_y = stbi__get32le(s);
+	}
+	if (stbi__get16le(s) != 1) return stbi__errpuc("bad BMP", "bad BMP");
+	info->bpp = stbi__get16le(s);
+	if (hsz != 12) {
+		int compress = stbi__get32le(s);
+		if (compress == 1 || compress == 2) return stbi__errpuc("BMP RLE", "BMP type not supported: RLE");
+		stbi__get32le(s); // discard sizeof
+		stbi__get32le(s); // discard hres
+		stbi__get32le(s); // discard vres
+		stbi__get32le(s); // discard colorsused
+		stbi__get32le(s); // discard max important
+		if (hsz == 40 || hsz == 56) {
+			if (hsz == 56) {
+				stbi__get32le(s); // the 56-byte header carries four more 32-bit fields, all discarded
+				stbi__get32le(s);
+				stbi__get32le(s);
+				stbi__get32le(s);
+			}
+			if (info->bpp == 16 || info->bpp == 32) {
+				if (compress == 0) {
+					if (info->bpp == 32) {
+						info->mr = 0xffu << 16;
+						info->mg = 0xffu << 8;
+						info->mb = 0xffu << 0;
+						info->ma = 0xffu << 24;
+						info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0
+					}
+					else {
+						info->mr = 31u << 10; // default 16-bit layout: 5 bits per channel
+						info->mg = 31u << 5;
+						info->mb = 31u << 0;
+					}
+				}
+				else if (compress == 3) {
+					info->mr = stbi__get32le(s); // compress == 3: the masks are read from the file
+					info->mg = stbi__get32le(s);
+					info->mb = stbi__get32le(s);
+					// not documented, but generated by photoshop and handled by mspaint
+					if (info->mr == info->mg && info->mg == info->mb) {
+						// ?!?!?
+						return stbi__errpuc("bad BMP", "bad BMP");
+					}
+				}
+				else
+					return stbi__errpuc("bad BMP", "bad BMP");
+			}
+		}
+		else {
+			int i;
+			if (hsz != 108 && hsz != 124)
+				return stbi__errpuc("bad BMP", "bad BMP");
+			info->mr = stbi__get32le(s); // the 108/124-byte headers always carry the four masks
+			info->mg = stbi__get32le(s);
+			info->mb = stbi__get32le(s);
+			info->ma = stbi__get32le(s);
+			stbi__get32le(s); // discard color space
+			for (i = 0; i < 12; ++i)
+				stbi__get32le(s); // discard color space parameters
+			if (hsz == 124) {
+				stbi__get32le(s); // discard rendering intent
+				stbi__get32le(s); // discard offset of profile data
+				stbi__get32le(s); // discard size of profile data
+				stbi__get32le(s); // discard reserved
+			}
+		}
+	}
+	return (void*)1; // any non-NULL value means success to the caller
+}
+
+
+static void* stbi__bmp_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri)
+{ // Decodes a BMP from s into a newly allocated 8-bit buffer (req_comp channels if nonzero, else 3 or 4). Sets *x, *y and optionally *comp. Returns NULL (error already set) on failure.
+	stbi_uc* out;
+	unsigned int mr = 0, mg = 0, mb = 0, ma = 0, all_a;
+	stbi_uc pal[256][4];
+	int psize = 0, i, j, width;
+	int flip_vertically, pad, target;
+	stbi__bmp_data info;
+	STBI_NOTUSED(ri);
+
+	info.all_a = 255;
+	if (stbi__bmp_parse_header(s, &info) == NULL)
+		return NULL; // error code already set
+
+	flip_vertically = ((int)s->img_y) > 0; // positive height: rows get flipped after decoding
+	s->img_y = abs((int)s->img_y);
+
+	mr = info.mr;
+	mg = info.mg;
+	mb = info.mb;
+	ma = info.ma;
+	all_a = info.all_a;
+
+	if (info.hsz == 12) {
+		if (info.bpp < 24)
+			psize = (info.offset - 14 - 24) / 3;
+	}
+	else {
+		if (info.bpp < 16)
+			psize = (info.offset - 14 - info.hsz) >> 2;
+	}
+
+	s->img_n = ma ? 4 : 3;
+	if (req_comp && req_comp >= 3) // we can directly decode 3 or 4
+		target = req_comp;
+	else
+		target = s->img_n; // if they want monochrome, we'll post-convert
+
+	 // sanity-check size
+	if (!stbi__mad3sizes_valid(target, s->img_x, s->img_y, 0))
+		return stbi__errpuc("too large", "Corrupt BMP");
+
+	out = (stbi_uc*)stbi__malloc_mad3(target, s->img_x, s->img_y, 0);
+	if (!out) return stbi__errpuc("outofmem", "Out of memory");
+	if (info.bpp < 16) {
+		int z = 0;
+		if (psize == 0 || psize > 256) { STBI_FREE(out); return stbi__errpuc("invalid", "Corrupt BMP"); }
+		for (i = 0; i < psize; ++i) {
+			pal[i][2] = stbi__get8(s);
+			pal[i][1] = stbi__get8(s);
+			pal[i][0] = stbi__get8(s);
+			if (info.hsz != 12) stbi__get8(s);
+			pal[i][3] = 255;
+		}
+		stbi__skip(s, info.offset - 14 - info.hsz - psize * (info.hsz == 12 ? 3 : 4));
+		if (info.bpp == 1) width = (s->img_x + 7) >> 3;
+		else if (info.bpp == 4) width = (s->img_x + 1) >> 1;
+		else if (info.bpp == 8) width = s->img_x;
+		else { STBI_FREE(out); return stbi__errpuc("bad bpp", "Corrupt BMP"); }
+		pad = (-width) & 3; // rows are padded to a multiple of 4 bytes
+		if (info.bpp == 1) {
+			// each row is `width` bytes of packed bits (highest bit first) followed by `pad` bytes
+			for (j = 0; j < (int)s->img_y; ++j) {
+				int v = 0;
+				for (i = 0; i < (int)s->img_x; ++i) {
+					int color;
+					if ((i & 7) == 0) v = stbi__get8(s); // fetch a byte only when a pixel needs it, so exactly `width` bytes are read per row
+					color = (v >> (7 - (i & 7))) & 0x1;
+					out[z++] = pal[color][0];
+					out[z++] = pal[color][1];
+					out[z++] = pal[color][2];
+					if (target == 4) out[z++] = 255; // keep the 4-byte pixel stride, as the 4/8-bpp path does
+				}
+				stbi__skip(s, pad);
+			}
+		}
+		else {
+			for (j = 0; j < (int)s->img_y; ++j) {
+				for (i = 0; i < (int)s->img_x; i += 2) {
+					int v = stbi__get8(s), v2 = 0;
+					if (info.bpp == 4) {
+						v2 = v & 15;
+						v >>= 4;
+					}
+					out[z++] = pal[v][0];
+					out[z++] = pal[v][1];
+					out[z++] = pal[v][2];
+					if (target == 4) out[z++] = 255;
+					if (i + 1 == (int)s->img_x) break;
+					v = (info.bpp == 8) ? stbi__get8(s) : v2;
+					out[z++] = pal[v][0];
+					out[z++] = pal[v][1];
+					out[z++] = pal[v][2];
+					if (target == 4) out[z++] = 255;
+				}
+				stbi__skip(s, pad);
+			}
+		}
+	}
+	else {
+		int rshift = 0, gshift = 0, bshift = 0, ashift = 0, rcount = 0, gcount = 0, bcount = 0, acount = 0;
+		int z = 0;
+		int easy = 0;
+		stbi__skip(s, info.offset - 14 - info.hsz);
+		if (info.bpp == 24) width = 3 * s->img_x;
+		else if (info.bpp == 16) width = 2 * s->img_x;
+		else /* bpp = 32 and pad = 0 */ width = 0;
+		pad = (-width) & 3;
+		if (info.bpp == 24) {
+			easy = 1;
+		}
+		else if (info.bpp == 32) {
+			if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000)
+				easy = 2;
+		}
+		if (!easy) {
+			if (!mr || !mg || !mb) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); }
+			// right shift amt to put high bit in position #7
+			rshift = stbi__high_bit(mr) - 7; rcount = stbi__bitcount(mr);
+			gshift = stbi__high_bit(mg) - 7; gcount = stbi__bitcount(mg);
+			bshift = stbi__high_bit(mb) - 7; bcount = stbi__bitcount(mb);
+			ashift = stbi__high_bit(ma) - 7; acount = stbi__bitcount(ma);
+		}
+		for (j = 0; j < (int)s->img_y; ++j) {
+			if (easy) {
+				for (i = 0; i < (int)s->img_x; ++i) {
+					unsigned char a;
+					out[z + 2] = stbi__get8(s);
+					out[z + 1] = stbi__get8(s);
+					out[z + 0] = stbi__get8(s);
+					z += 3;
+					a = (easy == 2 ? stbi__get8(s) : 255);
+					all_a |= a;
+					if (target == 4) out[z++] = a;
+				}
+			}
+			else {
+				int bpp = info.bpp;
+				for (i = 0; i < (int)s->img_x; ++i) {
+					stbi__uint32 v = (bpp == 16 ? (stbi__uint32)stbi__get16le(s) : stbi__get32le(s));
+					unsigned int a;
+					out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mr, rshift, rcount));
+					out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount));
+					out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount));
+					a = (ma ? stbi__shiftsigned(v & ma, ashift, acount) : 255);
+					all_a |= a;
+					if (target == 4) out[z++] = STBI__BYTECAST(a);
+				}
+			}
+			stbi__skip(s, pad);
+		}
+	}
+
+	// if alpha channel is all 0s, replace with all 255s
+	if (target == 4 && all_a == 0)
+		for (i = 4 * s->img_x * s->img_y - 1; i >= 0; i -= 4)
+			out[i] = 255;
+
+	if (flip_vertically) {
+		stbi_uc t;
+		for (j = 0; j < (int)s->img_y >> 1; ++j) {
+			stbi_uc* p1 = out + j * s->img_x * target;
+			stbi_uc* p2 = out + (s->img_y - 1 - j) * s->img_x * target;
+			for (i = 0; i < (int)s->img_x * target; ++i) {
+				t = p1[i], p1[i] = p2[i], p2[i] = t;
+			}
+		}
+	}
+
+	if (req_comp && req_comp != target) {
+		out = stbi__convert_format(out, target, req_comp, s->img_x, s->img_y);
+		if (out == NULL) return out; // stbi__convert_format frees input on failure
+	}
+
+	*x = s->img_x;
+	*y = s->img_y;
+	if (comp)* comp = s->img_n;
+	return out;
+}
+#endif
+
+// Targa Truevision - TGA
+// by Jonathan Dummer
+#ifndef STBI_NO_TGA
+// returns the component count for a TGA pixel size (STBI_grey, STBI_grey_alpha, STBI_rgb, or bits/8 for 24/32), 0 on error
+static int stbi__tga_get_comp(int bits_per_pixel, int is_grey, int* is_rgb16)
+{
+	// only RGB or RGBA (incl. 16bit) or grey allowed; *is_rgb16 (if given) is set to 1 only on the 15/16-bit RGB path
+	if (is_rgb16) *is_rgb16 = 0;
+	if (bits_per_pixel == 8) return STBI_grey;
+	if (bits_per_pixel == 16 && is_grey) return STBI_grey_alpha;
+	if (bits_per_pixel == 15 || bits_per_pixel == 16) {
+		// 16-bit non-grey data is handled exactly like 15-bit RGB
+		if (is_rgb16) *is_rgb16 = 1;
+		return STBI_rgb;
+	}
+	if (bits_per_pixel == 24 || bits_per_pixel == 32)
+		return bits_per_pixel / 8;
+	return 0;
+}
+
+static int stbi__tga_info(stbi__context* s, int* x, int* y, int* comp)
+{
+	// Parses just enough of a TGA header to report width, height and component
+	// count through x / y / comp (each may be NULL). Returns 1 on success; on any
+	// rejected field the stream is rewound and 0 is returned.
+	int width, height, channels, image_type, pixel_bits, palette_bits;
+	int colormap_type;
+	stbi__get8(s); // discard Offset
+	colormap_type = stbi__get8(s); // colormap type
+	if (colormap_type > 1)
+		goto reject; // only RGB or indexed allowed
+	image_type = stbi__get8(s); // image type
+	if (colormap_type != 1) {
+		// "normal" image w/o colormap - only RGB or grey allowed, +/- RLE
+		switch (image_type) {
+		case 2: case 3: case 10: case 11:
+			break;
+		default:
+			goto reject;
+		}
+		stbi__skip(s, 9); // skip colormap specification and image x/y origin
+		palette_bits = 0;
+	}
+	else {
+		// colormapped (paletted) image: image type must be 1 or 9
+		if (image_type != 1 && image_type != 9)
+			goto reject;
+		stbi__skip(s, 4); // skip index of first colormap entry and number of entries
+		palette_bits = stbi__get8(s); // bits per palette color entry
+		switch (palette_bits) {
+		case 8: case 15: case 16: case 24: case 32:
+			break;
+		default:
+			goto reject;
+		}
+		stbi__skip(s, 4); // skip image x and y origin
+	}
+	width = stbi__get16le(s);
+	if (width < 1)
+		goto reject; // test width
+	height = stbi__get16le(s);
+	if (height < 1)
+		goto reject; // test height
+	pixel_bits = stbi__get8(s); // bits per pixel
+	stbi__get8(s); // ignore alpha bits
+	if (palette_bits == 0) {
+		channels = stbi__tga_get_comp(pixel_bits, (image_type == 3) || (image_type == 11), NULL);
+	}
+	else {
+		// when using a colormap, pixel_bits is the size of the indexes;
+		// anything but 8 or 16bit indexes is rejected
+		if (pixel_bits != 8 && pixel_bits != 16)
+			goto reject;
+		channels = stbi__tga_get_comp(palette_bits, 0, NULL);
+	}
+	if (!channels)
+		goto reject;
+
+	if (x) *x = width;
+	if (y) *y = height;
+	if (comp) *comp = channels;
+	return 1; // seems to have passed everything
+
+reject:
+	stbi__rewind(s);
+	return 0;
+}
+
+static int stbi__tga_test(stbi__context* s)
+{
+	// Checks whether the header fields look like a supported TGA; returns 1 if so, else 0. The stream is always rewound.
+	int matched = 0;
+	int color_type, image_type, entry_bits, pixel_bits;
+	stbi__get8(s); // discard Offset
+	color_type = stbi__get8(s); // color type
+	if (color_type > 1) goto done; // only RGB or indexed allowed
+	image_type = stbi__get8(s); // image type
+	if (color_type == 1) { // colormapped (paletted) image
+		if (!(image_type == 1 || image_type == 9)) goto done; // colortype 1 demands image type 1 or 9
+		stbi__skip(s, 4); // skip index of first colormap entry and number of entries
+		entry_bits = stbi__get8(s); // check bits per palette color entry
+		if (!(entry_bits == 8 || entry_bits == 15 || entry_bits == 16 || entry_bits == 24 || entry_bits == 32)) goto done;
+		stbi__skip(s, 4); // skip image x and y origin
+	}
+	else { // "normal" image w/o colormap
+		if (!(image_type == 2 || image_type == 3 || image_type == 10 || image_type == 11)) goto done; // only RGB or grey allowed, +/- RLE
+		stbi__skip(s, 9); // skip colormap specification and image x/y origin
+	}
+	if (stbi__get16le(s) < 1) goto done; // test width
+	if (stbi__get16le(s) < 1) goto done; // test height
+	pixel_bits = stbi__get8(s); // bits per pixel
+	if (color_type == 1 && !(pixel_bits == 8 || pixel_bits == 16)) goto done; // for colormapped images, bpp is size of an index
+	if (!(pixel_bits == 8 || pixel_bits == 15 || pixel_bits == 16 || pixel_bits == 24 || pixel_bits == 32)) goto done;
+
+	matched = 1; // every check passed
+done:
+	stbi__rewind(s);
+	return matched;
+}
+
+// reads one 16-bit little-endian pixel and expands its three 5-bit channels to 24-bit RGB
+static void stbi__tga_read_rgb16(stbi__context* s, stbi_uc* out)
+{
+	int packed = stbi__get16le(s) & 0xffff;
+	int channel;
+
+	// we have 3 channels with 5bits each, at bits 10..14, 5..9 and 0..4;
+	// writing them in that order saves the data in RGB(A) order, so it doesn't
+	// need to be swapped later
+	for (channel = 0; channel < 3; ++channel) {
+		int five = (packed >> (10 - 5 * channel)) & 31;
+		out[channel] = (stbi_uc)((five * 255) / 31); // rescale 0..31 to 0..255
+	}
+
+	// some people claim that the most significant bit might be used for alpha
+	// (possibly if an alpha-bit is set in the "image descriptor byte")
+	// but that only made 16bit test images completely translucent..
+	// so let's treat all 15 and 16bit TGAs as RGB with no alpha.
+}
+
+// Decodes a TGA image from `s` into a newly allocated buffer.
+//
+//   x, y      receive the image width and height
+//   comp      if non-NULL, receives the number of components found in the file
+//   req_comp  if non-zero, the output is converted to this many components
+//   ri        unused by this loader
+//
+// Returns the pixel buffer, or the result of stbi__errpuc() on failure.
+// Handles raw, RLE-compressed, 15/16-bit and colour-mapped images.
+static void* stbi__tga_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri)
+{
+	//   read in the TGA header stuff
+	int tga_offset = stbi__get8(s);
+	int tga_indexed = stbi__get8(s);
+	int tga_image_type = stbi__get8(s);
+	int tga_is_RLE = 0;
+	int tga_palette_start = stbi__get16le(s);
+	int tga_palette_len = stbi__get16le(s);
+	int tga_palette_bits = stbi__get8(s);
+	int tga_x_origin = stbi__get16le(s);
+	int tga_y_origin = stbi__get16le(s);
+	int tga_width = stbi__get16le(s);
+	int tga_height = stbi__get16le(s);
+	int tga_bits_per_pixel = stbi__get8(s);
+	int tga_comp, tga_rgb16 = 0;
+	int tga_inverted = stbi__get8(s);
+	// int tga_alpha_bits = tga_inverted & 15; // the 4 lowest bits - unused (useless?)
+	//   image data
+	unsigned char* tga_data;
+	unsigned char* tga_palette = NULL;
+	int i, j;
+	unsigned char raw_data[4] = { 0 };
+	int RLE_count = 0;
+	int RLE_repeating = 0;
+	int read_next_pixel = 1;
+	STBI_NOTUSED(ri);
+
+	//   do a tiny bit of preprocessing: image types >= 8 are the RLE variants
+	//   of the type that is 8 lower
+	if (tga_image_type >= 8)
+	{
+		tga_image_type -= 8;
+		tga_is_RLE = 1;
+	}
+	//   rows are flipped after decoding when bit 5 of the descriptor byte is clear
+	tga_inverted = 1 - ((tga_inverted >> 5) & 1);
+
+	//   If I'm paletted, then I'll use the number of bits from the palette
+	if (tga_indexed) tga_comp = stbi__tga_get_comp(tga_palette_bits, 0, &tga_rgb16);
+	else tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3), &tga_rgb16);
+
+	if (!tga_comp) // shouldn't really happen, stbi__tga_test() should have ensured basic consistency
+		return stbi__errpuc("bad format", "Can't find out TGA pixelformat");
+
+	//   tga info
+	*x = tga_width;
+	*y = tga_height;
+	if (comp)* comp = tga_comp;
+
+	if (!stbi__mad3sizes_valid(tga_width, tga_height, tga_comp, 0))
+		return stbi__errpuc("too large", "Corrupt TGA");
+
+	tga_data = (unsigned char*)stbi__malloc_mad3(tga_width, tga_height, tga_comp, 0);
+	if (!tga_data) return stbi__errpuc("outofmem", "Out of memory");
+
+	// skip to the data's starting position (offset usually = 0)
+	stbi__skip(s, tga_offset);
+
+	if (!tga_indexed && !tga_is_RLE && !tga_rgb16) {
+		// fast path: raw pixels can be read one whole row at a time,
+		// placing each row directly at its final (possibly flipped) position
+		for (i = 0; i < tga_height; ++i) {
+			int row = tga_inverted ? tga_height - i - 1 : i;
+			stbi_uc* tga_row = tga_data + row * tga_width * tga_comp;
+			stbi__getn(s, tga_row, tga_width * tga_comp);
+		}
+	}
+	else {
+		//   do I need to load a palette?
+		if (tga_indexed)
+		{
+			// An indexed image needs at least one palette entry: out-of-range
+			// pixel indices fall back to entry 0 below, which would read past
+			// the end of an empty palette.
+			if (tga_palette_len == 0) {
+				STBI_FREE(tga_data);
+				return stbi__errpuc("bad palette", "Corrupt TGA");
+			}
+
+			//   any data to skip? (offset usually = 0)
+			stbi__skip(s, tga_palette_start);
+			//   load the palette
+			tga_palette = (unsigned char*)stbi__malloc_mad2(tga_palette_len, tga_comp, 0);
+			if (!tga_palette) {
+				STBI_FREE(tga_data);
+				return stbi__errpuc("outofmem", "Out of memory");
+			}
+			if (tga_rgb16) {
+				// 15/16-bit palette entries are expanded to 8-bit RGB one at a time
+				stbi_uc* pal_entry = tga_palette;
+				STBI_ASSERT(tga_comp == STBI_rgb);
+				for (i = 0; i < tga_palette_len; ++i) {
+					stbi__tga_read_rgb16(s, pal_entry);
+					pal_entry += tga_comp;
+				}
+			}
+			else if (!stbi__getn(s, tga_palette, tga_palette_len * tga_comp)) {
+				STBI_FREE(tga_data);
+				STBI_FREE(tga_palette);
+				return stbi__errpuc("bad palette", "Corrupt TGA");
+			}
+		}
+		//   load the data
+		for (i = 0; i < tga_width * tga_height; ++i)
+		{
+			//   if I'm in RLE mode, do I need to get a RLE chunk?
+			if (tga_is_RLE)
+			{
+				if (RLE_count == 0)
+				{
+					//   yep, get the next byte as a RLE command:
+					//   low 7 bits = run length - 1, top bit = repeat flag
+					int RLE_cmd = stbi__get8(s);
+					RLE_count = 1 + (RLE_cmd & 127);
+					RLE_repeating = RLE_cmd >> 7;
+					read_next_pixel = 1;
+				}
+				else if (!RLE_repeating)
+				{
+					//   literal run: every pixel is stored explicitly
+					read_next_pixel = 1;
+				}
+			}
+			else
+			{
+				read_next_pixel = 1;
+			}
+			//   OK, if I need to read a pixel, do it now
+			if (read_next_pixel)
+			{
+				//   load however much data we did have
+				if (tga_indexed)
+				{
+					// read in index, then perform the lookup
+					int pal_idx = (tga_bits_per_pixel == 8) ? stbi__get8(s) : stbi__get16le(s);
+					if (pal_idx >= tga_palette_len) {
+						// invalid index
+						pal_idx = 0;
+					}
+					pal_idx *= tga_comp;
+					for (j = 0; j < tga_comp; ++j) {
+						raw_data[j] = tga_palette[pal_idx + j];
+					}
+				}
+				else if (tga_rgb16) {
+					STBI_ASSERT(tga_comp == STBI_rgb);
+					stbi__tga_read_rgb16(s, raw_data);
+				}
+				else {
+					//   read in the data raw
+					for (j = 0; j < tga_comp; ++j) {
+						raw_data[j] = stbi__get8(s);
+					}
+				}
+				//   clear the reading flag for the next pixel
+				read_next_pixel = 0;
+			} // end of reading a pixel
+
+			// copy data (for a repeating RLE run this re-emits the last pixel read)
+			for (j = 0; j < tga_comp; ++j)
+				tga_data[i * tga_comp + j] = raw_data[j];
+
+			//   in case we're in RLE mode, keep counting down
+			--RLE_count;
+		}
+		//   do I need to invert the image?
+		if (tga_inverted)
+		{
+			// swap row j with row (height - 1 - j), byte by byte
+			for (j = 0; j * 2 < tga_height; ++j)
+			{
+				int index1 = j * tga_width * tga_comp;
+				int index2 = (tga_height - 1 - j) * tga_width * tga_comp;
+				for (i = tga_width * tga_comp; i > 0; --i)
+				{
+					unsigned char temp = tga_data[index1];
+					tga_data[index1] = tga_data[index2];
+					tga_data[index2] = temp;
+					++index1;
+					++index2;
+				}
+			}
+		}
+		//   clear my palette, if I had one
+		if (tga_palette != NULL)
+		{
+			STBI_FREE(tga_palette);
+		}
+	}
+
+	// swap RGB - if the source data was RGB16, it already is in the right order
+	if (tga_comp >= 3 && !tga_rgb16)
+	{
+		unsigned char* tga_pixel = tga_data;
+		for (i = 0; i < tga_width * tga_height; ++i)
+		{
+			unsigned char temp = tga_pixel[0];
+			tga_pixel[0] = tga_pixel[2];
+			tga_pixel[2] = temp;
+			tga_pixel += tga_comp;
+		}
+	}
+
+	// convert to target component count
+	if (req_comp && req_comp != tga_comp)
+		tga_data = stbi__convert_format(tga_data, tga_comp, req_comp, tga_width, tga_height);
+
+	//   the things I do to get rid of an error message, and yet keep
+	//   Microsoft's C compilers happy... [8^(
+	tga_palette_start = tga_palette_len = tga_palette_bits =
+		tga_x_origin = tga_y_origin = 0;
+	//   OK, done
+	return tga_data;
+}
+#endif
+
+// *************************************************************************************************
+// Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB
+
+#ifndef STBI_NO_PSD
+// Returns 1 when the stream starts with the big-endian PSD signature
+// 0x38425053 ("8BPS"), 0 otherwise. The stream is rewound before returning.
+static int stbi__psd_test(stbi__context* s)
+{
+	int is_psd = (stbi__get32be(s) == 0x38425053) ? 1 : 0;
+
+	stbi__rewind(s);
+	return is_psd;
+}
+
+// Unpacks one RLE-compressed channel of `pixelCount` samples from `s`.
+// Samples are written to p[0], p[4], p[8], ... (one channel of an interleaved
+// 4-component buffer). Returns 1 on success, 0 if a run would exceed the
+// number of samples still expected (corrupt data).
+static int stbi__psd_decode_rle(stbi__context* s, stbi_uc* p, int pixelCount)
+{
+	int done = 0;
+
+	while (done < pixelCount) {
+		int remaining = pixelCount - done;
+		int n = stbi__get8(s);
+
+		if (n == 128)
+			continue; // No-op.
+
+		if (n < 128) {
+			// Literal run: copy the next n+1 source bytes.
+			int run = n + 1;
+			if (run > remaining) return 0; // corrupt data
+			done += run;
+			for (; run > 0; --run, p += 4)
+				*p = stbi__get8(s);
+		}
+		else {
+			// Repeat run: n read as a negative 8-bit int gives -n+1 copies
+			// of the next source byte.
+			int run = 257 - n;
+			stbi_uc fill;
+			if (run > remaining) return 0; // corrupt data
+			fill = stbi__get8(s);
+			done += run;
+			for (; run > 0; --run, p += 4)
+				*p = fill;
+		}
+	}
+
+	return 1;
+}
+
+// Decodes an RGB Photoshop (PSD) image from `s` into a newly allocated,
+// always 4-component interleaved buffer (then converted to req_comp if asked).
+//
+//   x, y      receive the image width and height
+//   comp      if non-NULL, receives 4
+//   req_comp  if non-zero and not 4, the result is converted to this many components
+//   ri        ri->bits_per_channel is set to 16 when a 16-bit result is produced
+//   bpc       bits per channel requested by the caller; 16-bit output is only
+//             produced for uncompressed 16-bit files when bpc == 16
+//
+// Returns the pixel buffer, or the result of stbi__errpuc() on failure.
+static void* stbi__psd_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri, int bpc)
+{
+	int pixelCount;
+	int channelCount, compression;
+	int channel, i;
+	int bitdepth;
+	int w, h;
+	stbi_uc* out;
+	STBI_NOTUSED(ri); // note: ri is in fact read and written further down
+
+	// Check identifier
+	if (stbi__get32be(s) != 0x38425053)   // "8BPS"
+		return stbi__errpuc("not PSD", "Corrupt PSD image");
+
+	// Check file type version.
+	if (stbi__get16be(s) != 1)
+		return stbi__errpuc("wrong version", "Unsupported version of PSD image");
+
+	// Skip 6 reserved bytes.
+	stbi__skip(s, 6);
+
+	// Read the number of channels (R, G, B, A, etc).
+	channelCount = stbi__get16be(s);
+	if (channelCount < 0 || channelCount > 16)
+		return stbi__errpuc("wrong channel count", "Unsupported number of channels in PSD image");
+
+	// Read the rows and columns of the image.
+	h = stbi__get32be(s);
+	w = stbi__get32be(s);
+
+	// Make sure the depth is 8 or 16 bits.
+	bitdepth = stbi__get16be(s);
+	if (bitdepth != 8 && bitdepth != 16)
+		return stbi__errpuc("unsupported bit depth", "PSD bit depth is not 8 or 16 bit");
+
+	// Make sure the color mode is RGB.
+	// Valid options are:
+	//   0: Bitmap
+	//   1: Grayscale
+	//   2: Indexed color
+	//   3: RGB color
+	//   4: CMYK color
+	//   7: Multichannel
+	//   8: Duotone
+	//   9: Lab color
+	if (stbi__get16be(s) != 3)
+		return stbi__errpuc("wrong color format", "PSD is not in RGB color format");
+
+	// Skip the Mode Data.  (It's the palette for indexed color; other info for other modes.)
+	stbi__skip(s, stbi__get32be(s));
+
+	// Skip the image resources.  (resolution, pen tool paths, etc)
+	stbi__skip(s, stbi__get32be(s));
+
+	// Skip the reserved data.
+	stbi__skip(s, stbi__get32be(s));
+
+	// Find out if the data is compressed.
+	// Known values:
+	//   0: no compression
+	//   1: RLE compressed
+	compression = stbi__get16be(s);
+	if (compression > 1)
+		return stbi__errpuc("bad compression", "PSD has an unknown compression format");
+
+	// Check size (guards the 4 * w * h computations below against overflow)
+	if (!stbi__mad3sizes_valid(4, w, h, 0))
+		return stbi__errpuc("too large", "Corrupt PSD");
+
+	// Create the destination image.
+
+	// 16-bit output (8 bytes per pixel) is only produced for uncompressed
+	// 16-bit input when the caller asked for 16 bits per channel.
+	if (!compression && bitdepth == 16 && bpc == 16) {
+		out = (stbi_uc*)stbi__malloc_mad3(8, w, h, 0);
+		ri->bits_per_channel = 16;
+	}
+	else
+		out = (stbi_uc*)stbi__malloc(4 * w * h);
+
+	if (!out) return stbi__errpuc("outofmem", "Out of memory");
+	pixelCount = w * h;
+
+	// Initialize the data to zero.
+	//memset( out, 0, pixelCount * 4 );
+
+	// Finally, the image data.
+	if (compression) {
+		// RLE as used by .PSD and .TIFF
+		// Loop until you get the number of unpacked bytes you are expecting:
+		//     Read the next source byte into n.
+		//     If n is between 0 and 127 inclusive, copy the next n+1 bytes literally.
+		//     Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times.
+		//     Else if n is 128 (i.e. -128 as a signed byte), noop.
+		// Endloop
+
+		// The RLE-compressed data is preceded by a 2-byte data count for each row in the data,
+		// which we're going to just skip.
+		stbi__skip(s, h * channelCount * 2);
+
+		// Read the RLE data by channel; only the first four channels are kept.
+		for (channel = 0; channel < 4; channel++) {
+			stbi_uc* p;
+
+			p = out + channel;
+			if (channel >= channelCount) {
+				// Fill this channel with default data (opaque alpha, zero colour).
+				for (i = 0; i < pixelCount; i++, p += 4)
+					* p = (channel == 3 ? 255 : 0);
+			}
+			else {
+				// Read the RLE data.
+				if (!stbi__psd_decode_rle(s, p, pixelCount)) {
+					STBI_FREE(out);
+					return stbi__errpuc("corrupt", "bad RLE data");
+				}
+			}
+		}
+
+	}
+	else {
+		// We're at the raw image data.  It's each channel in order (Red, Green, Blue, Alpha, ...)
+		// where each channel consists of an 8-bit (or 16-bit) value for each pixel in the image.
+
+		// Read the data by channel.
+		for (channel = 0; channel < 4; channel++) {
+			if (channel >= channelCount) {
+				// Fill this channel with default data.
+				if (bitdepth == 16 && bpc == 16) {
+					stbi__uint16* q = ((stbi__uint16*)out) + channel;
+					stbi__uint16 val = channel == 3 ? 65535 : 0;
+					for (i = 0; i < pixelCount; i++, q += 4)
+						* q = val;
+				}
+				else {
+					stbi_uc* p = out + channel;
+					stbi_uc val = channel == 3 ? 255 : 0;
+					for (i = 0; i < pixelCount; i++, p += 4)
+						* p = val;
+				}
+			}
+			else {
+				if (ri->bits_per_channel == 16) {    // output bpc
+					stbi__uint16* q = ((stbi__uint16*)out) + channel;
+					for (i = 0; i < pixelCount; i++, q += 4)
+						* q = (stbi__uint16)stbi__get16be(s);
+				}
+				else {
+					stbi_uc* p = out + channel;
+					if (bitdepth == 16) {  // input bpc
+						// 16-bit input, 8-bit output: keep only the high byte
+						for (i = 0; i < pixelCount; i++, p += 4)
+							* p = (stbi_uc)(stbi__get16be(s) >> 8);
+					}
+					else {
+						for (i = 0; i < pixelCount; i++, p += 4)
+							* p = stbi__get8(s);
+					}
+				}
+			}
+		}
+	}
+
+	// remove weird white matte from PSD:
+	// for partially transparent pixels, each colour value c becomes
+	// c / a + max * (1 - 1 / a), where a is alpha scaled to 0..1
+	if (channelCount >= 4) {
+		if (ri->bits_per_channel == 16) {
+			for (i = 0; i < w * h; ++i) {
+				stbi__uint16* pixel = (stbi__uint16*)out + 4 * i;
+				if (pixel[3] != 0 && pixel[3] != 65535) {
+					float a = pixel[3] / 65535.0f;
+					float ra = 1.0f / a;
+					float inv_a = 65535.0f * (1 - ra);
+					pixel[0] = (stbi__uint16)(pixel[0] * ra + inv_a);
+					pixel[1] = (stbi__uint16)(pixel[1] * ra + inv_a);
+					pixel[2] = (stbi__uint16)(pixel[2] * ra + inv_a);
+				}
+			}
+		}
+		else {
+			for (i = 0; i < w * h; ++i) {
+				unsigned char* pixel = out + 4 * i;
+				if (pixel[3] != 0 && pixel[3] != 255) {
+					float a = pixel[3] / 255.0f;
+					float ra = 1.0f / a;
+					float inv_a = 255.0f * (1 - ra);
+					pixel[0] = (unsigned char)(pixel[0] * ra + inv_a);
+					pixel[1] = (unsigned char)(pixel[1] * ra + inv_a);
+					pixel[2] = (unsigned char)(pixel[2] * ra + inv_a);
+				}
+			}
+		}
+	}
+
+	// convert to desired output format
+	if (req_comp && req_comp != 4) {
+		if (ri->bits_per_channel == 16)
+			out = (stbi_uc*)stbi__convert_format16((stbi__uint16*)out, 4, req_comp, w, h);
+		else
+			out = stbi__convert_format(out, 4, req_comp, w, h);
+		if (out == NULL) return out; // stbi__convert_format frees input on failure
+	}
+
+	// the decoded buffer always has 4 components before conversion
+	if (comp)* comp = 4;
+	*y = h;
+	*x = w;
+
+	return out;
+}
+#endif
+
+// *************************************************************************************************
+// Softimage PIC loader
+// by Tom Seddon
+//
+// See http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format
+// See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/
+
+#ifndef STBI_NO_PIC
+// Reads up to four bytes from `s` and compares them with the first four
+// characters of `str`. Returns 1 when all four match; returns 0 at the first
+// mismatch, leaving the remaining bytes unread.
+static int stbi__pic_is4(stbi__context* s, const char* str)
+{
+	const char* stop = str + 4;
+
+	while (str != stop) {
+		if (stbi__get8(s) != (stbi_uc)*str)
+			return 0;
+		++str;
+	}
+
+	return 1;
+}
+
+// Checks the PIC signature: a 4-byte magic number, 84 bytes that are read
+// and ignored, then the tag "PICT". Returns 1 on a match, 0 otherwise.
+// Does not rewind the stream.
+static int stbi__pic_test_core(stbi__context* s)
+{
+	int skipped;
+
+	if (stbi__pic_is4(s, "\x53\x80\xF6\x34")) {
+		for (skipped = 0; skipped < 84; ++skipped)
+			stbi__get8(s);
+
+		return stbi__pic_is4(s, "PICT") ? 1 : 0;
+	}
+
+	return 0;
+}
+
+// One channel-packet descriptor read from a PIC file.
+typedef struct
+{
+	stbi_uc size;     // bits per channel value; the loader only accepts 8
+	stbi_uc type;     // compression: 0 = uncompressed, 1 = pure RLE, 2 = mixed RLE
+	stbi_uc channel;  // bit mask of the channels carried by this packet
+} stbi__pic_packet;
+
+// Reads one byte from `s` into dest[i] for every i in 0..3 whose bit
+// (0x80 >> i) is set in `channel`. Returns `dest`, or the result of
+// stbi__errpuc() if the stream ends first.
+static stbi_uc* stbi__readval(stbi__context* s, int channel, stbi_uc* dest)
+{
+	int k;
+
+	for (k = 0; k < 4; ++k) {
+		if (!(channel & (0x80 >> k)))
+			continue;
+		if (stbi__at_eof(s))
+			return stbi__errpuc("bad file", "PIC file too short");
+		dest[k] = stbi__get8(s);
+	}
+
+	return dest;
+}
+
+// Copies src[i] to dest[i] for every i in 0..3 whose bit (0x80 >> i) is set
+// in `channel`; the other entries of dest are left untouched.
+static void stbi__copyval(int channel, stbi_uc* dest, const stbi_uc* src)
+{
+	int k;
+
+	for (k = 0; k < 4; ++k) {
+		if (channel & (0x80 >> k))
+			dest[k] = src[k];
+	}
+}
+
+// Decodes the packet table and pixel data of a PIC image into `result`,
+// a caller-provided buffer of width * height pixels with 4 bytes each.
+//
+//   comp  receives 4 if any packet's channel mask has bit 0x10 set, else 3
+//
+// Returns `result` on success; on failure returns the (null) result of
+// stbi__errpuc() or 0. `result` is never freed here.
+static stbi_uc* stbi__pic_load_core(stbi__context* s, int width, int height, int* comp, stbi_uc* result)
+{
+	int act_comp = 0, num_packets = 0, y, chained;
+	stbi__pic_packet packets[10];
+
+	// this will (should...) cater for even some bizarre stuff like having data
+	 // for the same channel in multiple packets.
+	do {
+		stbi__pic_packet* packet;
+
+		// refuse to read more packets than the fixed-size table can hold
+		if (num_packets == sizeof(packets) / sizeof(packets[0]))
+			return stbi__errpuc("bad format", "too many packets");
+
+		packet = &packets[num_packets++];
+
+		// each descriptor is four bytes: chained flag, size, type, channel mask
+		chained = stbi__get8(s);
+		packet->size = stbi__get8(s);
+		packet->type = stbi__get8(s);
+		packet->channel = stbi__get8(s);
+
+		// accumulate the union of all channel masks
+		act_comp |= packet->channel;
+
+		if (stbi__at_eof(s))          return stbi__errpuc("bad file", "file too short (reading packets)");
+		if (packet->size != 8)  return stbi__errpuc("bad format", "packet isn't 8bpp");
+	} while (chained); // a non-zero chained byte means another descriptor follows
+
+	*comp = (act_comp & 0x10 ? 4 : 3); // has alpha channel?
+
+	// every scanline stores the data of each packet in turn
+	for (y = 0; y < height; ++y) {
+		int packet_idx;
+
+		for (packet_idx = 0; packet_idx < num_packets; ++packet_idx) {
+			stbi__pic_packet* packet = &packets[packet_idx];
+			stbi_uc* dest = result + y * width * 4; // start of row y (4 bytes per pixel)
+
+			switch (packet->type) {
+			default:
+				return stbi__errpuc("bad format", "packet has bad compression type");
+
+			case 0: {//uncompressed
+				int x;
+
+				for (x = 0; x < width; ++x, dest += 4)
+					if (!stbi__readval(s, packet->channel, dest))
+						return 0;
+				break;
+			}
+
+			case 1://Pure RLE
+			{
+				int left = width, i;
+
+				// each run is a count byte followed by one value to repeat
+				while (left > 0) {
+					stbi_uc count, value[4];
+
+					count = stbi__get8(s);
+					if (stbi__at_eof(s))   return stbi__errpuc("bad file", "file too short (pure read count)");
+
+					// clamp the run so that it cannot write past the end of the row
+					if (count > left)
+						count = (stbi_uc)left;
+
+					if (!stbi__readval(s, packet->channel, value))  return 0;
+
+					for (i = 0; i < count; ++i, dest += 4)
+						stbi__copyval(packet->channel, dest, value);
+					left -= count;
+				}
+			}
+			break;
+
+			case 2: {//Mixed RLE
+				int left = width;
+				while (left > 0) {
+					int count = stbi__get8(s), i;
+					if (stbi__at_eof(s))  return stbi__errpuc("bad file", "file too short (mixed read count)");
+
+					if (count >= 128) { // Repeated
+						stbi_uc value[4];
+
+						// 128 means the real count follows as a big-endian 16-bit
+						// value; otherwise the run length is count - 127
+						if (count == 128)
+							count = stbi__get16be(s);
+						else
+							count -= 127;
+						if (count > left)
+							return stbi__errpuc("bad file", "scanline overrun");
+
+						if (!stbi__readval(s, packet->channel, value))
+							return 0;
+
+						for (i = 0; i < count; ++i, dest += 4)
+							stbi__copyval(packet->channel, dest, value);
+					}
+					else { // Raw
+						// count + 1 pixels are stored literally
+						++count;
+						if (count > left) return stbi__errpuc("bad file", "scanline overrun");
+
+						for (i = 0; i < count; ++i, dest += 4)
+							if (!stbi__readval(s, packet->channel, dest))
+								return 0;
+					}
+					left -= count;
+				}
+				break;
+			}
+			}
+		}
+	}
+
+	return result;
+}
+
+// Decodes a Softimage PIC image from `s`.
+//
+//   px, py    receive the image width and height
+//   comp      if non-NULL, receives the component count found in the file (3 or 4)
+//   req_comp  if non-zero, the output is converted to this many components
+//   ri        unused by this loader
+//
+// Returns a newly allocated pixel buffer, or 0 / the result of stbi__errpuc()
+// on failure.
+static void* stbi__pic_load(stbi__context* s, int* px, int* py, int* comp, int req_comp, stbi__result_info* ri)
+{
+	stbi_uc* result;
+	int i, x, y, internal_comp;
+	STBI_NOTUSED(ri);
+
+	if (!comp) comp = &internal_comp;
+
+	// skip the 92 header bytes that precede the dimensions
+	for (i = 0; i < 92; ++i)
+		stbi__get8(s);
+
+	x = stbi__get16be(s);
+	y = stbi__get16be(s);
+	if (stbi__at_eof(s))  return stbi__errpuc("bad file", "file too short (pic header)");
+	if (!stbi__mad3sizes_valid(x, y, 4, 0)) return stbi__errpuc("too large", "PIC image too large to decode");
+
+	stbi__get32be(s); //skip `ratio'
+	stbi__get16be(s); //skip `fields'
+	stbi__get16be(s); //skip `pad'
+
+	// intermediate buffer is RGBA
+	result = (stbi_uc*)stbi__malloc_mad3(x, y, 4, 0);
+	if (!result) return stbi__errpuc("outofmem", "Out of memory");
+	// channels that no packet writes stay at 0xff
+	memset(result, 0xff, x * y * 4);
+
+	*px = x;
+	*py = y;
+
+	// On a decode error, stop here: there is no buffer left to convert and
+	// *comp may not have been written.
+	if (!stbi__pic_load_core(s, x, y, comp, result)) {
+		STBI_FREE(result);
+		return 0;
+	}
+
+	if (req_comp == 0) req_comp = *comp;
+	result = stbi__convert_format(result, 4, req_comp, x, y);
+
+	return result;
+}
+
+// Returns the result of stbi__pic_test_core() for the stream and rewinds the
+// stream before returning.
+static int stbi__pic_test(stbi__context* s)
+{
+	int is_pic;
+
+	is_pic = stbi__pic_test_core(s);
+	stbi__rewind(s);
+
+	return is_pic;
+}
+#endif
+
+// *************************************************************************************************
+// GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb
+
+#ifndef STBI_NO_GIF
+typedef struct
+{
+	stbi__int16 prefix; // index of the preceding code in this code's chain, or -1 for a root code
+	stbi_uc first;      // first byte of the sequence this code expands to
+	stbi_uc suffix;     // byte this code appends; used as the colour index when the code is output
+} stbi__gif_lzw;
+
+typedef struct
+{
+	int w, h;                     // canvas size read from the GIF header
+	stbi_uc* out;                 // output buffer (always 4 components)
+	stbi_uc* background;          // The current "background" as far as a gif is concerned
+	stbi_uc* history;             // one byte per pixel, set to 1 for pixels written by the current frame
+	int flags, bgindex, ratio, transparent, eflags; // header flags/background index/ratio; transparent index (-1 = none); graphic-control flags
+	stbi_uc  pal[256][4];         // global colour table
+	stbi_uc lpal[256][4];         // local colour table of the current image descriptor
+	stbi__gif_lzw codes[8192];    // LZW code table
+	stbi_uc* color_table;         // table in use for the current frame: lpal or pal
+	int parse, step;              // interlace passes still to come; byte distance between decoded rows
+	int lflags;                   // flags byte of the current image descriptor
+	int start_x, start_y;         // frame rectangle origin, as byte offsets into out
+	int max_x, max_y;             // frame rectangle end (exclusive), as byte offsets into out
+	int cur_x, cur_y;             // position of the next pixel to write, as byte offsets into out
+	int line_size;                // bytes per canvas row (w * 4)
+	int delay;                    // frame delay in 1/1000ths of a second
+} stbi__gif;
+
+// Checks for the signature "GIF87a" or "GIF89a" at the current stream
+// position. Returns 1 on a match, 0 at the first mismatching byte.
+// Does not rewind the stream.
+static int stbi__gif_test_raw(stbi__context* s)
+{
+	int version;
+
+	if (stbi__get8(s) != 'G') return 0;
+	if (stbi__get8(s) != 'I') return 0;
+	if (stbi__get8(s) != 'F') return 0;
+	if (stbi__get8(s) != '8') return 0;
+
+	version = stbi__get8(s);
+	if (!(version == '9' || version == '7')) return 0;
+
+	return stbi__get8(s) == 'a';
+}
+
+// Returns the result of stbi__gif_test_raw() for the stream and rewinds the
+// stream before returning.
+static int stbi__gif_test(stbi__context* s)
+{
+	int is_gif;
+
+	is_gif = stbi__gif_test_raw(s);
+	stbi__rewind(s);
+
+	return is_gif;
+}
+
+// Reads `num_entries` three-byte colour entries from `s` into `pal`.
+// The three bytes are stored in reverse order (into slots 2, 1, 0); slot 3 is
+// the alpha value: 0 for the entry whose index equals `transp`, 255 otherwise.
+static void stbi__gif_parse_colortable(stbi__context* s, stbi_uc pal[256][4], int num_entries, int transp)
+{
+	int n = 0;
+
+	while (n < num_entries) {
+		stbi_uc* entry = pal[n];
+
+		entry[2] = stbi__get8(s);
+		entry[1] = stbi__get8(s);
+		entry[0] = stbi__get8(s);
+		entry[3] = (n == transp) ? 0 : 255;
+		++n;
+	}
+}
+
+// Parses the GIF signature and logical screen descriptor into `g`.
+//
+//   comp     if non-NULL, receives 4
+//   is_info  when non-zero, parsing stops before the global colour table
+//
+// Returns 1 on success, or the result of stbi__err() when the signature
+// does not match.
+static int stbi__gif_header(stbi__context* s, stbi__gif* g, int* comp, int is_info)
+{
+	stbi_uc version;
+
+	// signature: "GIF8", then '7' or '9', then 'a'
+	if (stbi__get8(s) != 'G') return stbi__err("not GIF", "Corrupt GIF");
+	if (stbi__get8(s) != 'I') return stbi__err("not GIF", "Corrupt GIF");
+	if (stbi__get8(s) != 'F') return stbi__err("not GIF", "Corrupt GIF");
+	if (stbi__get8(s) != '8') return stbi__err("not GIF", "Corrupt GIF");
+
+	version = stbi__get8(s);
+	if (!(version == '7' || version == '9'))
+		return stbi__err("not GIF", "Corrupt GIF");
+	if (stbi__get8(s) != 'a')
+		return stbi__err("not GIF", "Corrupt GIF");
+
+	stbi__g_failure_reason = "";
+
+	// logical screen descriptor
+	g->w = stbi__get16le(s);
+	g->h = stbi__get16le(s);
+	g->flags = stbi__get8(s);
+	g->bgindex = stbi__get8(s);
+	g->ratio = stbi__get8(s);
+	g->transparent = -1;
+
+	// can't actually tell whether it's 3 or 4 until we parse the comments
+	if (comp) *comp = 4;
+
+	// bit 0x80 of the flags announces a global colour table with
+	// 2 << (flags & 7) entries
+	if (!is_info && (g->flags & 0x80))
+		stbi__gif_parse_colortable(s, g->pal, 2 << (g->flags & 7), -1);
+
+	return 1;
+}
+
+// Reads only the GIF header to report the image dimensions.
+//
+//   x, y  if non-NULL, receive the canvas width and height
+//   comp  passed through to stbi__gif_header()
+//
+// Returns 1 on success. Returns 0 if the header is invalid (the stream is
+// rewound in that case) or if the temporary decoder state cannot be allocated.
+static int stbi__gif_info_raw(stbi__context* s, int* x, int* y, int* comp)
+{
+	// stbi__gif is large, so it is allocated on the heap instead of the stack
+	stbi__gif* g = (stbi__gif*)stbi__malloc(sizeof(stbi__gif));
+	if (!g) return stbi__err("outofmem", "Out of memory");
+
+	if (!stbi__gif_header(s, g, comp, 1)) {
+		STBI_FREE(g);
+		stbi__rewind(s);
+		return 0;
+	}
+	if (x)* x = g->w;
+	if (y)* y = g->h;
+	STBI_FREE(g);
+	return 1;
+}
+
+// Writes the pixel sequence encoded by LZW code `code` into g->out, starting
+// at the current position (g->cur_x, g->cur_y) and advancing it, including
+// the row order of interlaced images.
+static void stbi__out_gif_code(stbi__gif* g, stbi__uint16 code)
+{
+	stbi_uc* p, * c;
+	int idx;
+
+	// recurse to decode the prefixes, since the linked-list is backwards,
+	// and working backwards through an interleaved image would be nasty
+	if (g->codes[code].prefix >= 0)
+		stbi__out_gif_code(g, g->codes[code].prefix);
+
+	// past the last row of the frame rectangle: drop the pixel
+	if (g->cur_y >= g->max_y) return;
+
+	// cur_x and cur_y are both byte offsets, so their sum addresses the pixel
+	idx = g->cur_x + g->cur_y;
+	p = &g->out[idx];
+	g->history[idx / 4] = 1; // history holds one byte per pixel
+
+	c = &g->color_table[g->codes[code].suffix * 4];
+	if (c[3] > 128) { // don't render transparent pixels
+		p[0] = c[2];
+		p[1] = c[1];
+		p[2] = c[0];
+		p[3] = c[3];
+	}
+	g->cur_x += 4;
+
+	// end of the row: wrap to the start of the next row to decode
+	if (g->cur_x >= g->max_x) {
+		g->cur_x = g->start_x;
+		g->cur_y += g->step;
+
+		// an interlace pass is finished: begin the next one with a smaller
+		// row step, starting half a step below the top of the frame
+		while (g->cur_y >= g->max_y && g->parse > 0) {
+			g->step = (1 << g->parse) * g->line_size;
+			g->cur_y = g->start_y + (g->step >> 1);
+			--g->parse;
+		}
+	}
+}
+
+// Decodes the LZW-compressed raster data of one GIF image from `s`, writing
+// pixels through stbi__out_gif_code(). Returns g->out when the data ends
+// (zero-length block or end-of-stream code), NULL if the initial code size is
+// greater than 12, or the result of stbi__errpuc() on corrupt data.
+static stbi_uc* stbi__process_gif_raster(stbi__context* s, stbi__gif* g)
+{
+	stbi_uc lzw_cs;
+	stbi__int32 len, init_code;
+	stbi__uint32 first;
+	stbi__int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear;
+	stbi__gif_lzw* p;
+
+	// the first byte is the minimum LZW code size
+	lzw_cs = stbi__get8(s);
+	if (lzw_cs > 12) return NULL;
+	clear = 1 << lzw_cs;
+	first = 1; // stays set until the first clear code has been seen
+	codesize = lzw_cs + 1;
+	codemask = (1 << codesize) - 1;
+	bits = 0;
+	valid_bits = 0;
+	// root codes: each one stands for the single byte equal to its own index
+	for (init_code = 0; init_code < clear; init_code++) {
+		g->codes[init_code].prefix = -1;
+		g->codes[init_code].first = (stbi_uc)init_code;
+		g->codes[init_code].suffix = (stbi_uc)init_code;
+	}
+
+	// support no starting clear code
+	avail = clear + 2; // clear and clear + 1 are the clear and end-of-stream codes
+	oldcode = -1;
+
+	len = 0; // bytes left in the current data sub-block
+	for (;;) {
+		if (valid_bits < codesize) {
+			// not enough buffered bits for a code: pull in one more byte
+			if (len == 0) {
+				len = stbi__get8(s); // start new block
+				if (len == 0)
+					return g->out;
+			}
+			--len;
+			bits |= (stbi__int32)stbi__get8(s) << valid_bits;
+			valid_bits += 8;
+		}
+		else {
+			// extract the next code from the low bits of the bit buffer
+			stbi__int32 code = bits & codemask;
+			bits >>= codesize;
+			valid_bits -= codesize;
+			// @OPTIMIZE: is there some way we can accelerate the non-clear path?
+			if (code == clear) {  // clear code
+				// reset the table and the code size to their initial state
+				codesize = lzw_cs + 1;
+				codemask = (1 << codesize) - 1;
+				avail = clear + 2;
+				oldcode = -1;
+				first = 0;
+			}
+			else if (code == clear + 1) { // end of stream code
+				// skip the rest of this sub-block and all following ones
+				stbi__skip(s, len);
+				while ((len = stbi__get8(s)) > 0)
+					stbi__skip(s, len);
+				return g->out;
+			}
+			else if (code <= avail) {
+				if (first) {
+					return stbi__errpuc("no clear code", "Corrupt GIF");
+				}
+
+				if (oldcode >= 0) {
+					// add a table entry: the previous sequence plus one byte
+					p = &g->codes[avail++];
+					if (avail > 8192) {
+						return stbi__errpuc("too many codes", "Corrupt GIF");
+					}
+
+					p->prefix = (stbi__int16)oldcode;
+					p->first = g->codes[oldcode].first;
+					// if the code is the entry being created, the appended byte
+					// is the first byte of the previous sequence
+					p->suffix = (code == avail) ? p->first : g->codes[code].first;
+				}
+				else if (code == avail)
+					return stbi__errpuc("illegal code in raster", "Corrupt GIF");
+
+				stbi__out_gif_code(g, (stbi__uint16)code);
+
+				// widen the codes once the table fills the current code size,
+				// up to a maximum of 12 bits
+				if ((avail & codemask) == 0 && avail <= 0x0FFF) {
+					codesize++;
+					codemask = (1 << codesize) - 1;
+				}
+
+				oldcode = code;
+			}
+			else {
+				return stbi__errpuc("illegal code in raster", "Corrupt GIF");
+			}
+		}
+	}
+}
+
+// Decodes the next frame of a GIF stream into g->out.
+//
+// On the first call (g->out == 0) the header is parsed and the working buffers
+// g->out, g->background and g->history are allocated; on later calls the
+// previous frame is disposed of first, as requested by its graphic control
+// extension.
+//
+//   comp      passed through to stbi__gif_header() on the first call
+//   req_comp  unused here; callers convert the result themselves
+//   two_back  the image from two frames ago, used for disposal method 3
+//             ("restore to previous"); may be 0, in which case method 2 is used
+//
+// Returns g->out when a frame was decoded, (stbi_uc*)s as a marker when the
+// stream terminator was reached, or NULL / the result of stbi__errpuc() on
+// error. Buffers already stored in `g` are not freed on error; that is left
+// to the caller.
+static stbi_uc* stbi__gif_load_next(stbi__context* s, stbi__gif* g, int* comp, int req_comp, stbi_uc* two_back)
+{
+	int dispose;
+	int first_frame;
+	int pi;
+	int pcount;
+	STBI_NOTUSED(req_comp);
+
+	// on first frame, any non-written pixels get the background colour (non-transparent)
+	first_frame = 0;
+	if (g->out == 0) {
+		if (!stbi__gif_header(s, g, comp, 0))     return 0; // stbi__g_failure_reason set by stbi__gif_header
+
+		// make sure 4 * w * h cannot overflow before sizing the buffers with it
+		if (!stbi__mad3sizes_valid(4, g->w, g->h, 0))
+			return stbi__errpuc("too large", "GIF image is too large");
+		pcount = g->w * g->h;
+
+		g->out = (stbi_uc*)stbi__malloc(4 * pcount);
+		g->background = (stbi_uc*)stbi__malloc(4 * pcount);
+		g->history = (stbi_uc*)stbi__malloc(pcount);
+		// all three buffers are written below, so every allocation must have succeeded
+		if (!g->out || !g->background || !g->history)
+			return stbi__errpuc("outofmem", "Out of memory");
+
+		// image is treated as "transparent" at the start - ie, nothing overwrites the current background;
+		// background colour is only used for pixels that are not rendered first frame, after that "background"
+		// color refers to the color that was there the previous frame.
+		memset(g->out, 0x00, 4 * pcount);
+		memset(g->background, 0x00, 4 * pcount); // state of the background (starts transparent)
+		first_frame = 1;
+	}
+	else {
+		// second frame - how do we dispose of the previous one?
+		dispose = (g->eflags & 0x1C) >> 2;
+		pcount = g->w * g->h;
+
+		if ((dispose == 3) && (two_back == 0)) {
+			dispose = 2; // if I don't have an image to revert back to, default to the old background
+		}
+
+		if (dispose == 3) { // use previous graphic
+			for (pi = 0; pi < pcount; ++pi) {
+				if (g->history[pi]) {
+					memcpy(&g->out[pi * 4], &two_back[pi * 4], 4);
+				}
+			}
+		}
+		else if (dispose == 2) {
+			// restore what was changed last frame to background before that frame;
+			for (pi = 0; pi < pcount; ++pi) {
+				if (g->history[pi]) {
+					memcpy(&g->out[pi * 4], &g->background[pi * 4], 4);
+				}
+			}
+		}
+		else {
+			// This is a non-disposal case either way, so just
+			// leave the pixels as is, and they will become the new background
+			// 1: do not dispose
+			// 0:  not specified.
+		}
+
+		// background is what out is after the undoing of the previous frame;
+		memcpy(g->background, g->out, 4 * g->w * g->h);
+	}
+
+	// clear my history;
+	memset(g->history, 0x00, g->w * g->h);        // pixels that were affected previous frame
+
+	for (;;) {
+		int tag = stbi__get8(s);
+		switch (tag) {
+		case 0x2C: /* Image Descriptor */
+		{
+			stbi__int32 x, y, w, h;
+			stbi_uc* o;
+
+			x = stbi__get16le(s);
+			y = stbi__get16le(s);
+			w = stbi__get16le(s);
+			h = stbi__get16le(s);
+			if (((x + w) > (g->w)) || ((y + h) > (g->h)))
+				return stbi__errpuc("bad Image Descriptor", "Corrupt GIF");
+
+			// all positions are kept as byte offsets into g->out
+			g->line_size = g->w * 4;
+			g->start_x = x * 4;
+			g->start_y = y * g->line_size;
+			g->max_x = g->start_x + w * 4;
+			g->max_y = g->start_y + h * g->line_size;
+			g->cur_x = g->start_x;
+			g->cur_y = g->start_y;
+
+			// A zero-width rectangle has no valid pixel position, but
+			// stbi__out_gif_code only checks the row bound before writing.
+			// Parking cur_y at max_y makes it discard every decoded pixel
+			// instead of writing outside the rectangle.
+			if (w == 0)
+				g->cur_y = g->max_y;
+
+			g->lflags = stbi__get8(s);
+
+			if (g->lflags & 0x40) {
+				g->step = 8 * g->line_size; // first interlaced spacing
+				g->parse = 3;
+			}
+			else {
+				g->step = g->line_size;
+				g->parse = 0;
+			}
+
+			// a local colour table (flag 0x80) takes precedence over the global one
+			if (g->lflags & 0x80) {
+				stbi__gif_parse_colortable(s, g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? g->transparent : -1);
+				g->color_table = (stbi_uc*)g->lpal;
+			}
+			else if (g->flags & 0x80) {
+				g->color_table = (stbi_uc*)g->pal;
+			}
+			else
+				return stbi__errpuc("missing color table", "Corrupt GIF");
+
+			o = stbi__process_gif_raster(s, g);
+			if (o == NULL) return NULL;
+
+			// if this was the first frame,
+			pcount = g->w * g->h;
+			if (first_frame && (g->bgindex > 0)) {
+				// if first frame, any pixel not drawn to gets the background color
+				for (pi = 0; pi < pcount; ++pi) {
+					if (g->history[pi] == 0) {
+						g->pal[g->bgindex][3] = 255; // just in case it was made transparent, undo that; It will be reset next frame if need be;
+						memcpy(&g->out[pi * 4], &g->pal[g->bgindex], 4);
+					}
+				}
+			}
+
+			return o;
+		}
+
+		case 0x21: // Comment Extension.
+		{
+			int len;
+			int ext = stbi__get8(s);
+			if (ext == 0xF9) { // Graphic Control Extension.
+				len = stbi__get8(s);
+				if (len == 4) {
+					g->eflags = stbi__get8(s);
+					g->delay = 10 * stbi__get16le(s); // delay - 1/100th of a second, saving as 1/1000ths.
+
+					// unset old transparent
+					if (g->transparent >= 0) {
+						g->pal[g->transparent][3] = 255;
+					}
+					if (g->eflags & 0x01) {
+						g->transparent = stbi__get8(s);
+						if (g->transparent >= 0) {
+							g->pal[g->transparent][3] = 0;
+						}
+					}
+					else {
+						// don't need transparent
+						stbi__skip(s, 1);
+						g->transparent = -1;
+					}
+				}
+				else {
+					stbi__skip(s, len);
+					break;
+				}
+			}
+			// skip the remaining sub-blocks of the extension
+			while ((len = stbi__get8(s)) != 0) {
+				stbi__skip(s, len);
+			}
+			break;
+		}
+
+		case 0x3B: // gif stream termination code
+			return (stbi_uc*)s; // using '1' causes warning on some compilers
+
+		default:
+			return stbi__errpuc("unknown code", "Corrupt GIF");
+		}
+	}
+}
+
+// Releases everything stbi__load_gif_main has allocated so far and reports
+// an out-of-memory error. Always returns the (null) result of stbi__errpuc().
+static void* stbi__load_gif_main_outofmem(stbi__gif* g, stbi_uc* out, int** delays)
+{
+	STBI_FREE(g->out);
+	STBI_FREE(g->history);
+	STBI_FREE(g->background);
+
+	if (out) STBI_FREE(out);
+	if (delays && *delays) {
+		STBI_FREE(*delays);
+		*delays = 0;
+	}
+	return stbi__errpuc("outofmem", "Out of memory");
+}
+
+// Decodes every frame of a GIF into one buffer holding the frames back to back.
+//
+//   delays    if non-NULL, receives a newly allocated array with one delay per
+//             frame, in 1/1000ths of a second (0 if no frame was decoded)
+//   x, y      receive the frame width and height
+//   z         receives the number of frames decoded
+//   comp      passed through to the frame decoder
+//   req_comp  if non-zero and not 4, the frames are converted to this many components
+//
+// Returns the frame buffer (NULL if no frame could be decoded), or the result
+// of stbi__errpuc() if the stream is not a GIF or memory runs out.
+static void* stbi__load_gif_main(stbi__context* s, int** delays, int* x, int* y, int* z, int* comp, int req_comp)
+{
+	if (stbi__gif_test(s)) {
+		int layers = 0;
+		stbi_uc* u = 0;
+		stbi_uc* out = 0;
+		stbi_uc* two_back = 0;
+		stbi__gif g;
+		int stride;
+		memset(&g, 0, sizeof(g));
+		if (delays) {
+			*delays = 0;
+		}
+
+		do {
+			u = stbi__gif_load_next(s, &g, comp, req_comp, two_back);
+			if (u == (stbi_uc*)s) u = 0;  // end of animated gif marker
+
+			if (u) {
+				*x = g.w;
+				*y = g.h;
+				++layers;
+				stride = g.w * g.h * 4;
+
+				// grow the output (and the delay table) by one frame; keep the old
+				// pointers until the new allocation is known to have succeeded
+				if (out) {
+					void* grown = STBI_REALLOC(out, layers * stride);
+					if (!grown)
+						return stbi__load_gif_main_outofmem(&g, out, delays);
+					out = (stbi_uc*)grown;
+
+					if (delays) {
+						int* grown_delays = (int*)STBI_REALLOC(*delays, sizeof(int) * layers);
+						if (!grown_delays)
+							return stbi__load_gif_main_outofmem(&g, out, delays);
+						*delays = grown_delays;
+					}
+				}
+				else {
+					out = (stbi_uc*)stbi__malloc(layers * stride);
+					if (!out)
+						return stbi__load_gif_main_outofmem(&g, out, delays);
+
+					if (delays) {
+						*delays = (int*)stbi__malloc(layers * sizeof(int));
+						if (!*delays)
+							return stbi__load_gif_main_outofmem(&g, out, delays);
+					}
+				}
+				memcpy(out + ((layers - 1) * stride), u, stride);
+
+				// When the next frame is decoded, "two frames ago" is the frame
+				// stored just before the one copied above. The pointer has to be
+				// recomputed after every reallocation, since `out` may have moved.
+				if (layers >= 2) {
+					two_back = out + (layers - 2) * stride;
+				}
+
+				if (delays) {
+					(*delays)[layers - 1U] = g.delay;
+				}
+			}
+		} while (u != 0);
+
+		// free temp buffer;
+		STBI_FREE(g.out);
+		STBI_FREE(g.history);
+		STBI_FREE(g.background);
+
+		// do the final conversion after loading everything;
+		// (skipped when no frame was decoded and there is nothing to convert)
+		if (out && req_comp && req_comp != 4)
+			out = stbi__convert_format(out, 4, req_comp, layers * g.w, g.h);
+
+		*z = layers;
+		return out;
+	}
+	else {
+		return stbi__errpuc("not GIF", "Image was not as a gif type.");
+	}
+}
+
+// Decodes the first frame of a GIF.
+//
+//   x, y      receive the image width and height on success
+//   comp      passed through to the frame decoder
+//   req_comp  if non-zero and not 4, the result is converted to this many components
+//   ri        unused by this loader
+//
+// Returns the pixel buffer, or 0 if no frame could be decoded.
+static void* stbi__gif_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri)
+{
+	stbi_uc* u = 0;
+	stbi__gif g;
+	memset(&g, 0, sizeof(g));
+	STBI_NOTUSED(ri);
+
+	u = stbi__gif_load_next(s, &g, comp, req_comp, 0);
+	if (u == (stbi_uc*)s) u = 0;  // end of animated gif marker
+	if (u) {
+		*x = g.w;
+		*y = g.h;
+
+		// moved conversion to after successful load so that the same
+		// can be done for multiple frames.
+		if (req_comp && req_comp != 4)
+			u = stbi__convert_format(u, 4, req_comp, g.w, g.h);
+	}
+	else if (g.out) {
+		// decoding failed after the image buffer was allocated; nobody else
+		// holds a pointer to it, so release it here
+		STBI_FREE(g.out);
+	}
+
+	// free buffers needed for multiple frame loading;
+	STBI_FREE(g.history);
+	STBI_FREE(g.background);
+
+	return u;
+}
+
+// Reports the dimensions and component count of a GIF by forwarding all
+// arguments to stbi__gif_info_raw() and returning its result.
+static int stbi__gif_info(stbi__context* s, int* x, int* y, int* comp)
+{
+	int found = stbi__gif_info_raw(s, x, y, comp);
+
+	return found;
+}
+#endif
+
+// *************************************************************************************************
+// Radiance RGBE HDR loader
+// originally by Nicolas Schulz
+#ifndef STBI_NO_HDR
+// Compares the bytes at the current stream position with the NUL-terminated
+// `signature`. Returns 0 at the first mismatch (without rewinding); on a full
+// match the stream is rewound and 1 is returned.
+static int stbi__hdr_test_core(stbi__context* s, const char* signature)
+{
+	const char* expected;
+
+	for (expected = signature; *expected; ++expected) {
+		if (stbi__get8(s) != *expected)
+			return 0;
+	}
+
+	stbi__rewind(s);
+	return 1;
+}
+
+// Returns non-zero when the stream starts with either of the signatures
+// "#?RADIANCE\n" or "#?RGBE\n". The stream is rewound before returning.
+static int stbi__hdr_test(stbi__context* s)
+{
+	int matched;
+
+	matched = stbi__hdr_test_core(s, "#?RADIANCE\n");
+	stbi__rewind(s);
+	if (matched)
+		return matched;
+
+	// first signature did not match: try the alternative one
+	matched = stbi__hdr_test_core(s, "#?RGBE\n");
+	stbi__rewind(s);
+	return matched;
+}
+
+#define STBI__HDR_BUFLEN  1024
+static char* stbi__hdr_gettoken(stbi__context * z, char* buffer)
+{
+	int len = 0;
+	char c = '\0';
+	// Reads one line from z into buffer (without the '\n') and NUL-terminates it; buffer must hold STBI__HDR_BUFLEN bytes.
+	c = (char)stbi__get8(z);
+	// Copy bytes until a newline is read or the stream reports end of input.
+	while (!stbi__at_eof(z) && c != '\n') {
+		buffer[len++] = c;
+		if (len == STBI__HDR_BUFLEN - 1) {
+			// flush to end of line
+			while (!stbi__at_eof(z) && stbi__get8(z) != '\n')
+				;
+			break;
+		}
+		c = (char)stbi__get8(z);
+	}
+	// len never exceeds STBI__HDR_BUFLEN - 1 here, so the terminator stays inside the buffer.
+	buffer[len] = 0;
+	return buffer;
+}
+
+// Expands one 4-byte RGBE pixel into req_comp floats: 1-2 write an averaged value, 3-4 write RGB; a second/fourth slot is set to 1.
+static void stbi__hdr_convert(float* output, stbi_uc* input, int req_comp)
+{
+	float scale;
+	if (input[3] == 0) {
+		switch (req_comp) {
+		case 4: output[3] = 1; /* fallthrough */
+		case 3: output[0] = output[1] = output[2] = 0;
+			break;
+		case 2: output[1] = 1; /* fallthrough */
+		case 1: output[0] = 0;
+			break;
+		}
+		return;
+	}
+	scale = (float)ldexp(1.0f, input[3] - (int)(128 + 8)); // 2^(exponent byte - 136)
+	if (req_comp <= 2) {
+		output[0] = (input[0] + input[1] + input[2]) * scale / 3;
+	}
+	else {
+		output[0] = input[0] * scale;
+		output[1] = input[1] * scale;
+		output[2] = input[2] * scale;
+	}
+	if (req_comp == 2) output[1] = 1;
+	if (req_comp == 4) output[3] = 1;
+}
+
+// Decodes a Radiance RGBE image into req_comp floats per pixel (3 when req_comp is 0); returns NULL through stbi__errpf on failure.
+static float* stbi__hdr_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri)
+{
+	char buffer[STBI__HDR_BUFLEN];
+	char* token;
+	int valid = 0;
+	int width, height, len;
+	stbi_uc* scanline;
+	float* hdr_data;
+	unsigned char count, value;
+	int i, j, k, c1, c2, z;
+	const char* headerToken;
+	STBI_NOTUSED(ri);
+
+	// Check identifier
+	headerToken = stbi__hdr_gettoken(s, buffer);
+	if (strcmp(headerToken, "#?RADIANCE") != 0 && strcmp(headerToken, "#?RGBE") != 0)
+		return stbi__errpf("not HDR", "Corrupt HDR image");
+
+	// Parse header
+	for (;;) {
+		token = stbi__hdr_gettoken(s, buffer);
+		if (token[0] == 0) break;
+		if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
+	}
+
+	if (!valid)    return stbi__errpf("unsupported format", "Unsupported HDR format");
+
+	// Parse width and height
+	// can't use sscanf() if we're not using stdio!
+	token = stbi__hdr_gettoken(s, buffer);
+	if (strncmp(token, "-Y ", 3))  return stbi__errpf("unsupported data layout", "Unsupported HDR format");
+	token += 3;
+	height = (int)strtol(token, &token, 10);
+	while (*token == ' ') ++token;
+	if (strncmp(token, "+X ", 3))  return stbi__errpf("unsupported data layout", "Unsupported HDR format");
+	token += 3;
+	width = (int)strtol(token, NULL, 10);
+
+	*x = width;
+	*y = height;
+
+	if (comp)* comp = 3;
+	if (req_comp == 0) req_comp = 3;
+
+	if (!stbi__mad4sizes_valid(width, height, req_comp, sizeof(float), 0))
+		return stbi__errpf("too large", "HDR image is too large");
+
+	// Read data
+	hdr_data = (float*)stbi__malloc_mad4(width, height, req_comp, sizeof(float), 0);
+	if (!hdr_data)
+		return stbi__errpf("outofmem", "Out of memory");
+
+	// Load image data
+	// image data is stored as some number of scanlines; widths outside 8..32767 are read as flat RGBE pixels
+	if (width < 8 || width >= 32768) {
+		// Read flat data
+		for (j = 0; j < height; ++j) {
+			for (i = 0; i < width; ++i) {
+				stbi_uc rgbe[4];
+			main_decode_loop:
+				stbi__getn(s, rgbe, 4);
+				stbi__hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp);
+			}
+		}
+	}
+	else {
+		// Read RLE-encoded data
+		scanline = NULL;
+
+		for (j = 0; j < height; ++j) {
+			c1 = stbi__get8(s);
+			c2 = stbi__get8(s);
+			len = stbi__get8(s);
+			if (c1 != 2 || c2 != 2 || (len & 0x80)) {
+				// not run-length encoded, so we have to actually use THIS data as a decoded
+				// pixel (note this can't be a valid pixel--one of RGB must be >= 128)
+				stbi_uc rgbe[4];
+				rgbe[0] = (stbi_uc)c1;
+				rgbe[1] = (stbi_uc)c2;
+				rgbe[2] = (stbi_uc)len;
+				rgbe[3] = (stbi_uc)stbi__get8(s);
+				stbi__hdr_convert(hdr_data, rgbe, req_comp);
+				i = 1;
+				j = 0;
+				STBI_FREE(scanline);
+				goto main_decode_loop; // yes, this makes no sense
+			}
+			len <<= 8;
+			len |= stbi__get8(s);
+			if (len != width) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("invalid decoded scanline length", "corrupt HDR"); }
+			if (scanline == NULL) {
+				scanline = (stbi_uc*)stbi__malloc_mad2(width, 4, 0);
+				if (!scanline) {
+					STBI_FREE(hdr_data);
+					return stbi__errpf("outofmem", "Out of memory");
+				}
+			}
+
+			for (k = 0; k < 4; ++k) {
+				int nleft;
+				i = 0;
+				while ((nleft = width - i) > 0) {
+					count = stbi__get8(s);
+					if (count > 128) {
+						// Run (count is at least 1 after the subtraction, so this branch always advances i)
+						value = stbi__get8(s);
+						count -= 128;
+						if (count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
+						for (z = 0; z < count; ++z)
+							scanline[i++ * 4 + k] = value;
+					}
+					else {
+						// Dump; a zero count would never advance i and loop forever, so it is rejected as corrupt
+						if (count == 0 || count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
+						for (z = 0; z < count; ++z)
+							scanline[i++ * 4 + k] = stbi__get8(s);
+					}
+				}
+			}
+			for (i = 0; i < width; ++i)
+				stbi__hdr_convert(hdr_data + (j * width + i) * req_comp, scanline + i * 4, req_comp);
+		}
+		if (scanline)
+			STBI_FREE(scanline);
+	}
+
+	return hdr_data;
+}
+
+// Reads only the HDR header: stores height/width in *y/*x and 3 in *comp, returning 1. On failure rewinds s and returns 0.
+static int stbi__hdr_info(stbi__context* s, int* x, int* y, int* comp)
+{
+	char line[STBI__HDR_BUFLEN];
+	char* cursor;
+	int have_format = 0;
+	int unused;
+
+	// NULL outputs are redirected to a scratch int so the stores below are unconditional
+	if (!x) x = &unused;
+	if (!y) y = &unused;
+	if (!comp) comp = &unused;
+
+	if (stbi__hdr_test(s) == 0) {
+		stbi__rewind(s);
+		return 0;
+	}
+
+	// scan header lines up to the first empty one, looking for the only accepted FORMAT line
+	for (cursor = stbi__hdr_gettoken(s, line); cursor[0] != 0; cursor = stbi__hdr_gettoken(s, line)) {
+		if (strcmp(cursor, "FORMAT=32-bit_rle_rgbe") == 0) have_format = 1;
+	}
+	if (!have_format) {
+		stbi__rewind(s);
+		return 0;
+	}
+
+	// the next line must look like "-Y <height> +X <width>"
+	cursor = stbi__hdr_gettoken(s, line);
+	if (strncmp(cursor, "-Y ", 3)) {
+		stbi__rewind(s);
+		return 0;
+	}
+	*y = (int)strtol(cursor + 3, &cursor, 10);
+	while (*cursor == ' ') ++cursor;
+	if (strncmp(cursor, "+X ", 3)) {
+		stbi__rewind(s);
+		return 0;
+	}
+	*x = (int)strtol(cursor + 3, NULL, 10);
+	*comp = 3;
+	return 1;
+}
+#endif // STBI_NO_HDR
+
+#ifndef STBI_NO_BMP
+// Parses only the BMP header; on success reports s->img_x / s->img_y and 4 components when the header's `ma` field is non-zero, else 3.
+static int stbi__bmp_info(stbi__context* s, int* x, int* y, int* comp)
+{
+	stbi__bmp_data header;
+	void* parsed;
+
+	header.all_a = 255;
+	parsed = stbi__bmp_parse_header(s, &header);
+	stbi__rewind(s); // rewind whether or not parsing succeeded
+	if (!parsed) return 0;
+	if (x != NULL) *x = s->img_x;
+	if (y != NULL) *y = s->img_y;
+	if (comp != NULL) *comp = header.ma ? 4 : 3;
+	return 1;
+}
+#endif
+
+#ifndef STBI_NO_PSD
+// Reads PSD header fields: stores width/height in *x/*y and 4 in *comp, returning 1. Any rejected field rewinds s and returns 0.
+static int stbi__psd_info(stbi__context* s, int* x, int* y, int* comp)
+{
+	int channels, bits, scratch;
+
+	// NULL outputs are pointed at a scratch int so later stores need no checks
+	if (x == NULL) x = &scratch;
+	if (y == NULL) y = &scratch;
+	if (comp == NULL) comp = &scratch;
+
+	// the 32-bit signature must be 0x38425053 and the following 16-bit field must be 1
+	if (stbi__get32be(s) != 0x38425053 || stbi__get16be(s) != 1) {
+		stbi__rewind(s);
+		return 0;
+	}
+
+	stbi__skip(s, 6);
+	channels = stbi__get16be(s);
+	if (channels < 0 || channels > 16) {
+		stbi__rewind(s);
+		return 0;
+	}
+
+	*y = stbi__get32be(s);
+	*x = stbi__get32be(s);
+	bits = stbi__get16be(s);
+	// only depths 8 and 16 are accepted; the next 16-bit field is read only then and must be 3
+	if ((bits != 8 && bits != 16) || stbi__get16be(s) != 3) {
+		stbi__rewind(s);
+		return 0;
+	}
+	*comp = 4;
+	return 1;
+}
+
+// Returns 1 when the stream carries a PSD header whose depth field is 16; otherwise rewinds s and returns 0.
+static int stbi__psd_is16(stbi__context* s)
+{
+	int channels, bits;
+
+	// the 32-bit signature must be 0x38425053 and the following 16-bit field must be 1
+	if (stbi__get32be(s) != 0x38425053 || stbi__get16be(s) != 1) {
+		stbi__rewind(s);
+		return 0;
+	}
+
+	stbi__skip(s, 6);
+	channels = stbi__get16be(s);
+	if (channels < 0 || channels > 16) {
+		stbi__rewind(s);
+		return 0;
+	}
+
+	// two 32-bit fields are read and discarded before the depth
+	(void)stbi__get32be(s);
+	(void)stbi__get32be(s);
+	bits = stbi__get16be(s);
+	if (bits == 16) return 1;
+	stbi__rewind(s);
+	return 0;
+}
+#endif
+
+#ifndef STBI_NO_PIC
+// Reads a PIC header: stores width/height in *x/*y and 3 or 4 in *comp, returning 1. Every failure rewinds s and returns 0.
+static int stbi__pic_info(stbi__context* s, int* x, int* y, int* comp)
+{
+	int act_comp = 0, num_packets = 0, chained, dummy;
+	stbi__pic_packet packets[10];
+
+	if (!x) x = &dummy;
+	if (!y) y = &dummy;
+	if (!comp) comp = &dummy;
+
+	if (!stbi__pic_is4(s, "\x53\x80\xF6\x34")) {
+		stbi__rewind(s);
+		return 0;
+	}
+
+	stbi__skip(s, 88);
+
+	*x = stbi__get16be(s);
+	*y = stbi__get16be(s);
+	if (stbi__at_eof(s)) {
+		stbi__rewind(s);
+		return 0;
+	}
+	if ((*x) != 0 && (1 << 28) / (*x) < (*y)) { // size guard: (1 << 28) / x must not be below y
+		stbi__rewind(s);
+		return 0;
+	}
+
+	stbi__skip(s, 8);
+
+	do {
+		stbi__pic_packet* packet;
+		if (num_packets == sizeof(packets) / sizeof(packets[0])) { // packets[] is full: fail like the other paths, rewinding first
+			stbi__rewind(s);
+			return 0;
+		}
+		packet = &packets[num_packets++];
+		chained = stbi__get8(s);
+		packet->size = stbi__get8(s);
+		packet->type = stbi__get8(s);
+		packet->channel = stbi__get8(s);
+		act_comp |= packet->channel;
+
+		if (stbi__at_eof(s)) {
+			stbi__rewind(s);
+			return 0;
+		}
+		if (packet->size != 8) {
+			stbi__rewind(s);
+			return 0;
+		}
+	} while (chained);
+
+	*comp = (act_comp & 0x10 ? 4 : 3); // bit 0x10 set in any packet's channel mask selects 4 components
+	return 1;
+}
+#endif
+
+// *************************************************************************************************
+// Portable Gray Map and Portable Pixel Map loader
+// by Ken Miller
+//
+// PGM: http://netpbm.sourceforge.net/doc/pgm.html
+// PPM: http://netpbm.sourceforge.net/doc/ppm.html
+//
+// Known limitations:
+//    Header '#' comments are skipped (see stbi__pnm_skip_whitespace), not reported to the caller
+//    Does not support ASCII image data (formats P2 and P3)
+//    Does not support 16-bit-per-channel
+
+#ifndef STBI_NO_PNM
+
+// Consumes two bytes and returns 1 if they are "P5" or "P6"; otherwise rewinds s and returns 0.
+static int      stbi__pnm_test(stbi__context* s)
+{
+	char magic = (char)stbi__get8(s);
+	char variant = (char)stbi__get8(s);
+
+	if (magic == 'P' && (variant == '5' || variant == '6'))
+		return 1;
+	stbi__rewind(s);
+	return 0;
+}
+
+// Loads a binary PGM/PPM (P5/P6) image as 8-bit samples, converting to req_comp channels when requested; returns NULL on failure.
+static void* stbi__pnm_load(stbi__context* s, int* x, int* y, int* comp, int req_comp, stbi__result_info* ri)
+{
+	stbi_uc* out;
+	STBI_NOTUSED(ri);
+	if (!stbi__pnm_info(s, (int*)& s->img_x, (int*)& s->img_y, (int*)& s->img_n))
+		return 0;
+	*x = s->img_x;
+	*y = s->img_y;
+	if (comp)* comp = s->img_n;
+	if (!stbi__mad3sizes_valid(s->img_n, s->img_x, s->img_y, 0))
+		return stbi__errpuc("too large", "PNM too large");
+	out = (stbi_uc*)stbi__malloc_mad3(s->img_n, s->img_x, s->img_y, 0);
+	if (!out) return stbi__errpuc("outofmem", "Out of memory");
+	// a short read would leave the tail of `out` uninitialized, so fail instead of returning it
+	if (!stbi__getn(s, out, s->img_n * s->img_x * s->img_y)) {
+		STBI_FREE(out);
+		return stbi__errpuc("bad PNM", "PNM file truncated");
+	}
+	if (req_comp && req_comp != s->img_n) {
+		out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y);
+		if (out == NULL) return out; // stbi__convert_format frees input on failure
+	}
+	return out;
+}
+
+static int      stbi__pnm_isspace(char c)
+{
+	return c == ' ' || (c >= '\t' && c <= '\r'); // '\t' '\n' '\v' '\f' '\r' are the consecutive ASCII codes 9..13
+}
+
+static void     stbi__pnm_skip_whitespace(stbi__context* s, char* c)
+{ // Advances *c past whitespace and '#' comments; *c holds the current look-ahead character on entry and exit.
+	for (;;) {
+		while (!stbi__at_eof(s) && stbi__pnm_isspace(*c))
+			* c = (char)stbi__get8(s);
+		// stop unless the current character starts a '#' comment
+		if (stbi__at_eof(s) || *c != '#')
+			break;
+		// discard the comment up to its '\n' or '\r'; the terminator itself is consumed by the whitespace loop above
+		while (!stbi__at_eof(s) && *c != '\n' && *c != '\r')
+			* c = (char)stbi__get8(s);
+	}
+}
+
+static int      stbi__pnm_isdigit(char c)
+{
+	return (unsigned)(c - '0') <= 9u; // values below '0' wrap to a large unsigned number and fail the test
+}
+
+static int      stbi__pnm_getinteger(stbi__context* s, char* c)
+{
+	int value = 0; // decimal value of the digit run starting at *c; stays 0 when *c is not a digit
+	while (!stbi__at_eof(s) && stbi__pnm_isdigit(*c)) {
+		if (value > 214748364 || (value == 214748364 && *c > '7')) // next step would exceed 2147483647 (signed overflow)
+			return stbi__err("integer parse overflow", "Parsing an integer in the PPM header overflowed a 32-bit int");
+		value = value * 10 + (*c - '0');
+		*c = (char)stbi__get8(s);
+	}
+	return value;
+}
+
+// Parses a P5/P6 header from the start of s: stores width, height and channel count; returns 0 if not P5/P6 or if max value > 255.
+static int      stbi__pnm_info(stbi__context* s, int* x, int* y, int* comp)
+{
+	int max_value, scratch;
+	char cur, magic, variant;
+
+	if (x == NULL) x = &scratch;
+	if (y == NULL) y = &scratch;
+	if (comp == NULL) comp = &scratch;
+
+	stbi__rewind(s);
+
+	// identifier: 'P' followed by '5' or '6'
+	magic = (char)stbi__get8(s);
+	variant = (char)stbi__get8(s);
+	if (magic != 'P' || (variant != '5' && variant != '6')) {
+		stbi__rewind(s);
+		return 0;
+	}
+
+	if (variant == '6')
+		*comp = 3; // 3-component .ppm
+	else
+		*comp = 1; // 1-component .pgm
+
+	cur = (char)stbi__get8(s);
+	stbi__pnm_skip_whitespace(s, &cur);
+	*x = stbi__pnm_getinteger(s, &cur); // width
+
+	stbi__pnm_skip_whitespace(s, &cur);
+	*y = stbi__pnm_getinteger(s, &cur); // height
+
+	stbi__pnm_skip_whitespace(s, &cur);
+	max_value = stbi__pnm_getinteger(s, &cur);
+	if (max_value <= 255) return 1;
+	return stbi__err("max value > 255", "PPM image not 8-bit");
+}
+#endif
+
+static int stbi__info_main(stbi__context* s, int* x, int* y, int* comp)
+{ // Tries each compiled-in format's *_info routine in the order below and returns 1 at the first that accepts the stream.
+#ifndef STBI_NO_JPEG
+	if (stbi__jpeg_info(s, x, y, comp)) return 1;
+#endif
+
+#ifndef STBI_NO_PNG
+	if (stbi__png_info(s, x, y, comp))  return 1;
+#endif
+
+#ifndef STBI_NO_GIF
+	if (stbi__gif_info(s, x, y, comp))  return 1;
+#endif
+
+#ifndef STBI_NO_BMP
+	if (stbi__bmp_info(s, x, y, comp))  return 1;
+#endif
+
+#ifndef STBI_NO_PSD
+	if (stbi__psd_info(s, x, y, comp))  return 1;
+#endif
+
+#ifndef STBI_NO_PIC
+	if (stbi__pic_info(s, x, y, comp))  return 1;
+#endif
+
+#ifndef STBI_NO_PNM
+	if (stbi__pnm_info(s, x, y, comp))  return 1;
+#endif
+
+#ifndef STBI_NO_HDR
+	if (stbi__hdr_info(s, x, y, comp))  return 1;
+#endif
+
+	// test tga last because it's a crappy test!
+#ifndef STBI_NO_TGA
+	if (stbi__tga_info(s, x, y, comp))
+		return 1;
+#endif
+	return stbi__err("unknown image type", "Image not of any known type, or corrupt"); // no probe accepted the stream
+}
+
+static int stbi__is_16_main(stbi__context* s)
+{ // Returns 1 if the PNG or PSD 16-bit probe (whichever are compiled in) accepts the stream, else 0.
+#ifndef STBI_NO_PNG
+	if (stbi__png_is16(s))  return 1;
+#endif
+
+#ifndef STBI_NO_PSD
+	if (stbi__psd_is16(s))  return 1;
+#endif
+
+	return 0; // only these two formats are probed here
+}
+
+#ifndef STBI_NO_STDIO
+STBIDEF int stbi_info(char const* filename, int* x, int* y, int* comp)
+{
+	int info_result; // what stbi_info_from_file reported for the opened file
+	FILE* fp = stbi__fopen(filename, "rb");
+	if (fp == NULL) return stbi__err("can't fopen", "Unable to open file");
+	info_result = stbi_info_from_file(fp, x, y, comp);
+	fclose(fp); // opened here, so closed here
+	return info_result;
+}
+
+STBIDEF int stbi_info_from_file(FILE* f, int* x, int* y, int* comp)
+{
+	stbi__context ctx;
+	int found;
+	long start = ftell(f); // remember where the caller's stream was positioned
+	stbi__start_file(&ctx, f);
+	found = stbi__info_main(&ctx, x, y, comp);
+	fseek(f, start, SEEK_SET); // seek back to that position whatever the probe reported
+	return found;
+}
+
+STBIDEF int stbi_is_16_bit(char const* filename)
+{
+	int is16; // what stbi_is_16_bit_from_file reported for the opened file
+	FILE* fp = stbi__fopen(filename, "rb");
+	if (fp == NULL) return stbi__err("can't fopen", "Unable to open file");
+	is16 = stbi_is_16_bit_from_file(fp);
+	fclose(fp); // opened here, so closed here
+	return is16;
+}
+
+STBIDEF int stbi_is_16_bit_from_file(FILE* f)
+{
+	stbi__context ctx;
+	int is16;
+	long start = ftell(f); // remember where the caller's stream was positioned
+	stbi__start_file(&ctx, f);
+	is16 = stbi__is_16_main(&ctx);
+	fseek(f, start, SEEK_SET); // seek back to that position whatever the probe reported
+	return is16;
+}
+#endif // !STBI_NO_STDIO
+
+STBIDEF int stbi_info_from_memory(stbi_uc const* buffer, int len, int* x, int* y, int* comp)
+{
+	stbi__context mem_ctx; // temporary context set up over the caller's buffer/len
+	stbi__start_mem(&mem_ctx, buffer, len);
+	return stbi__info_main(&mem_ctx, x, y, comp);
+}
+
+STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const* c, void* user, int* x, int* y, int* comp)
+{
+	stbi__context cb_ctx; // temporary context set up over the caller's callbacks and user pointer
+	stbi__start_callbacks(&cb_ctx, (stbi_io_callbacks*)c, user);
+	return stbi__info_main(&cb_ctx, x, y, comp);
+}
+
+STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const* buffer, int len)
+{
+	stbi__context mem_ctx; // temporary context set up over the caller's buffer/len
+	stbi__start_mem(&mem_ctx, buffer, len);
+	return stbi__is_16_main(&mem_ctx);
+}
+
+STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const* c, void* user)
+{
+	stbi__context cb_ctx; // temporary context set up over the caller's callbacks and user pointer
+	stbi__start_callbacks(&cb_ctx, (stbi_io_callbacks*)c, user);
+	return stbi__is_16_main(&cb_ctx);
+}
+
+#endif // STB_IMAGE_IMPLEMENTATION
+
+/*
+   revision history:
+	  2.19  (2018-02-11) fix warning
+	  2.18  (2018-01-30) fix warnings
+	  2.17  (2018-01-29) change stbi__shiftsigned to avoid clang -O2 bug
+						 1-bit BMP
+						 *_is_16_bit api
+						 avoid warnings
+	  2.16  (2017-07-23) all functions have 16-bit variants;
+						 STBI_NO_STDIO works again;
+						 compilation fixes;
+						 fix rounding in unpremultiply;
+						 optimize vertical flip;
+						 disable raw_len validation;
+						 documentation fixes
+	  2.15  (2017-03-18) fix png-1,2,4 bug; now all Imagenet JPGs decode;
+						 warning fixes; disable run-time SSE detection on gcc;
+						 uniform handling of optional "return" values;
+						 thread-safe initialization of zlib tables
+	  2.14  (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs
+	  2.13  (2016-11-29) add 16-bit API, only supported for PNG right now
+	  2.12  (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
+	  2.11  (2016-04-02) allocate large structures on the stack
+						 remove white matting for transparent PSD
+						 fix reported channel count for PNG & BMP
+						 re-enable SSE2 in non-gcc 64-bit
+						 support RGB-formatted JPEG
+						 read 16-bit PNGs (only as 8-bit)
+	  2.10  (2016-01-22) avoid warning introduced in 2.09 by STBI_REALLOC_SIZED
+	  2.09  (2016-01-16) allow comments in PNM files
+						 16-bit-per-pixel TGA (not bit-per-component)
+						 info() for TGA could break due to .hdr handling
+						 info() for BMP to shares code instead of sloppy parse
+						 can use STBI_REALLOC_SIZED if allocator doesn't support realloc
+						 code cleanup
+	  2.08  (2015-09-13) fix to 2.07 cleanup, reading RGB PSD as RGBA
+	  2.07  (2015-09-13) fix compiler warnings
+						 partial animated GIF support
+						 limited 16-bpc PSD support
+						 #ifdef unused functions
+						 bug with < 92 byte PIC,PNM,HDR,TGA
+	  2.06  (2015-04-19) fix bug where PSD returns wrong '*comp' value
+	  2.05  (2015-04-19) fix bug in progressive JPEG handling, fix warning
+	  2.04  (2015-04-15) try to re-enable SIMD on MinGW 64-bit
+	  2.03  (2015-04-12) extra corruption checking (mmozeiko)
+						 stbi_set_flip_vertically_on_load (nguillemot)
+						 fix NEON support; fix mingw support
+	  2.02  (2015-01-19) fix incorrect assert, fix warning
+	  2.01  (2015-01-17) fix various warnings; suppress SIMD on gcc 32-bit without -msse2
+	  2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG
+	  2.00  (2014-12-25) optimize JPG, including x86 SSE2 & NEON SIMD (ryg)
+						 progressive JPEG (stb)
+						 PGM/PPM support (Ken Miller)
+						 STBI_MALLOC,STBI_REALLOC,STBI_FREE
+						 GIF bugfix -- seemingly never worked
+						 STBI_NO_*, STBI_ONLY_*
+	  1.48  (2014-12-14) fix incorrectly-named assert()
+	  1.47  (2014-12-14) 1/2/4-bit PNG support, both direct and paletted (Omar Cornut & stb)
+						 optimize PNG (ryg)
+						 fix bug in interlaced PNG with user-specified channel count (stb)
+	  1.46  (2014-08-26)
+			  fix broken tRNS chunk (colorkey-style transparency) in non-paletted PNG
+	  1.45  (2014-08-16)
+			  fix MSVC-ARM internal compiler error by wrapping malloc
+	  1.44  (2014-08-07)
+			  various warning fixes from Ronny Chevalier
+	  1.43  (2014-07-15)
+			  fix MSVC-only compiler problem in code changed in 1.42
+	  1.42  (2014-07-09)
+			  don't define _CRT_SECURE_NO_WARNINGS (affects user code)
+			  fixes to stbi__cleanup_jpeg path
+			  added STBI_ASSERT to avoid requiring assert.h
+	  1.41  (2014-06-25)
+			  fix search&replace from 1.36 that messed up comments/error messages
+	  1.40  (2014-06-22)
+			  fix gcc struct-initialization warning
+	  1.39  (2014-06-15)
+			  fix to TGA optimization when req_comp != number of components in TGA;
+			  fix to GIF loading because BMP wasn't rewinding (whoops, no GIFs in my test suite)
+			  add support for BMP version 5 (more ignored fields)
+	  1.38  (2014-06-06)
+			  suppress MSVC warnings on integer casts truncating values
+			  fix accidental rename of 'skip' field of I/O
+	  1.37  (2014-06-04)
+			  remove duplicate typedef
+	  1.36  (2014-06-03)
+			  convert to header file single-file library
+			  if de-iphone isn't set, load iphone images color-swapped instead of returning NULL
+	  1.35  (2014-05-27)
+			  various warnings
+			  fix broken STBI_SIMD path
+			  fix bug where stbi_load_from_file no longer left file pointer in correct place
+			  fix broken non-easy path for 32-bit BMP (possibly never used)
+			  TGA optimization by Arseny Kapoulkine
+	  1.34  (unknown)
+			  use STBI_NOTUSED in stbi__resample_row_generic(), fix one more leak in tga failure case
+	  1.33  (2011-07-14)
+			  make stbi_is_hdr work in STBI_NO_HDR (as specified), minor compiler-friendly improvements
+	  1.32  (2011-07-13)
+			  support for "info" function for all supported filetypes (SpartanJ)
+	  1.31  (2011-06-20)
+			  a few more leak fixes, bug in PNG handling (SpartanJ)
+	  1.30  (2011-06-11)
+			  added ability to load files via callbacks to accommodate custom input streams (Ben Wenger)
+			  removed deprecated format-specific test/load functions
+			  removed support for installable file formats (stbi_loader) -- would have been broken for IO callbacks anyway
+			  error cases in bmp and tga give messages and don't leak (Raymond Barbiero, grisha)
+			  fix inefficiency in decoding 32-bit BMP (David Woo)
+	  1.29  (2010-08-16)
+			  various warning fixes from Aurelien Pocheville
+	  1.28  (2010-08-01)
+			  fix bug in GIF palette transparency (SpartanJ)
+	  1.27  (2010-08-01)
+			  cast-to-stbi_uc to fix warnings
+	  1.26  (2010-07-24)
+			  fix bug in file buffering for PNG reported by SpartanJ
+	  1.25  (2010-07-17)
+			  refix trans_data warning (Won Chun)
+	  1.24  (2010-07-12)
+			  perf improvements reading from files on platforms with lock-heavy fgetc()
+			  minor perf improvements for jpeg
+			  deprecated type-specific functions so we'll get feedback if they're needed
+			  attempt to fix trans_data warning (Won Chun)
+	  1.23    fixed bug in iPhone support
+	  1.22  (2010-07-10)
+			  removed image *writing* support
+			  stbi_info support from Jetro Lauha
+			  GIF support from Jean-Marc Lienher
+			  iPhone PNG-extensions from James Brown
+			  warning-fixes from Nicolas Schulz and Janez Zemva (i.e. Janez (U+017D)emva)
+	  1.21    fix use of 'stbi_uc' in header (reported by jon blow)
+	  1.20    added support for Softimage PIC, by Tom Seddon
+	  1.19    bug in interlaced PNG corruption check (found by ryg)
+	  1.18  (2008-08-02)
+			  fix a threading bug (local mutable static)
+	  1.17    support interlaced PNG
+	  1.16    major bugfix - stbi__convert_format converted one too many pixels
+	  1.15    initialize some fields for thread safety
+	  1.14    fix threadsafe conversion bug
+			  header-file-only version (#define STBI_HEADER_FILE_ONLY before including)
+	  1.13    threadsafe
+	  1.12    const qualifiers in the API
+	  1.11    Support installable IDCT, colorspace conversion routines
+	  1.10    Fixes for 64-bit (don't use "unsigned long")
+			  optimized upsampling by Fabian "ryg" Giesen
+	  1.09    Fix format-conversion for PSD code (bad global variables!)
+	  1.08    Thatcher Ulrich's PSD code integrated by Nicolas Schulz
+	  1.07    attempt to fix C++ warning/errors again
+	  1.06    attempt to fix C++ warning/errors again
+	  1.05    fix TGA loading to return correct *comp and use good luminance calc
+	  1.04    default float alpha is 1, not 255; use 'void *' for stbi_image_free
+	  1.03    bugfixes to STBI_NO_STDIO, STBI_NO_HDR
+	  1.02    support for (subset of) HDR files, float interface for preferred access to them
+	  1.01    fix bug: possible bug in handling right-side up bmps... not sure
+			  fix bug: the stbi__bmp_load() and stbi__tga_load() functions didn't work at all
+	  1.00    interface to zlib that skips zlib header
+	  0.99    correct handling of alpha in palette
+	  0.98    TGA loader by lonesock; dynamically add loaders (untested)
+	  0.97    jpeg errors on too large a file; also catch another malloc failure
+	  0.96    fix detection of invalid v value - particleman@mollyrocket forum
+	  0.95    during header scan, seek to markers in case of padding
+	  0.94    STBI_NO_STDIO to disable stdio usage; rename all #defines the same
+	  0.93    handle jpegtran output; verbose errors
+	  0.92    read 4,8,16,24,32-bit BMP files of several formats
+	  0.91    output 24-bit Windows 3.0 BMP files
+	  0.90    fix a few more warnings; bump version number to approach 1.0
+	  0.61    bugfixes due to Marc LeBlanc, Christopher Lloyd
+	  0.60    fix compiling as c++
+	  0.59    fix warnings: merge Dave Moore's -Wall fixes
+	  0.58    fix bug: zlib uncompressed mode len/nlen was wrong endian
+	  0.57    fix bug: jpg last huffman symbol before marker was >9 bits but less than 16 available
+	  0.56    fix bug: zlib uncompressed mode len vs. nlen
+	  0.55    fix bug: restart_interval not initialized to 0
+	  0.54    allow NULL for 'int *comp'
+	  0.53    fix bug in png 3->4; speedup png decoding
+	  0.52    png handles req_comp=3,4 directly; minor cleanup; jpeg comments
+	  0.51    obey req_comp requests, 1-component jpegs return as 1-component,
+			  on 'test' only check type, not whether we support this variant
+	  0.50  (2006-11-19)
+			  first released version
+*/
+
+
+/*
+------------------------------------------------------------------------------
+This software is available under 2 licenses -- choose whichever you prefer.
+------------------------------------------------------------------------------
+ALTERNATIVE A - MIT License
+Copyright (c) 2017 Sean Barrett
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+------------------------------------------------------------------------------
+ALTERNATIVE B - Public Domain (www.unlicense.org)
+This is free and unencumbered software released into the public domain.
+Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+software, either in source code form or as a compiled binary, for any purpose,
+commercial or non-commercial, and by any means.
+In jurisdictions that recognize copyright laws, the author or authors of this
+software dedicate any and all copyright interest in the software to the public
+domain. We make this dedication for the benefit of the public at large and to
+the detriment of our heirs and successors. We intend this dedication to be an
+overt act of relinquishment in perpetuity of all present and future rights to
+this software under copyright law.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+------------------------------------------------------------------------------
+*/
diff --git a/src/display.cpp b/src/display.cpp
new file mode 100644
index 0000000..79c680e
--- /dev/null
+++ b/src/display.cpp
@@ -0,0 +1,120 @@
+#include "display.hpp"
+
+// Creates the SDL window and an OpenGL 4.5 context for it. IsWindowOpen is only set once every setup step succeeded.
+Display::Display( int w, int h, std::string title )
+	: mLogger(), mW( w ), mH( h )
+{
+	mLogger << LOGGER_INFO << "Initializing display" << LOGGER_ENDL;
+	if ( SDL_Init( SDL_INIT_VIDEO | SDL_INIT_AUDIO ) != 0 ) { mLogger << LOGGER_INFO << "SDL_Init failed" << LOGGER_ENDL; return; }
+
+	SDL_GL_SetAttribute( SDL_GL_RED_SIZE, 8 );
+	SDL_GL_SetAttribute( SDL_GL_GREEN_SIZE, 8 );
+	SDL_GL_SetAttribute( SDL_GL_BLUE_SIZE, 8 );
+	SDL_GL_SetAttribute( SDL_GL_ALPHA_SIZE, 8 );
+	SDL_GL_SetAttribute( SDL_GL_BUFFER_SIZE, 32 );
+	SDL_GL_SetAttribute( SDL_GL_DOUBLEBUFFER, 1 );
+
+	SDL_GL_SetAttribute( SDL_GL_MULTISAMPLEBUFFERS, 1 );
+	SDL_GL_SetAttribute( SDL_GL_MULTISAMPLESAMPLES, 4 );
+
+	SDL_GL_SetAttribute( SDL_GL_CONTEXT_MAJOR_VERSION, 4 );
+	SDL_GL_SetAttribute( SDL_GL_CONTEXT_MINOR_VERSION, 5 );
+
+	// Create GL window
+	mLogger << LOGGER_INFO << "Creating window" << LOGGER_ENDL;
+	mWindow = SDL_CreateWindow( title.c_str(),
+		SDL_WINDOWPOS_CENTERED,
+		SDL_WINDOWPOS_CENTERED, w, h,
+		SDL_WINDOW_OPENGL|SDL_WINDOW_RESIZABLE );
+	if ( mWindow == nullptr ) { mLogger << LOGGER_INFO << "Failed to create window" << LOGGER_ENDL; return; }
+
+	// Create GL context
+	mLogger << LOGGER_INFO << "Creating OpenGL context" << LOGGER_ENDL;
+	mGlContext = SDL_GL_CreateContext( mWindow );
+	if ( mGlContext == nullptr ) { mLogger << LOGGER_INFO << "Failed to create OpenGL context" << LOGGER_ENDL; return; }
+
+	SDL_SetRelativeMouseMode( SDL_TRUE );
+
+	// Set VSYNC swap interval
+	SDL_GL_SetSwapInterval( 1 );
+
+	mLogger << LOGGER_INFO << "Display set up" << LOGGER_ENDL;
+
+	// Load OpenGL; the gl* calls below go through pointers this loader fills in, so stop if it fails
+	if ( !gladLoadGLLoader( SDL_GL_GetProcAddress ) ) { mLogger << LOGGER_INFO << "Failed to load OpenGL" << LOGGER_ENDL; return; }
+	glEnable( GL_MULTISAMPLE );
+	// glEnable(GL_CULL_FACE);
+	glCullFace( GL_BACK );
+	glEnable( GL_DEPTH_TEST );
+	// glPolygonMode(GL_FRONT_AND_BACK, GL_LINE);
+
+	mLogger << LOGGER_INFO << "Loaded OpenGL" << LOGGER_ENDL;
+	mLogger << LOGGER_ENDL;
+	IsWindowOpen = true;
+}
+
+// Drains the SDL event queue into *e: Escape toggles mouse capture, a resize updates the viewport, quit closes the window.
+void Display::Input( SDL_Event* e )
+{
+	// Keyboard snapshot; nothing reads it yet (see the commented-out call at the end), so it is explicitly marked unused.
+	const Uint8* state = SDL_GetKeyboardState( nullptr );
+	(void) state;
+	while ( SDL_PollEvent( e ) )
+	{
+		switch ( e->type )
+		{
+		case SDL_KEYDOWN:
+		{
+			// Escape flips IsMouseActive and applies it as SDL's relative mouse mode
+			if ( e->key.keysym.sym == SDLK_ESCAPE )
+			{
+				IsMouseActive = !IsMouseActive;
+				SDL_SetRelativeMouseMode( IsMouseActive ? SDL_TRUE : SDL_FALSE );
+			}
+
+			break;
+		}
+
+		case SDL_WINDOWEVENT:
+		{
+			if ( e->window.event == SDL_WINDOWEVENT_RESIZED )
+			{
+				// record the size carried by the event and match the GL viewport to it
+				mW = e->window.data1;
+				mH = e->window.data2;
+				// CameraUpdateProjection( mW, mH );
+				glViewport( 0, 0, mW, mH );
+			}
+
+			break;
+		}
+
+		case SDL_QUIT:
+		{
+			IsWindowOpen = false;
+			break;
+		}
+
+		default:
+			break; // every other event type is ignored
+		}
+
+		// if ( IsMouseActive ) HandleMouseSDL( *e );
+	}
+
+	// m_player->MoveSDL( state );
+}
+
+void Display::PrepareFrame()
+{
+	// Four floats (RGBA): glClearBufferfv( GL_COLOR, ... ) reads four values, so a 3-element array is read out of bounds.
+	static const float clear[] = { 186.0f / 255.0f, 214.0f / 255.0f, 254.0f / 255.0f, 1.0f };
+	glClear( GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT );
+	glClearBufferfv( GL_COLOR, 0, clear ); // then fill draw buffer 0 with the clear colour above
+}
+
+void Display::NextFrame()
+{
+	SDL_GL_SwapWindow( mWindow ); // presents the frame by swapping the window's GL buffers
+}
+
diff --git a/src/display.hpp b/src/display.hpp
new file mode 100644
index 0000000..487be4b
--- /dev/null
+++ b/src/display.hpp
@@ -0,0 +1,42 @@
+#ifndef MINECRAFT_DISPLAY_H_
+#define MINECRAFT_DISPLAY_H_
+
+#include <string>
+
+#include <logger.h>
+
+#if _WIN32
+#include <SDL.h>
+#else
+#include <SDL2/SDL.h>
+#endif
+
+#include <glad/glad.h>
+#include <KHR/khrplatform.h>
+
+// Wraps the SDL window and its OpenGL context, and processes window events.
+class Display
+{
+public:
+	// Creates a w x h window titled `title` and an OpenGL context for it
+	Display( int w, int h, std::string title );
+	// Polls pending SDL events into *e and reacts to key, window-resize and quit events
+	void Input( SDL_Event* e );
+	// PrepareFrame clears the colour and depth buffers; NextFrame swaps the window's buffers
+	void PrepareFrame();
+	void NextFrame();
+
+	bool IsWindowOpen = false;  // set to true by the constructor, cleared on SDL_QUIT
+	bool IsMouseActive = true;  // toggled by the Escape key in Input()
+
+private:
+	Logger mLogger;
+
+	SDL_Window* mWindow = nullptr;
+	SDL_GLContext mGlContext = nullptr;
+
+	// Window size as last recorded; zero-initialised so it is never read indeterminate
+	int mW = 0, mH = 0;
+};
+
+#endif
diff --git a/src/settings.hpp b/src/settings.hpp
new file mode 100644
index 0000000..5ab11c0
--- /dev/null
+++ b/src/settings.hpp
@@ -0,0 +1,14 @@
+#ifndef MINECRAFT_SETTINGS_H_
+#define MINECRAFT_SETTINGS_H_
+
+#include <string>
+
+// TODO: import settings and stuff
+// for now this works
+
+static const int WindowWidth = 1000;  // presumably the initial window width in pixels -- confirm against the Display call site
+static const int WindowHeight = 600;  // presumably the initial window height in pixels -- confirm against the Display call site
+
+static const std::string ResourceBase = MC_RESOURCES;  // MC_RESOURCES is not defined in this header; presumably supplied by the build
+
+#endif
diff --git a/src/threadpool.hpp b/src/threadpool.hpp
new file mode 100644
index 0000000..764544e
--- /dev/null
+++ b/src/threadpool.hpp
@@ -0,0 +1,3 @@
+// Threadpool for asset management and other such tasks
+
+
diff --git a/src/utilities.hpp b/src/utilities.hpp
new file mode 100644
index 0000000..21e44ed
--- /dev/null
+++ b/src/utilities.hpp
@@ -0,0 +1,10 @@
+#include <fstream>
+#include <string>
+
+inline std::string LoadTextFromFile( const std::string& file )  // returns the whole file as text; empty if it cannot be opened
+{
+	std::ifstream input( file );
+	// The extra parentheses around the first iterator keep this from being parsed as a function declaration.
+	std::string text( (std::istreambuf_iterator<char>( input )), std::istreambuf_iterator<char>() );
+	return text;
+}