v2 / thirdparty / stb_image / stb_image.h
7990 lines · 7104 sloc · 276.32 KB · 97e1b24d307524675dda8f2f091f80fdb1f39437
Raw
1/* stb_image - v2.29 - public domain image loader - http://nothings.org/stb
2 no warranty implied; use at your own risk
3
4 Do this:
5 #define STB_IMAGE_IMPLEMENTATION
6 before you include this file in *one* C or C++ file to create the implementation.
7
8 // i.e. it should look like this:
9 #include ...
10 #include ...
11 #include ...
12 #define STB_IMAGE_IMPLEMENTATION
13 #include "stb_image.h"
14
15 You can #define STBI_ASSERT(x) before the #include to avoid using assert.h.
16 And #define STBI_MALLOC, STBI_REALLOC, and STBI_FREE to avoid using malloc,realloc,free
17
18
19 QUICK NOTES:
20 Primarily of interest to game developers and other people who can
21 avoid problematic images and only need the trivial interface
22
23 JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib)
24 PNG 1/2/4/8/16-bit-per-channel
25
26 TGA (not sure what subset, if a subset)
27 BMP non-1bpp, non-RLE
28 PSD (composited view only, no extra channels, 8/16 bit-per-channel)
29
30 GIF (*comp always reports as 4-channel)
31 HDR (radiance rgbE format)
32 PIC (Softimage PIC)
33 PNM (PPM and PGM binary only)
34
35 Animated GIF still needs a proper API, but here's one way to do it:
36 http://gist.github.com/urraka/685d9a6340b26b830d49
37
38 - decode from memory or through FILE (define STBI_NO_STDIO to remove code)
39 - decode from arbitrary I/O callbacks
40 - SIMD acceleration on x86/x64 (SSE2) and ARM (NEON)
41
42 Full documentation under "DOCUMENTATION" below.
43
44
45LICENSE
46
47 See end of file for license information.
48
49RECENT REVISION HISTORY:
50
51 2.29 (2023-05-xx) optimizations
52 2.28 (2023-01-29) many error fixes, security errors, just tons of stuff
53 2.27 (2021-07-11) document stbi_info better, 16-bit PNM support, bug fixes
54 2.26 (2020-07-13) many minor fixes
55 2.25 (2020-02-02) fix warnings
56 2.24 (2020-02-02) fix warnings; thread-local failure_reason and flip_vertically
57 2.23 (2019-08-11) fix clang static analysis warning
58 2.22 (2019-03-04) gif fixes, fix warnings
59 2.21 (2019-02-25) fix typo in comment
60 2.20 (2019-02-07) support utf8 filenames in Windows; fix warnings and platform ifdefs
61 2.19 (2018-02-11) fix warning
62 2.18 (2018-01-30) fix warnings
63 2.17 (2018-01-29) bugfix, 1-bit BMP, 16-bitness query, fix warnings
64 2.16 (2017-07-23) all functions have 16-bit variants; optimizations; bugfixes
65 2.15 (2017-03-18) fix png-1,2,4; all Imagenet JPGs; no runtime SSE detection on GCC
66 2.14 (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs
67 2.13 (2016-12-04) experimental 16-bit API, only for PNG so far; fixes
68 2.12 (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
69 2.11 (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64
70 RGB-format JPEG; remove white matting in PSD;
71 allocate large structures on the stack;
72 correct channel count for PNG & BMP
73 2.10 (2016-01-22) avoid warning introduced in 2.09
74 2.09 (2016-01-16) 16-bit TGA; comments in PNM files; STBI_REALLOC_SIZED
75
76 See end of file for full revision history.
77
78
79 ============================ Contributors =========================
80
81 Image formats Extensions, features
82 Sean Barrett (jpeg, png, bmp) Jetro Lauha (stbi_info)
83 Nicolas Schulz (hdr, psd) Martin "SpartanJ" Golini (stbi_info)
84 Jonathan Dummer (tga) James "moose2000" Brown (iPhone PNG)
85 Jean-Marc Lienher (gif) Ben "Disch" Wenger (io callbacks)
86 Tom Seddon (pic) Omar Cornut (1/2/4-bit PNG)
87 Thatcher Ulrich (psd) Nicolas Guillemot (vertical flip)
88 Ken Miller (pgm, ppm) Richard Mitton (16-bit PSD)
89 github:urraka (animated gif) Junggon Kim (PNM comments)
90 Christopher Forseth (animated gif) Daniel Gibson (16-bit TGA)
91 socks-the-fox (16-bit PNG)
92 Jeremy Sawicki (handle all ImageNet JPGs)
93 Optimizations & bugfixes Mikhail Morozov (1-bit BMP)
94 Fabian "ryg" Giesen Anael Seghezzi (is-16-bit query)
95 Arseny Kapoulkine Simon Breuss (16-bit PNM)
96 John-Mark Allen
97 Carmelo J Fdez-Aguera
98
99 Bug & warning fixes
100 Marc LeBlanc David Woo Guillaume George Martins Mozeiko
101 Christpher Lloyd Jerry Jansson Joseph Thomson Blazej Dariusz Roszkowski
102 Phil Jordan Dave Moore Roy Eltham
103 Hayaki Saito Nathan Reed Won Chun
104 Luke Graham Johan Duparc Nick Verigakis the Horde3D community
105 Thomas Ruf Ronny Chevalier github:rlyeh
106 Janez Zemva John Bartholomew Michal Cichon github:romigrou
107 Jonathan Blow Ken Hamada Tero Hanninen github:svdijk
108 Eugene Golushkov Laurent Gomila Cort Stratton github:snagar
109 Aruelien Pocheville Sergio Gonzalez Thibault Reuille github:Zelex
110 Cass Everitt Ryamond Barbiero github:grim210
111 Paul Du Bois Engin Manap Aldo Culquicondor github:sammyhw
112 Philipp Wiesemann Dale Weiler Oriol Ferrer Mesia github:phprus
113 Josh Tobin Neil Bickford Matthew Gregan github:poppolopoppo
114 Julian Raschke Gregory Mullen Christian Floisand github:darealshinji
115 Baldur Karlsson Kevin Schmidt JR Smith github:Michaelangel007
116 Brad Weinberger Matvey Cherevko github:mosra
117 Luca Sas Alexander Veselov Zack Middleton [reserved]
118 Ryan C. Gordon [reserved] [reserved]
119 DO NOT ADD YOUR NAME HERE
120
121 Jacko Dirks
122
123 To add your name to the credits, pick a random blank space in the middle and fill it.
124 80% of merge conflicts on stb PRs are due to people adding their name at the end
125 of the credits.
126*/
127
128#ifndef STBI_INCLUDE_STB_IMAGE_H
129#define STBI_INCLUDE_STB_IMAGE_H
130
131// DOCUMENTATION
132//
133// Limitations:
134// - no 12-bit-per-channel JPEG
135// - no JPEGs with arithmetic coding
136// - GIF always returns *comp=4
137//
138// Basic usage (see HDR discussion below for HDR usage):
139// int x,y,n;
140// unsigned char *data = stbi_load(filename, &x, &y, &n, 0);
141// // ... process data if not NULL ...
142// // ... x = width, y = height, n = # 8-bit components per pixel ...
143// // ... replace '0' with '1'..'4' to force that many components per pixel
144// // ... but 'n' will always be the number that it would have been if you said 0
145// stbi_image_free(data);
146//
147// Standard parameters:
148// int *x -- outputs image width in pixels
149// int *y -- outputs image height in pixels
150// int *channels_in_file -- outputs # of image components in image file
151// int desired_channels -- if non-zero, # of image components requested in result
152//
153// The return value from an image loader is an 'unsigned char *' which points
154// to the pixel data, or NULL on an allocation failure or if the image is
155// corrupt or invalid. The pixel data consists of *y scanlines of *x pixels,
156// with each pixel consisting of N interleaved 8-bit components; the first
157// pixel pointed to is top-left-most in the image. There is no padding between
158// image scanlines or between pixels, regardless of format. The number of
159// components N is 'desired_channels' if desired_channels is non-zero, or
160// *channels_in_file otherwise. If desired_channels is non-zero,
161// *channels_in_file has the number of components that _would_ have been
162// output otherwise. E.g. if you set desired_channels to 4, you will always
163// get RGBA output, but you can check *channels_in_file to see if it's trivially
164// opaque because e.g. there were only 3 channels in the source image.
165//
166// An output image with N components has the following components interleaved
167// in this order in each pixel:
168//
169// N=#comp components
170// 1 grey
171// 2 grey, alpha
172// 3 red, green, blue
173// 4 red, green, blue, alpha
174//
175// If image loading fails for any reason, the return value will be NULL,
176// and *x, *y, *channels_in_file will be unchanged. The function
177// stbi_failure_reason() can be queried for an extremely brief, end-user
178// unfriendly explanation of why the load failed. Define STBI_NO_FAILURE_STRINGS
179// to avoid compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly
180// more user-friendly ones.
181//
182// Paletted PNG, BMP, GIF, and PIC images are automatically depalettized.
183//
184// To query the width, height and component count of an image without having to
185// decode the full file, you can use the stbi_info family of functions:
186//
187// int x,y,n,ok;
188// ok = stbi_info(filename, &x, &y, &n);
189// // returns ok=1 and sets x, y, n if image is a supported format,
190// // 0 otherwise.
191//
192// Note that stb_image pervasively uses ints in its public API for sizes,
193// including sizes of memory buffers. This is now part of the API and thus
194// hard to change without causing breakage. As a result, the various image
195// loaders all have certain limits on image size; these differ somewhat
196// by format but generally boil down to either just under 2GB or just under
197// 1GB. When the decoded image would be larger than this, stb_image decoding
198// will fail.
199//
200// Additionally, stb_image will reject image files that have any of their
201// dimensions set to a larger value than the configurable STBI_MAX_DIMENSIONS,
202// which defaults to 2**24 = 16777216 pixels. Due to the above memory limit,
203// the only way to have an image with such dimensions load correctly
204// is for it to have a rather extreme aspect ratio. Either way, the
205// assumption here is that such larger images are likely to be malformed
206// or malicious. If you do need to load an image with individual dimensions
207// larger than that, and it still fits in the overall size limit, you can
208// #define STBI_MAX_DIMENSIONS on your own to be something larger.
209//
210// ===========================================================================
211//
212// UNICODE:
213//
214// If compiling for Windows and you wish to use Unicode filenames, compile
215// with
216// #define STBI_WINDOWS_UTF8
217// and pass utf8-encoded filenames. Call stbi_convert_wchar_to_utf8 to convert
218// Windows wchar_t filenames to utf8.
219//
220// ===========================================================================
221//
222// Philosophy
223//
224// stb libraries are designed with the following priorities:
225//
226// 1. easy to use
227// 2. easy to maintain
228// 3. good performance
229//
230// Sometimes I let "good performance" creep up in priority over "easy to maintain",
231// and for best performance I may provide less-easy-to-use APIs that give higher
232// performance, in addition to the easy-to-use ones. Nevertheless, it's important
233// to keep in mind that from the standpoint of you, a client of this library,
234// all you care about is #1 and #3, and stb libraries DO NOT emphasize #3 above all.
235//
236// Some secondary priorities arise directly from the first two, some of which
237// provide more explicit reasons why performance can't be emphasized.
238//
239// - Portable ("ease of use")
240// - Small source code footprint ("easy to maintain")
241// - No dependencies ("ease of use")
242//
243// ===========================================================================
244//
245// I/O callbacks
246//
247// I/O callbacks allow you to read from arbitrary sources, like packaged
248// files or some other source. Data read from callbacks are processed
249// through a small internal buffer (currently 128 bytes) to try to reduce
250// overhead.
251//
252// The three functions you must define are "read" (reads some bytes of data),
253// "skip" (skips some bytes of data), "eof" (reports if the stream is at the end).
254//
255// ===========================================================================
256//
257// SIMD support
258//
259// The JPEG decoder will try to automatically use SIMD kernels on x86 when
260// supported by the compiler. For ARM Neon support, you must explicitly
261// request it.
262//
263// (The old do-it-yourself SIMD API is no longer supported in the current
264// code.)
265//
// On x86, SSE2 is used automatically when available. With MSVC this is decided
// by a run-time CPUID test; with GCC/Clang it is decided at compile time (SSE2
// is used unconditionally when building with -msse2, and not at all otherwise).
// When SSE2 is not used, the generic C versions are the fall-back. On ARM targets,
// the typical path is to have separate builds for NEON and non-NEON devices
// (at least this is true for iOS and Android). Therefore, the NEON support is
// toggled by a build flag: define STBI_NEON to get NEON loops.
271//
272// If for some reason you do not want to use any of SIMD code, or if
273// you have issues compiling it, you can disable it entirely by
274// defining STBI_NO_SIMD.
275//
276// ===========================================================================
277//
278// HDR image support (disable by defining STBI_NO_HDR)
279//
280// stb_image supports loading HDR images in general, and currently the Radiance
281// .HDR file format specifically. You can still load any file through the existing
282// interface; if you attempt to load an HDR file, it will be automatically remapped
283// to LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1;
284// both of these constants can be reconfigured through this interface:
285//
286// stbi_hdr_to_ldr_gamma(2.2f);
287// stbi_hdr_to_ldr_scale(1.0f);
288//
// (note, do not use _inverse_ constants; stb_image will invert them
// appropriately).
291//
292// Additionally, there is a new, parallel interface for loading files as
293// (linear) floats to preserve the full dynamic range:
294//
295// float *data = stbi_loadf(filename, &x, &y, &n, 0);
296//
297// If you load LDR images through this interface, those images will
298// be promoted to floating point values, run through the inverse of
299// constants corresponding to the above:
300//
301// stbi_ldr_to_hdr_scale(1.0f);
302// stbi_ldr_to_hdr_gamma(2.2f);
303//
304// Finally, given a filename (or an open file or memory block--see header
305// file for details) containing image data, you can query for the "most
306// appropriate" interface to use (that is, whether the image is HDR or
307// not), using:
308//
//     stbi_is_hdr(char const *filename);
310//
311// ===========================================================================
312//
313// iPhone PNG support:
314//
315// We optionally support converting iPhone-formatted PNGs (which store
316// premultiplied BGRA) back to RGB, even though they're internally encoded
317// differently. To enable this conversion, call
318// stbi_convert_iphone_png_to_rgb(1).
319//
320// Call stbi_set_unpremultiply_on_load(1) as well to force a divide per
321// pixel to remove any premultiplied alpha *only* if the image file explicitly
322// says there's premultiplied data (currently only happens in iPhone images,
323// and only if iPhone convert-to-rgb processing is on).
324//
325// ===========================================================================
326//
327// ADDITIONAL CONFIGURATION
328//
329// - You can suppress implementation of any of the decoders to reduce
330// your code footprint by #defining one or more of the following
331// symbols before creating the implementation.
332//
333// STBI_NO_JPEG
334// STBI_NO_PNG
335// STBI_NO_BMP
336// STBI_NO_PSD
337// STBI_NO_TGA
338// STBI_NO_GIF
339// STBI_NO_HDR
340// STBI_NO_PIC
341// STBI_NO_PNM (.ppm and .pgm)
342//
343// - You can request *only* certain decoders and suppress all other ones
344// (this will be more forward-compatible, as addition of new decoders
345// doesn't require you to disable them explicitly):
346//
347// STBI_ONLY_JPEG
348// STBI_ONLY_PNG
349// STBI_ONLY_BMP
350// STBI_ONLY_PSD
351// STBI_ONLY_TGA
352// STBI_ONLY_GIF
353// STBI_ONLY_HDR
354// STBI_ONLY_PIC
355// STBI_ONLY_PNM (.ppm and .pgm)
356//
357// - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still
358// want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB
359//
360// - If you define STBI_MAX_DIMENSIONS, stb_image will reject images greater
361// than that size (in either width or height) without further processing.
362// This is to let programs in the wild set an upper bound to prevent
363// denial-of-service attacks on untrusted data, as one could generate a
364// valid image of gigantic dimensions and force stb_image to allocate a
365// huge block of memory and spend disproportionate time decoding it. By
366// default this is set to (1 << 24), which is 16777216, but that's still
367// very big.
368
369#ifdef __TINYC__
370#define STBI_NO_SIMD
371#define STBI_NO_THREAD_LOCALS
372#endif
373
374#ifndef STBI_NO_STDIO
375#include <stdio.h>
376#endif // STBI_NO_STDIO
377
378#define STBI_VERSION 1
379
// Component counts, named after the pixel layout they select:
// 1 = grey, 2 = grey+alpha, 3 = red/green/blue, 4 = red/green/blue/alpha.
enum
{
   STBI_default    = 0, // only used for desired_channels
   STBI_grey       = 1,
   STBI_grey_alpha = 2,
   STBI_rgb        = 3,
   STBI_rgb_alpha  = 4
};
389
390#include <stdlib.h>
391typedef unsigned char stbi_uc;
392typedef unsigned short stbi_us;
393
394#ifdef __cplusplus
395extern "C" {
396#endif
397
398#ifndef STBIDEF
399#ifdef STB_IMAGE_STATIC
400#define STBIDEF static
401#else
402#define STBIDEF extern
403#endif
404#endif
405
406//////////////////////////////////////////////////////////////////////////////
407//
408// PRIMARY API - works on images of any type
409//
410
411//
412// load image by filename, open file, or memory buffer
413//
414
// Table of caller-supplied I/O routines. Every routine receives an opaque
// 'user' pointer as its first argument.
typedef struct
{
   // fill 'data' with 'size' bytes. return number of bytes actually read
   int  (*read) (void *user, char *data, int size);

   // skip the next 'n' bytes, or 'unget' the last -n bytes if negative
   void (*skip) (void *user, int n);

   // returns nonzero if we are at end of file/data
   int  (*eof)  (void *user);
} stbi_io_callbacks;
421
422////////////////////////////////////
423//
424// 8-bits-per-channel interface
425//
426
427STBIDEF stbi_uc *stbi_load_from_memory (stbi_uc const *buffer, int len , int *x, int *y, int *channels_in_file, int desired_channels);
428STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk , void *user, int *x, int *y, int *channels_in_file, int desired_channels);
429
430#ifndef STBI_NO_STDIO
431STBIDEF stbi_uc *stbi_load (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
432STBIDEF stbi_uc *stbi_load_from_file (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
433// for stbi_load_from_file, file pointer is left pointing immediately after image
434#endif
435
436#ifndef STBI_NO_GIF
437STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp);
438#endif
439
440#ifdef STBI_WINDOWS_UTF8
441STBIDEF int stbi_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input);
442#endif
443
444////////////////////////////////////
445//
446// 16-bits-per-channel interface
447//
448
449STBIDEF stbi_us *stbi_load_16_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels);
450STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels);
451
452#ifndef STBI_NO_STDIO
453STBIDEF stbi_us *stbi_load_16 (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
454STBIDEF stbi_us *stbi_load_from_file_16(FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
455#endif
456
457////////////////////////////////////
458//
459// float-per-channel interface
460//
461#ifndef STBI_NO_LINEAR
462 STBIDEF float *stbi_loadf_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels);
463 STBIDEF float *stbi_loadf_from_callbacks (stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels);
464
465 #ifndef STBI_NO_STDIO
466 STBIDEF float *stbi_loadf (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
467 STBIDEF float *stbi_loadf_from_file (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
468 #endif
469#endif
470
471#ifndef STBI_NO_HDR
472 STBIDEF void stbi_hdr_to_ldr_gamma(float gamma);
473 STBIDEF void stbi_hdr_to_ldr_scale(float scale);
474#endif // STBI_NO_HDR
475
476#ifndef STBI_NO_LINEAR
477 STBIDEF void stbi_ldr_to_hdr_gamma(float gamma);
478 STBIDEF void stbi_ldr_to_hdr_scale(float scale);
479#endif // STBI_NO_LINEAR
480
481// stbi_is_hdr is always defined, but always returns false if STBI_NO_HDR
482STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user);
483STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len);
484#ifndef STBI_NO_STDIO
485STBIDEF int stbi_is_hdr (char const *filename);
486STBIDEF int stbi_is_hdr_from_file(FILE *f);
487#endif // STBI_NO_STDIO
488
489
490// get a VERY brief reason for failure
491// on most compilers (and ALL modern mainstream compilers) this is threadsafe
492STBIDEF const char *stbi_failure_reason (void);
493
494// free the loaded image -- this is just free()
495STBIDEF void stbi_image_free (void *retval_from_stbi_load);
496
497// get image dimensions & components without fully decoding
498STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp);
499STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp);
500STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len);
501STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *clbk, void *user);
502
503#ifndef STBI_NO_STDIO
504STBIDEF int stbi_info (char const *filename, int *x, int *y, int *comp);
505STBIDEF int stbi_info_from_file (FILE *f, int *x, int *y, int *comp);
506STBIDEF int stbi_is_16_bit (char const *filename);
507STBIDEF int stbi_is_16_bit_from_file(FILE *f);
508#endif
509
510
511
// for image formats that explicitly notate that they have premultiplied alpha,
// we just return the colors as stored in the file. set this flag to force
// unpremultiplication. results are undefined if the unpremultiply overflows.
515STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply);
516
517// indicate whether we should process iphone images back to canonical format,
518// or just pass them through "as-is"
519STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert);
520
521// flip the image vertically, so the first pixel in the output array is the bottom left
522STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip);
523
524// as above, but only applies to images loaded on the thread that calls the function
525// this function is only available if your compiler supports thread-local variables;
526// calling it will fail to link if your compiler doesn't
527STBIDEF void stbi_set_unpremultiply_on_load_thread(int flag_true_if_should_unpremultiply);
528STBIDEF void stbi_convert_iphone_png_to_rgb_thread(int flag_true_if_should_convert);
529STBIDEF void stbi_set_flip_vertically_on_load_thread(int flag_true_if_should_flip);
530
531// ZLIB client - used by PNG, available for other purposes
532
533STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen);
534STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header);
535STBIDEF char *stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen);
536STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
537
538STBIDEF char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen);
539STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
540
541
542#ifdef __cplusplus
543}
544#endif
545
546//
547//
548//// end header file /////////////////////////////////////////////////////
549#endif // STBI_INCLUDE_STB_IMAGE_H
550
551#ifdef STB_IMAGE_IMPLEMENTATION
552
553#if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || defined(STBI_ONLY_BMP) \
554 || defined(STBI_ONLY_TGA) || defined(STBI_ONLY_GIF) || defined(STBI_ONLY_PSD) \
555 || defined(STBI_ONLY_HDR) || defined(STBI_ONLY_PIC) || defined(STBI_ONLY_PNM) \
556 || defined(STBI_ONLY_ZLIB)
557 #ifndef STBI_ONLY_JPEG
558 #define STBI_NO_JPEG
559 #endif
560 #ifndef STBI_ONLY_PNG
561 #define STBI_NO_PNG
562 #endif
563 #ifndef STBI_ONLY_BMP
564 #define STBI_NO_BMP
565 #endif
566 #ifndef STBI_ONLY_PSD
567 #define STBI_NO_PSD
568 #endif
569 #ifndef STBI_ONLY_TGA
570 #define STBI_NO_TGA
571 #endif
572 #ifndef STBI_ONLY_GIF
573 #define STBI_NO_GIF
574 #endif
575 #ifndef STBI_ONLY_HDR
576 #define STBI_NO_HDR
577 #endif
578 #ifndef STBI_ONLY_PIC
579 #define STBI_NO_PIC
580 #endif
581 #ifndef STBI_ONLY_PNM
582 #define STBI_NO_PNM
583 #endif
584#endif
585
586#if defined(STBI_NO_PNG) && !defined(STBI_SUPPORT_ZLIB) && !defined(STBI_NO_ZLIB)
587#define STBI_NO_ZLIB
588#endif
589
590
591#include <stdarg.h>
592#include <stddef.h> // ptrdiff_t on osx
593#include <stdlib.h>
594#include <string.h>
595#include <limits.h>
596
597#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
598#include <math.h> // ldexp, pow
599#endif
600
601#ifndef STBI_NO_STDIO
602#include <stdio.h>
603#endif
604
605#ifndef STBI_ASSERT
606#include <assert.h>
607#define STBI_ASSERT(x) assert(x)
608#endif
609
610#ifdef __cplusplus
611#define STBI_EXTERN extern "C"
612#else
613#define STBI_EXTERN extern
614#endif
615
616
617#ifndef _MSC_VER
618 #ifdef __cplusplus
619 #define stbi_inline inline
620 #else
621 #define stbi_inline
622 #endif
623#else
624 #define stbi_inline __forceinline
625#endif
626
627#ifndef STBI_NO_THREAD_LOCALS
628 #if defined(__cplusplus) && __cplusplus >= 201103L
629 #define STBI_THREAD_LOCAL thread_local
630 #elif defined(__GNUC__) && __GNUC__ < 5
631 #define STBI_THREAD_LOCAL __thread
632 #elif defined(_MSC_VER)
633 #define STBI_THREAD_LOCAL __declspec(thread)
634 #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L && !defined(__STDC_NO_THREADS__)
635 #define STBI_THREAD_LOCAL _Thread_local
636 #endif
637
638 #ifndef STBI_THREAD_LOCAL
639 #if defined(__GNUC__)
640 #define STBI_THREAD_LOCAL __thread
641 #endif
642 #endif
643#endif
644
645#if defined(_MSC_VER) || defined(__SYMBIAN32__)
646typedef unsigned short stbi__uint16;
647typedef signed short stbi__int16;
648typedef unsigned int stbi__uint32;
649typedef signed int stbi__int32;
650#else
651#include <stdint.h>
652typedef uint16_t stbi__uint16;
653typedef int16_t stbi__int16;
654typedef uint32_t stbi__uint32;
655typedef int32_t stbi__int32;
656#endif
657
658// should produce compiler error if size is wrong
659typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1];
660
661#ifdef _MSC_VER
662#define STBI_NOTUSED(v) (void)(v)
663#else
664#define STBI_NOTUSED(v) (void)sizeof(v)
665#endif
666
667#ifdef _MSC_VER
668#define STBI_HAS_LROTL
669#endif
670
671#ifdef STBI_HAS_LROTL
672 #define stbi_lrot(x,y) _lrotl(x,y)
673#else
674 #define stbi_lrot(x,y) (((x) << (y)) | ((x) >> (-(y) & 31)))
675#endif
676
677#if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED))
678// ok
679#elif !defined(STBI_MALLOC) && !defined(STBI_FREE) && !defined(STBI_REALLOC) && !defined(STBI_REALLOC_SIZED)
680// ok
681#else
682#error "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC (or STBI_REALLOC_SIZED)."
683#endif
684
685#ifndef STBI_MALLOC
686#define STBI_MALLOC(sz) malloc(sz)
687#define STBI_REALLOC(p,newsz) realloc(p,newsz)
688#define STBI_FREE(p) free(p)
689#endif
690
691#ifndef STBI_REALLOC_SIZED
692#define STBI_REALLOC_SIZED(p,oldsz,newsz) STBI_REALLOC(p,newsz)
693#endif
694
695// x86/x64 detection
696#if defined(__x86_64__) || defined(_M_X64)
697#define STBI__X64_TARGET
698#elif defined(__i386) || defined(_M_IX86)
699#define STBI__X86_TARGET
700#endif
701
702#if defined(__GNUC__) && defined(STBI__X86_TARGET) && !defined(__SSE2__) && !defined(STBI_NO_SIMD)
703// gcc doesn't support sse2 intrinsics unless you compile with -msse2,
704// which in turn means it gets to use SSE2 everywhere. This is unfortunate,
705// but previous attempts to provide the SSE2 functions with runtime
706// detection caused numerous issues. The way architecture extensions are
707// exposed in GCC/Clang is, sadly, not really suited for one-file libs.
708// New behavior: if compiled with -msse2, we use SSE2 without any
709// detection; if not, we don't use it at all.
710#define STBI_NO_SIMD
711#endif
712
713#if defined(__MINGW32__) && defined(STBI__X86_TARGET) && !defined(STBI_MINGW_ENABLE_SSE2) && !defined(STBI_NO_SIMD)
714// Note that __MINGW32__ doesn't actually mean 32-bit, so we have to avoid STBI__X64_TARGET
715//
716// 32-bit MinGW wants ESP to be 16-byte aligned, but this is not in the
717// Windows ABI and VC++ as well as Windows DLLs don't maintain that invariant.
718// As a result, enabling SSE2 on 32-bit MinGW is dangerous when not
719// simultaneously enabling "-mstackrealign".
720//
721// See https://github.com/nothings/stb/issues/81 for more information.
722//
723// So default to no SSE2 on 32-bit MinGW. If you've read this far and added
724// -mstackrealign to your build settings, feel free to #define STBI_MINGW_ENABLE_SSE2.
725#define STBI_NO_SIMD
726#endif
727
728#if !defined(STBI_NO_SIMD) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET)) && !defined(__TINYC__)
729#define STBI_SSE2
730#include <emmintrin.h>
731
732#ifdef _MSC_VER
733
734#if _MSC_VER >= 1400 // not VC6
735#include <intrin.h> // __cpuid
// Runs the __cpuid intrinsic with function id 1 and returns the fourth
// element of the result array.
static int stbi__cpuid3(void)
{
   int regs[4];
   __cpuid(regs, 1);
   return regs[3];
}
742#else
743static int stbi__cpuid3(void)
744{
745 int res;
746 __asm {
747 mov eax,1
748 cpuid
749 mov res,edx
750 }
751 return res;
752}
753#endif
754
755#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name
756
757#if !defined(STBI_NO_JPEG) && defined(STBI_SSE2)
// Run-time SSE2 check: returns 1 when bit 26 of the value reported by
// stbi__cpuid3() is set, 0 otherwise.
static int stbi__sse2_available(void)
{
   const int sse2_mask = 1 << 26;
   return (stbi__cpuid3() & sse2_mask) != 0;
}
763#endif
764
765#else // assume GCC-style if not VC++
766#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
767
768#if !defined(STBI_NO_JPEG) && defined(STBI_SSE2)
// GCC/Clang flavour of the SSE2 check: always reports SSE2 as available,
// without any run-time probing.
static int stbi__sse2_available(void)
{
   // This is only compiled on GCC/Clang when -msse2 is in effect. The
   // compiler is then already free to emit SSE2 instructions anywhere,
   // so using them here adds no new requirement.
   return 1;
}
776#endif
777
778#endif
779#endif
780
781// ARM NEON
782#if defined(STBI_NO_SIMD) && defined(STBI_NEON)
783#undef STBI_NEON
784#endif
785
786#ifdef STBI_NEON
787#include <arm_neon.h>
788#ifdef _MSC_VER
789#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name
790#else
791#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
792#endif
793#endif
794
795#ifndef STBI_SIMD_ALIGN
796#define STBI_SIMD_ALIGN(type, name) type name
797#endif
798
799#ifndef STBI_MAX_DIMENSIONS
800#define STBI_MAX_DIMENSIONS (1 << 24)
801#endif
802
803///////////////////////////////////////////////
804//
805// stbi__context struct and start_xxx functions
806
807// stbi__context structure is our basic context used by all images, so it
808// contains all the IO context, plus some basic image information
809typedef struct
810{
811 stbi__uint32 img_x, img_y;
812 int img_n, img_out_n;
813
814 stbi_io_callbacks io;
815 void *io_user_data;
816
817 int read_from_callbacks;
818 int buflen;
819 stbi_uc buffer_start[128];
820 int callback_already_read;
821
822 stbi_uc *img_buffer, *img_buffer_end;
823 stbi_uc *img_buffer_original, *img_buffer_original_end;
824} stbi__context;
825
826
827static void stbi__refill_buffer(stbi__context *s);
828
829// initialize a memory-decode context
830static void stbi__start_mem(stbi__context *s, stbi_uc const *buffer, int len)
831{
832 s->io.read = NULL;
833 s->read_from_callbacks = 0;
834 s->callback_already_read = 0;
835 s->img_buffer = s->img_buffer_original = (stbi_uc *) buffer;
836 s->img_buffer_end = s->img_buffer_original_end = (stbi_uc *) buffer+len;
837}
838
839// initialize a callback-based context
840static void stbi__start_callbacks(stbi__context *s, stbi_io_callbacks *c, void *user)
841{
842 s->io = *c;
843 s->io_user_data = user;
844 s->buflen = sizeof(s->buffer_start);
845 s->read_from_callbacks = 1;
846 s->callback_already_read = 0;
847 s->img_buffer = s->img_buffer_original = s->buffer_start;
848 stbi__refill_buffer(s);
849 s->img_buffer_original_end = s->img_buffer_end;
850}
851
852#ifndef STBI_NO_STDIO
853
// stdio 'read' callback: fill 'data' with up to 'size' bytes from the FILE*
// passed as 'user'; returns the number of bytes actually read.
static int stbi__stdio_read(void *user, char *data, int size)
{
   FILE *stream = (FILE *) user;
   size_t got = fread(data, 1, size, stream);
   return (int) got;
}
858
// stdio 'skip' callback: move the FILE* passed as 'user' forward by 'n' bytes.
static void stbi__stdio_skip(void *user, int n)
{
   FILE *stream = (FILE *) user;
   int peeked;

   fseek(stream, n, SEEK_CUR);

   // fseek clears the end-of-file indicator, so probe one byte to make
   // feof() truthful again...
   peeked = fgetc(stream);
   if (peeked == EOF)
      return;
   // ...and hand the byte back when there really was one
   ungetc(peeked, stream);
}
868
// stdio 'eof' callback: returns 1 once the FILE* passed as 'user' has hit
// end-of-file or an error, 0 otherwise.
static int stbi__stdio_eof(void *user)
{
   FILE *stream = (FILE *) user;
   if (feof(stream))
      return 1;
   return ferror(stream) ? 1 : 0;
}
873
874static stbi_io_callbacks stbi__stdio_callbacks =
875{
876 stbi__stdio_read,
877 stbi__stdio_skip,
878 stbi__stdio_eof,
879};
880
881static void stbi__start_file(stbi__context *s, FILE *f)
882{
883 stbi__start_callbacks(s, &stbi__stdio_callbacks, (void *) f);
884}
885
886//static void stop_file(stbi__context *s) { }
887
888#endif // !STBI_NO_STDIO
889
890static void stbi__rewind(stbi__context *s)
891{
892 // conceptually rewind SHOULD rewind to the beginning of the stream,
893 // but we just rewind to the beginning of the initial buffer, because
894 // we only use it after doing 'test', which only ever looks at at most 92 bytes
895 s->img_buffer = s->img_buffer_original;
896 s->img_buffer_end = s->img_buffer_original_end;
897}
898
// Channel ordering tags stored in stbi__result_info.channel_order.
enum
{
   STBI_ORDER_RGB = 0,
   STBI_ORDER_BGR = 1
};
904
// Side information a decoder reports about the pixel data it returned.
typedef struct
{
   int bits_per_channel;   // 8 or 16 (the post-processing code asserts this)
   int num_channels;       // reset to 0 by stbi__load_main before decoding
   int channel_order;      // one of the STBI_ORDER_* values
} stbi__result_info;
911
912#ifndef STBI_NO_JPEG
913static int stbi__jpeg_test(stbi__context *s);
914static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
915static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp);
916#endif
917
918#ifndef STBI_NO_PNG
919static int stbi__png_test(stbi__context *s);
920static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
921static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp);
922static int stbi__png_is16(stbi__context *s);
923#endif
924
925#ifndef STBI_NO_BMP
926static int stbi__bmp_test(stbi__context *s);
927static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
928static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp);
929#endif
930
931#ifndef STBI_NO_TGA
932static int stbi__tga_test(stbi__context *s);
933static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
934static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp);
935#endif
936
937#ifndef STBI_NO_PSD
938static int stbi__psd_test(stbi__context *s);
939static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc);
940static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp);
941static int stbi__psd_is16(stbi__context *s);
942#endif
943
944#ifndef STBI_NO_HDR
945static int stbi__hdr_test(stbi__context *s);
946static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
947static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp);
948#endif
949
950#ifndef STBI_NO_PIC
951static int stbi__pic_test(stbi__context *s);
952static void *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
953static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp);
954#endif
955
956#ifndef STBI_NO_GIF
957static int stbi__gif_test(stbi__context *s);
958static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
959static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp);
960static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp);
961#endif
962
963#ifndef STBI_NO_PNM
964static int stbi__pnm_test(stbi__context *s);
965static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
966static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp);
967static int stbi__pnm_is16(stbi__context *s);
968#endif
969
// Most recent failure message recorded by stbi__err; per-thread when the
// build provides STBI_THREAD_LOCAL, otherwise one shared variable.
#ifdef STBI_THREAD_LOCAL
static STBI_THREAD_LOCAL const char *stbi__g_failure_reason;
#else
static const char *stbi__g_failure_reason;
#endif
975
976STBIDEF const char *stbi_failure_reason(void)
977{
978 return stbi__g_failure_reason;
979}
980
981#ifndef STBI_NO_FAILURE_STRINGS
982static int stbi__err(const char *str)
983{
984 stbi__g_failure_reason = str;
985 return 0;
986}
987#endif
988
// Single funnel for allocations: forwards to the configurable STBI_MALLOC.
static void *stbi__malloc(size_t size)
{
   void *block = STBI_MALLOC(size);
   return block;
}
993
994// stb_image uses ints pervasively, including for offset calculations.
995// therefore the largest decoded image size we can support with the
996// current code, even on 64-bit targets, is INT_MAX. this is not a
997// significant limitation for the intended use case.
998//
999// we do, however, need to make sure our size calculations don't
1000// overflow. hence a few helper functions for size calculations that
1001// multiply integers together, making sure that they're non-negative
1002// and no overflow occurs.
1003
1004// return 1 if the sum is valid, 0 on overflow.
1005// negative terms are considered invalid.
// Returns 1 if "a + b" is a valid size: both terms are non-negative and the
// sum does not exceed INT_MAX. Returns 0 otherwise.
static int stbi__addsizes_valid(int a, int b)
{
   // negative terms are invalid; this now covers 'a' as well as 'b', as the
   // contract above promises
   if (a < 0 || b < 0) return 0;
   // here 0 <= b <= INT_MAX, so INT_MAX - b cannot overflow, and
   // "a + b <= INT_MAX" (which could overflow) is rewritten as the
   // equivalent "a <= INT_MAX - b" (which cannot)
   return a <= INT_MAX - b;
}
1015
1016// returns 1 if the product is valid, 0 on overflow.
1017// negative factors are considered invalid.
// Returns 1 if "a * b" is a valid size: both factors are non-negative and
// the product does not exceed INT_MAX. Returns 0 otherwise.
static int stbi__mul2sizes_valid(int a, int b)
{
   if (a < 0) return 0;
   if (b < 0) return 0;
   // anything times zero is fine; otherwise compare against the largest
   // 'a' that still fits, which avoids computing a*b at all
   return (b == 0) ? 1 : (a <= INT_MAX / b);
}
1025
#if !defined(STBI_NO_JPEG) || !defined(STBI_NO_PNG) || !defined(STBI_NO_TGA) || !defined(STBI_NO_HDR)
// Returns 1 if "a*b + add" has no negative terms/factors and does not overflow.
static int stbi__mad2sizes_valid(int a, int b, int add)
{
   // the product has to be proven safe before a*b is ever evaluated
   if (!stbi__mul2sizes_valid(a, b))
      return 0;
   return stbi__addsizes_valid(a*b, add);
}
#endif
1033
1034// returns 1 if "a*b*c + add" has no negative terms/factors and doesn't overflow
static int stbi__mad3sizes_valid(int a, int b, int c, int add)
{
   // validate one multiplication at a time so that no partial product is
   // computed before it is known to fit
   if (!stbi__mul2sizes_valid(a, b))
      return 0;
   if (!stbi__mul2sizes_valid(a*b, c))
      return 0;
   return stbi__addsizes_valid(a*b*c, add);
}
1040
1041// returns 1 if "a*b*c*d + add" has no negative terms/factors and doesn't overflow
#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) || !defined(STBI_NO_PNM)
static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add)
{
   // validate one multiplication at a time so that no partial product is
   // computed before it is known to fit
   if (!stbi__mul2sizes_valid(a, b))
      return 0;
   if (!stbi__mul2sizes_valid(a*b, c))
      return 0;
   if (!stbi__mul2sizes_valid(a*b*c, d))
      return 0;
   return stbi__addsizes_valid(a*b*c*d, add);
}
#endif
1049
#if !defined(STBI_NO_JPEG) || !defined(STBI_NO_PNG) || !defined(STBI_NO_TGA) || !defined(STBI_NO_HDR)
// Allocates "a*b + add" bytes, or returns NULL when that size is invalid
// (negative term/factor or overflow) or the allocation itself fails.
static void *stbi__malloc_mad2(int a, int b, int add)
{
   if (stbi__mad2sizes_valid(a, b, add))
      return stbi__malloc(a*b + add);
   return NULL;
}
#endif
1058
// Allocates "a*b*c + add" bytes, or returns NULL when that size is invalid
// (negative term/factor or overflow) or the allocation itself fails.
static void *stbi__malloc_mad3(int a, int b, int c, int add)
{
   if (stbi__mad3sizes_valid(a, b, c, add))
      return stbi__malloc(a*b*c + add);
   return NULL;
}
1064
#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) || !defined(STBI_NO_PNM)
// Allocates "a*b*c*d + add" bytes, or returns NULL when that size is invalid
// (negative term/factor or overflow) or the allocation itself fails.
static void *stbi__malloc_mad4(int a, int b, int c, int d, int add)
{
   if (stbi__mad4sizes_valid(a, b, c, d, add))
      return stbi__malloc(a*b*c*d + add);
   return NULL;
}
#endif
1072
1073// returns 1 if the sum of two signed ints is valid (between -2^31 and 2^31-1 inclusive), 0 on overflow.
static int stbi__addints_valid(int a, int b)
{
   int a_negative = (a < 0);
   int b_negative = (b < 0);

   // operands of opposite sign can never overflow when added
   if (a_negative != b_negative)
      return 1;
   // both negative: "a + b >= INT_MIN" rewritten so nothing overflows
   // (INT_MIN - b is safe because b < 0)
   if (a_negative)
      return a >= INT_MIN - b;
   // both non-negative: "a + b <= INT_MAX" rewritten the same way
   return a <= INT_MAX - b;
}
1080
1081// returns 1 if the product of two ints fits in a signed short, 0 on overflow.
// Returns 1 if a*b lies in [SHRT_MIN, SHRT_MAX], 0 otherwise. The product
// itself is never computed; each case compares 'a' against a quotient.
// Integer division truncates toward zero, which makes every quotient below
// the exact bound for the corresponding inequality.
static int stbi__mul2shorts_valid(int a, int b)
{
   if (a == 0 || b == 0) return 1; // a zero product always fits

   if ((a > 0) == (b > 0)) {
      // same sign => positive product, must not exceed SHRT_MAX
      if (b > 0) return a <= SHRT_MAX / b;
      // both negative: dividing by negative b flips the inequality
      return a >= SHRT_MAX / b;
   }

   // opposite signs => negative product, must not go below SHRT_MIN
   if (b < 0) return a <= SHRT_MIN / b; // b negative flips the inequality (also correct for b == -1)
   return a >= SHRT_MIN / b;
}
1089
1090// stbi__err - error
1091// stbi__errpf - error returning pointer to float
1092// stbi__errpuc - error returning pointer to unsigned char
1093
// Two-message form of stbi__err(short_text, long_text). Depending on the
// build it records nothing, the user-facing long text, or the short text.
#if defined(STBI_NO_FAILURE_STRINGS)
   #define stbi__err(x,y)  0
#elif defined(STBI_FAILURE_USERMSG)
   #define stbi__err(x,y)  stbi__err(y)
#else
   #define stbi__err(x,y)  stbi__err(x)
#endif

// Pointer-returning variants: record the error, then yield a NULL of the
// wanted pointer type (both arms of the conditional are NULL on purpose;
// the condition only exists to evaluate stbi__err).
#define stbi__errpf(x,y)   ((float *)(size_t) (stbi__err(x,y)?NULL:NULL))
#define stbi__errpuc(x,y)  ((unsigned char *)(size_t) (stbi__err(x,y)?NULL:NULL))
1104
1105STBIDEF void stbi_image_free(void *retval_from_stbi_load)
1106{
1107 STBI_FREE(retval_from_stbi_load);
1108}
1109
1110#ifndef STBI_NO_LINEAR
1111static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp);
1112#endif
1113
1114#ifndef STBI_NO_HDR
1115static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp);
1116#endif
1117
1118static int stbi__vertically_flip_on_load_global = 0;
1119
1120STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip)
1121{
1122 stbi__vertically_flip_on_load_global = flag_true_if_should_flip;
1123}
1124
#ifdef STBI_THREAD_LOCAL
// Per-thread override of the flip switch: '_set' records whether this thread
// has chosen a value, '_local' holds that value.
static STBI_THREAD_LOCAL int stbi__vertically_flip_on_load_local;
static STBI_THREAD_LOCAL int stbi__vertically_flip_on_load_set;

// Sets the flip switch for the calling thread only.
STBIDEF void stbi_set_flip_vertically_on_load_thread(int flag_true_if_should_flip)
{
   stbi__vertically_flip_on_load_set = 1;
   stbi__vertically_flip_on_load_local = flag_true_if_should_flip;
}

// effective setting: the thread's own choice if it made one, else the global
#define stbi__vertically_flip_on_load  (stbi__vertically_flip_on_load_set       \
                                         ? stbi__vertically_flip_on_load_local  \
                                         : stbi__vertically_flip_on_load_global)
#else
// no thread-local storage available: the global switch is the only setting
#define stbi__vertically_flip_on_load  stbi__vertically_flip_on_load_global
#endif // STBI_THREAD_LOCAL
1140
1141static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc)
1142{
1143 memset(ri, 0, sizeof(*ri)); // make sure it's initialized if we add new fields
1144 ri->bits_per_channel = 8; // default is 8 so most paths don't have to be changed
1145 ri->channel_order = STBI_ORDER_RGB; // all current input & output are this, but this is here so we can add BGR order
1146 ri->num_channels = 0;
1147
1148 // test the formats with a very explicit header first (at least a FOURCC
1149 // or distinctive magic number first)
1150 #ifndef STBI_NO_PNG
1151 if (stbi__png_test(s)) return stbi__png_load(s,x,y,comp,req_comp, ri);
1152 #endif
1153 #ifndef STBI_NO_BMP
1154 if (stbi__bmp_test(s)) return stbi__bmp_load(s,x,y,comp,req_comp, ri);
1155 #endif
1156 #ifndef STBI_NO_GIF
1157 if (stbi__gif_test(s)) return stbi__gif_load(s,x,y,comp,req_comp, ri);
1158 #endif
1159 #ifndef STBI_NO_PSD
1160 if (stbi__psd_test(s)) return stbi__psd_load(s,x,y,comp,req_comp, ri, bpc);
1161 #else
1162 STBI_NOTUSED(bpc);
1163 #endif
1164 #ifndef STBI_NO_PIC
1165 if (stbi__pic_test(s)) return stbi__pic_load(s,x,y,comp,req_comp, ri);
1166 #endif
1167
1168 // then the formats that can end up attempting to load with just 1 or 2
1169 // bytes matching expectations; these are prone to false positives, so
1170 // try them later
1171 #ifndef STBI_NO_JPEG
1172 if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp, ri);
1173 #endif
1174 #ifndef STBI_NO_PNM
1175 if (stbi__pnm_test(s)) return stbi__pnm_load(s,x,y,comp,req_comp, ri);
1176 #endif
1177
1178 #ifndef STBI_NO_HDR
1179 if (stbi__hdr_test(s)) {
1180 float *hdr = stbi__hdr_load(s, x,y,comp,req_comp, ri);
1181 return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);
1182 }
1183 #endif
1184
1185 #ifndef STBI_NO_TGA
1186 // test tga last because it's a crappy test!
1187 if (stbi__tga_test(s))
1188 return stbi__tga_load(s,x,y,comp,req_comp, ri);
1189 #endif
1190
1191 return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt");
1192}
1193
1194static stbi_uc *stbi__convert_16_to_8(stbi__uint16 *orig, int w, int h, int channels)
1195{
1196 int i;
1197 int img_len = w * h * channels;
1198 stbi_uc *reduced;
1199
1200 reduced = (stbi_uc *) stbi__malloc(img_len);
1201 if (reduced == NULL) return stbi__errpuc("outofmem", "Out of memory");
1202
1203 for (i = 0; i < img_len; ++i)
1204 reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each byte is sufficient approx of 16->8 bit scaling
1205
1206 STBI_FREE(orig);
1207 return reduced;
1208}
1209
1210static stbi__uint16 *stbi__convert_8_to_16(stbi_uc *orig, int w, int h, int channels)
1211{
1212 int i;
1213 int img_len = w * h * channels;
1214 stbi__uint16 *enlarged;
1215
1216 enlarged = (stbi__uint16 *) stbi__malloc(img_len*2);
1217 if (enlarged == NULL) return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory");
1218
1219 for (i = 0; i < img_len; ++i)
1220 enlarged[i] = (stbi__uint16)((orig[i] << 8) + orig[i]); // replicate to high and low byte, maps 0->0, 255->0xffff
1221
1222 STBI_FREE(orig);
1223 return enlarged;
1224}
1225
1226static void stbi__vertical_flip(void *image, int w, int h, int bytes_per_pixel)
1227{
1228 int row;
1229 size_t bytes_per_row = (size_t)w * bytes_per_pixel;
1230 stbi_uc temp[2048];
1231 stbi_uc *bytes = (stbi_uc *)image;
1232
1233 for (row = 0; row < (h>>1); row++) {
1234 stbi_uc *row0 = bytes + row*bytes_per_row;
1235 stbi_uc *row1 = bytes + (h - row - 1)*bytes_per_row;
1236 // swap row0 with row1
1237 size_t bytes_left = bytes_per_row;
1238 while (bytes_left) {
1239 size_t bytes_copy = (bytes_left < sizeof(temp)) ? bytes_left : sizeof(temp);
1240 memcpy(temp, row0, bytes_copy);
1241 memcpy(row0, row1, bytes_copy);
1242 memcpy(row1, temp, bytes_copy);
1243 row0 += bytes_copy;
1244 row1 += bytes_copy;
1245 bytes_left -= bytes_copy;
1246 }
1247 }
1248}
1249
1250#ifndef STBI_NO_GIF
1251static void stbi__vertical_flip_slices(void *image, int w, int h, int z, int bytes_per_pixel)
1252{
1253 int slice;
1254 int slice_size = w * h * bytes_per_pixel;
1255
1256 stbi_uc *bytes = (stbi_uc *)image;
1257 for (slice = 0; slice < z; ++slice) {
1258 stbi__vertical_flip(bytes, w, h, bytes_per_pixel);
1259 bytes += slice_size;
1260 }
1261}
1262#endif
1263
1264static unsigned char *stbi__load_and_postprocess_8bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
1265{
1266 stbi__result_info ri;
1267 void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8);
1268
1269 if (result == NULL)
1270 return NULL;
1271
1272 // it is the responsibility of the loaders to make sure we get either 8 or 16 bit.
1273 STBI_ASSERT(ri.bits_per_channel == 8 || ri.bits_per_channel == 16);
1274
1275 if (ri.bits_per_channel != 8) {
1276 result = stbi__convert_16_to_8((stbi__uint16 *) result, *x, *y, req_comp == 0 ? *comp : req_comp);
1277 ri.bits_per_channel = 8;
1278 }
1279
1280 // @TODO: move stbi__convert_format to here
1281
1282 if (stbi__vertically_flip_on_load) {
1283 int channels = req_comp ? req_comp : *comp;
1284 stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi_uc));
1285 }
1286
1287 return (unsigned char *) result;
1288}
1289
1290static stbi__uint16 *stbi__load_and_postprocess_16bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
1291{
1292 stbi__result_info ri;
1293 void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16);
1294
1295 if (result == NULL)
1296 return NULL;
1297
1298 // it is the responsibility of the loaders to make sure we get either 8 or 16 bit.
1299 STBI_ASSERT(ri.bits_per_channel == 8 || ri.bits_per_channel == 16);
1300
1301 if (ri.bits_per_channel != 16) {
1302 result = stbi__convert_8_to_16((stbi_uc *) result, *x, *y, req_comp == 0 ? *comp : req_comp);
1303 ri.bits_per_channel = 16;
1304 }
1305
1306 // @TODO: move stbi__convert_format16 to here
1307 // @TODO: special case RGB-to-Y (and RGBA-to-YA) for 8-bit-to-16-bit case to keep more precision
1308
1309 if (stbi__vertically_flip_on_load) {
1310 int channels = req_comp ? req_comp : *comp;
1311 stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi__uint16));
1312 }
1313
1314 return (stbi__uint16 *) result;
1315}
1316
1317#if !defined(STBI_NO_HDR) && !defined(STBI_NO_LINEAR)
1318static void stbi__float_postprocess(float *result, int *x, int *y, int *comp, int req_comp)
1319{
1320 if (stbi__vertically_flip_on_load && result != NULL) {
1321 int channels = req_comp ? req_comp : *comp;
1322 stbi__vertical_flip(result, *x, *y, channels * sizeof(float));
1323 }
1324}
1325#endif
1326
1327#ifndef STBI_NO_STDIO
1328
#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8)
// Local prototypes for the two Win32 text conversion routines used below and
// in stbi__fopen.
STBI_EXTERN __declspec(dllimport) int __stdcall MultiByteToWideChar(unsigned int cp, unsigned long flags, const char *str, int cbmb, wchar_t *widestr, int cchwide);
STBI_EXTERN __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int cp, unsigned long flags, const wchar_t *widestr, int cchwide, char *str, int cbmb, const char *defchar, int *used_default);

// Converts the NUL-terminated wide string 'input' to UTF-8 in 'buffer'
// (capacity 'bufferlen' bytes) and returns WideCharToMultiByte's result.
STBIDEF int stbi_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input)
{
   const unsigned int utf8_codepage = 65001;
   return WideCharToMultiByte(utf8_codepage, 0, input, -1, buffer, (int) bufferlen, NULL, NULL);
}
#endif
1340
// Opens 'filename' with the given fopen-style 'mode' and returns the stream,
// or NULL on failure. With STBI_WINDOWS_UTF8 on Windows both strings are
// treated as UTF-8 and converted to wide strings first; with MSVC 2005+ the
// *_s variants are used.
static FILE *stbi__fopen(char const *filename, char const *mode)
{
   FILE *handle;
#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8)
   wchar_t wide_name[1024];
   wchar_t wide_mode[64];

   // a zero return from the conversion means it failed
   if (MultiByteToWideChar(65001 /* UTF8 */, 0, filename, -1, wide_name, sizeof(wide_name)/sizeof(*wide_name)) == 0)
      return 0;
   if (MultiByteToWideChar(65001 /* UTF8 */, 0, mode, -1, wide_mode, sizeof(wide_mode)/sizeof(*wide_mode)) == 0)
      return 0;

#if defined(_MSC_VER) && _MSC_VER >= 1400
   if (_wfopen_s(&handle, wide_name, wide_mode) != 0)
      handle = 0;
#else
   handle = _wfopen(wide_name, wide_mode);
#endif

#elif defined(_MSC_VER) && _MSC_VER >= 1400
   if (fopen_s(&handle, filename, mode) != 0)
      handle = 0;
#else
   handle = fopen(filename, mode);
#endif
   return handle;
}
1368
1369
1370STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp)
1371{
1372 FILE *f = stbi__fopen(filename, "rb");
1373 unsigned char *result;
1374 if (!f) return stbi__errpuc("can't fopen", "Unable to open file");
1375 result = stbi_load_from_file(f,x,y,comp,req_comp);
1376 fclose(f);
1377 return result;
1378}
1379
1380STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
1381{
1382 unsigned char *result;
1383 stbi__context s;
1384 stbi__start_file(&s,f);
1385 result = stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
1386 if (result) {
1387 // need to 'unget' all the characters in the IO buffer
1388 fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
1389 }
1390 return result;
1391}
1392
1393STBIDEF stbi__uint16 *stbi_load_from_file_16(FILE *f, int *x, int *y, int *comp, int req_comp)
1394{
1395 stbi__uint16 *result;
1396 stbi__context s;
1397 stbi__start_file(&s,f);
1398 result = stbi__load_and_postprocess_16bit(&s,x,y,comp,req_comp);
1399 if (result) {
1400 // need to 'unget' all the characters in the IO buffer
1401 fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
1402 }
1403 return result;
1404}
1405
1406STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y, int *comp, int req_comp)
1407{
1408 FILE *f = stbi__fopen(filename, "rb");
1409 stbi__uint16 *result;
1410 if (!f) return (stbi_us *) stbi__errpuc("can't fopen", "Unable to open file");
1411 result = stbi_load_from_file_16(f,x,y,comp,req_comp);
1412 fclose(f);
1413 return result;
1414}
1415
1416
1417#endif //!STBI_NO_STDIO
1418
1419STBIDEF stbi_us *stbi_load_16_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels)
1420{
1421 stbi__context s;
1422 stbi__start_mem(&s,buffer,len);
1423 return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels);
1424}
1425
1426STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels)
1427{
1428 stbi__context s;
1429 stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user);
1430 return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels);
1431}
1432
1433STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
1434{
1435 stbi__context s;
1436 stbi__start_mem(&s,buffer,len);
1437 return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
1438}
1439
1440STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
1441{
1442 stbi__context s;
1443 stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
1444 return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
1445}
1446
1447#ifndef STBI_NO_GIF
1448STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp)
1449{
1450 unsigned char *result;
1451 stbi__context s;
1452 stbi__start_mem(&s,buffer,len);
1453
1454 result = (unsigned char*) stbi__load_gif_main(&s, delays, x, y, z, comp, req_comp);
1455 if (stbi__vertically_flip_on_load) {
1456 stbi__vertical_flip_slices( result, *x, *y, *z, *comp );
1457 }
1458
1459 return result;
1460}
1461#endif
1462
1463#ifndef STBI_NO_LINEAR
1464static float *stbi__loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp)
1465{
1466 unsigned char *data;
1467 #ifndef STBI_NO_HDR
1468 if (stbi__hdr_test(s)) {
1469 stbi__result_info ri;
1470 float *hdr_data = stbi__hdr_load(s,x,y,comp,req_comp, &ri);
1471 if (hdr_data)
1472 stbi__float_postprocess(hdr_data,x,y,comp,req_comp);
1473 return hdr_data;
1474 }
1475 #endif
1476 data = stbi__load_and_postprocess_8bit(s, x, y, comp, req_comp);
1477 if (data)
1478 return stbi__ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp);
1479 return stbi__errpf("unknown image type", "Image not of any known type, or corrupt");
1480}
1481
1482STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
1483{
1484 stbi__context s;
1485 stbi__start_mem(&s,buffer,len);
1486 return stbi__loadf_main(&s,x,y,comp,req_comp);
1487}
1488
1489STBIDEF float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
1490{
1491 stbi__context s;
1492 stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
1493 return stbi__loadf_main(&s,x,y,comp,req_comp);
1494}
1495
1496#ifndef STBI_NO_STDIO
1497STBIDEF float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp)
1498{
1499 float *result;
1500 FILE *f = stbi__fopen(filename, "rb");
1501 if (!f) return stbi__errpf("can't fopen", "Unable to open file");
1502 result = stbi_loadf_from_file(f,x,y,comp,req_comp);
1503 fclose(f);
1504 return result;
1505}
1506
1507STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
1508{
1509 stbi__context s;
1510 stbi__start_file(&s,f);
1511 return stbi__loadf_main(&s,x,y,comp,req_comp);
1512}
1513#endif // !STBI_NO_STDIO
1514
1515#endif // !STBI_NO_LINEAR
1516
// The is-hdr-or-not functions are defined regardless of whether
// STBI_NO_LINEAR is defined, for API simplicity; if STBI_NO_HDR is defined,
// they always report false!
1520
1521STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len)
1522{
1523 #ifndef STBI_NO_HDR
1524 stbi__context s;
1525 stbi__start_mem(&s,buffer,len);
1526 return stbi__hdr_test(&s);
1527 #else
1528 STBI_NOTUSED(buffer);
1529 STBI_NOTUSED(len);
1530 return 0;
1531 #endif
1532}
1533
1534#ifndef STBI_NO_STDIO
1535STBIDEF int stbi_is_hdr (char const *filename)
1536{
1537 FILE *f = stbi__fopen(filename, "rb");
1538 int result=0;
1539 if (f) {
1540 result = stbi_is_hdr_from_file(f);
1541 fclose(f);
1542 }
1543 return result;
1544}
1545
1546STBIDEF int stbi_is_hdr_from_file(FILE *f)
1547{
1548 #ifndef STBI_NO_HDR
1549 long pos = ftell(f);
1550 int res;
1551 stbi__context s;
1552 stbi__start_file(&s,f);
1553 res = stbi__hdr_test(&s);
1554 fseek(f, pos, SEEK_SET);
1555 return res;
1556 #else
1557 STBI_NOTUSED(f);
1558 return 0;
1559 #endif
1560}
1561#endif // !STBI_NO_STDIO
1562
1563STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user)
1564{
1565 #ifndef STBI_NO_HDR
1566 stbi__context s;
1567 stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
1568 return stbi__hdr_test(&s);
1569 #else
1570 STBI_NOTUSED(clbk);
1571 STBI_NOTUSED(user);
1572 return 0;
1573 #endif
1574}
1575
1576#ifndef STBI_NO_LINEAR
1577static float stbi__l2h_gamma=2.2f, stbi__l2h_scale=1.0f;
1578
1579STBIDEF void stbi_ldr_to_hdr_gamma(float gamma) { stbi__l2h_gamma = gamma; }
1580STBIDEF void stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = scale; }
1581#endif
1582
1583static float stbi__h2l_gamma_i=1.0f/2.2f, stbi__h2l_scale_i=1.0f;
1584
1585STBIDEF void stbi_hdr_to_ldr_gamma(float gamma) { stbi__h2l_gamma_i = 1/gamma; }
1586STBIDEF void stbi_hdr_to_ldr_scale(float scale) { stbi__h2l_scale_i = 1/scale; }
1587
1588
1589//////////////////////////////////////////////////////////////////////////////
1590//
1591// Common code used by all image loaders
1592//
1593
// Scan modes passed to the decoders (presumably: full load, type check only,
// header only -- confirm against the decoders that consume them).
enum
{
   STBI__SCAN_load   = 0,
   STBI__SCAN_type   = 1,
   STBI__SCAN_header = 2
};
1600
1601static void stbi__refill_buffer(stbi__context *s)
1602{
1603 int n = (s->io.read)(s->io_user_data,(char*)s->buffer_start,s->buflen);
1604 s->callback_already_read += (int) (s->img_buffer - s->img_buffer_original);
1605 if (n == 0) {
1606 // at end of file, treat same as if from memory, but need to handle case
1607 // where s->img_buffer isn't pointing to safe memory, e.g. 0-byte file
1608 s->read_from_callbacks = 0;
1609 s->img_buffer = s->buffer_start;
1610 s->img_buffer_end = s->buffer_start+1;
1611 *s->img_buffer = 0;
1612 } else {
1613 s->img_buffer = s->buffer_start;
1614 s->img_buffer_end = s->buffer_start + n;
1615 }
1616}
1617
1618stbi_inline static stbi_uc stbi__get8(stbi__context *s)
1619{
1620 if (s->img_buffer < s->img_buffer_end)
1621 return *s->img_buffer++;
1622 if (s->read_from_callbacks) {
1623 stbi__refill_buffer(s);
1624 return *s->img_buffer++;
1625 }
1626 return 0;
1627}
1628
1629#if defined(STBI_NO_JPEG) && defined(STBI_NO_HDR) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM)
1630// nothing
1631#else
1632stbi_inline static int stbi__at_eof(stbi__context *s)
1633{
1634 if (s->io.read) {
1635 if (!(s->io.eof)(s->io_user_data)) return 0;
1636 // if feof() is true, check if buffer = end
1637 // special case: we've only got the special 0 character at the end
1638 if (s->read_from_callbacks == 0) return 1;
1639 }
1640
1641 return s->img_buffer >= s->img_buffer_end;
1642}
1643#endif
1644
1645#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC)
1646// nothing
1647#else
1648static void stbi__skip(stbi__context *s, int n)
1649{
1650 if (n == 0) return; // already there!
1651 if (n < 0) {
1652 s->img_buffer = s->img_buffer_end;
1653 return;
1654 }
1655 if (s->io.read) {
1656 int blen = (int) (s->img_buffer_end - s->img_buffer);
1657 if (blen < n) {
1658 s->img_buffer = s->img_buffer_end;
1659 (s->io.skip)(s->io_user_data, n - blen);
1660 return;
1661 }
1662 }
1663 s->img_buffer += n;
1664}
1665#endif
1666
1667#if defined(STBI_NO_PNG) && defined(STBI_NO_TGA) && defined(STBI_NO_HDR) && defined(STBI_NO_PNM)
1668// nothing
1669#else
1670static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n)
1671{
1672 if (s->io.read) {
1673 int blen = (int) (s->img_buffer_end - s->img_buffer);
1674 if (blen < n) {
1675 int res, count;
1676
1677 memcpy(buffer, s->img_buffer, blen);
1678
1679 count = (s->io.read)(s->io_user_data, (char*) buffer + blen, n - blen);
1680 res = (count == (n-blen));
1681 s->img_buffer = s->img_buffer_end;
1682 return res;
1683 }
1684 }
1685
1686 if (s->img_buffer+n <= s->img_buffer_end) {
1687 memcpy(buffer, s->img_buffer, n);
1688 s->img_buffer += n;
1689 return 1;
1690 } else
1691 return 0;
1692}
1693#endif
1694
1695#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_PSD) && defined(STBI_NO_PIC)
1696// nothing
1697#else
1698static int stbi__get16be(stbi__context *s)
1699{
1700 int z = stbi__get8(s);
1701 return (z << 8) + stbi__get8(s);
1702}
1703#endif
1704
1705#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD) && defined(STBI_NO_PIC)
1706// nothing
1707#else
1708static stbi__uint32 stbi__get32be(stbi__context *s)
1709{
1710 stbi__uint32 z = stbi__get16be(s);
1711 return (z << 16) + stbi__get16be(s);
1712}
1713#endif
1714
1715#if defined(STBI_NO_BMP) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF)
1716// nothing
1717#else
1718static int stbi__get16le(stbi__context *s)
1719{
1720 int z = stbi__get8(s);
1721 return z + (stbi__get8(s) << 8);
1722}
1723#endif
1724
1725#ifndef STBI_NO_BMP
1726static stbi__uint32 stbi__get32le(stbi__context *s)
1727{
1728 stbi__uint32 z = stbi__get16le(s);
1729 z += (stbi__uint32)stbi__get16le(s) << 16;
1730 return z;
1731}
1732#endif
1733
// Keep only the low 8 bits of an int and cast to a byte, without compiler warnings.
#define STBI__BYTECAST(x)  ((stbi_uc) ((x) & 0xFF))
1735
1736#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM)
1737// nothing
1738#else
1739//////////////////////////////////////////////////////////////////////////////
1740//
1741// generic converter from built-in img_n to req_comp
1742// individual types do this automatically as much as possible (e.g. jpeg
1743// does all cases internally since it needs to colorspace convert anyway,
1744// and it never has alpha, so very few cases ). png can automatically
1745// interleave an alpha=255 channel, but falls back to this for other cases
1746//
1747// assume data buffer is malloced, so malloc a new one and free that one
1748// only failure mode is malloc failing
1749
1750static stbi_uc stbi__compute_y(int r, int g, int b)
1751{
1752 return (stbi_uc) (((r*77) + (g*150) + (29*b)) >> 8);
1753}
1754#endif
1755
#if !defined(STBI_NO_PNG) || !defined(STBI_NO_BMP) || !defined(STBI_NO_PSD) || !defined(STBI_NO_TGA) || !defined(STBI_NO_GIF) || !defined(STBI_NO_PIC) || !defined(STBI_NO_PNM)
// Converts an x*y image from img_n to req_comp 8-bit channels per pixel.
// If no conversion is needed 'data' is returned unchanged; otherwise a new
// buffer is returned and 'data' is freed. On failure 'data' is freed as well
// and NULL is returned.
static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int req_comp, unsigned int x, unsigned int y)
{
   int i, j;
   int combo;
   unsigned char *converted;

   if (img_n == req_comp) return data;
   STBI_ASSERT(req_comp >= 1 && req_comp <= 4);

   converted = (unsigned char *) stbi__malloc_mad3(req_comp, x, y, 0);
   if (!converted) {
      STBI_FREE(data);
      return stbi__errpuc("outofmem", "Out of memory");
   }

   // One switch per scanline instead of one per pixel: each case label
   // expands to the loop over a whole row.
   #define STBI__COMBO(a,b)  ((a)*8+(b))
   #define STBI__CASE(a,b)   case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)

   combo = STBI__COMBO(img_n, req_comp);

   for (j = 0; j < (int) y; ++j) {
      unsigned char *src  = data      + j * x * img_n   ;
      unsigned char *dest = converted + j * x * req_comp;

      switch (combo) {
         // grey -> ...
         STBI__CASE(1,2) { dest[0]=src[0]; dest[1]=255;                                     } break;
         STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0];                                  } break;
         STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=255;                     } break;
         // grey+alpha -> ...
         STBI__CASE(2,1) { dest[0]=src[0];                                                  } break;
         STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0];                                  } break;
         STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=src[1];                  } break;
         // rgb -> ...
         STBI__CASE(3,4) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];dest[3]=255;        } break;
         STBI__CASE(3,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]);                   } break;
         STBI__CASE(3,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); dest[1] = 255;    } break;
         // rgba -> ...
         STBI__CASE(4,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]);                   } break;
         STBI__CASE(4,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); dest[1] = src[3]; } break;
         STBI__CASE(4,3) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];                    } break;
         default: STBI_ASSERT(0); STBI_FREE(data); STBI_FREE(converted); return stbi__errpuc("unsupported", "Unsupported format conversion");
      }
   }
   #undef STBI__CASE

   STBI_FREE(data);
   return converted;
}
#endif
1803
1804#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD)
1805// nothing
1806#else
1807static stbi__uint16 stbi__compute_y_16(int r, int g, int b)
1808{
1809 return (stbi__uint16) (((r*77) + (g*150) + (29*b)) >> 8);
1810}
1811#endif
1812
1813#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD)
1814// nothing
1815#else
1816static stbi__uint16 *stbi__convert_format16(stbi__uint16 *data, int img_n, int req_comp, unsigned int x, unsigned int y)
1817{
1818 int i,j;
1819 stbi__uint16 *good;
1820
1821 if (req_comp == img_n) return data;
1822 STBI_ASSERT(req_comp >= 1 && req_comp <= 4);
1823
1824 good = (stbi__uint16 *) stbi__malloc(req_comp * x * y * 2);
1825 if (good == NULL) {
1826 STBI_FREE(data);
1827 return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory");
1828 }
1829
1830 for (j=0; j < (int) y; ++j) {
1831 stbi__uint16 *src = data + j * x * img_n ;
1832 stbi__uint16 *dest = good + j * x * req_comp;
1833
1834 #define STBI__COMBO(a,b) ((a)*8+(b))
1835 #define STBI__CASE(a,b) case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
1836 // convert source image with img_n components to one with req_comp components;
1837 // avoid switch per pixel, so use switch per scanline and massive macros
1838 switch (STBI__COMBO(img_n, req_comp)) {
1839 STBI__CASE(1,2) { dest[0]=src[0]; dest[1]=0xffff; } break;
1840 STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0]; } break;
1841 STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=0xffff; } break;
1842 STBI__CASE(2,1) { dest[0]=src[0]; } break;
1843 STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0]; } break;
1844 STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=src[1]; } break;
1845 STBI__CASE(3,4) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];dest[3]=0xffff; } break;
1846 STBI__CASE(3,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); } break;
1847 STBI__CASE(3,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); dest[1] = 0xffff; } break;
1848 STBI__CASE(4,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); } break;
1849 STBI__CASE(4,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); dest[1] = src[3]; } break;
1850 STBI__CASE(4,3) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2]; } break;
1851 default: STBI_ASSERT(0); STBI_FREE(data); STBI_FREE(good); return (stbi__uint16*) stbi__errpuc("unsupported", "Unsupported format conversion");
1852 }
1853 #undef STBI__CASE
1854 }
1855
1856 STBI_FREE(data);
1857 return good;
1858}
1859#endif
1860
1861#ifndef STBI_NO_LINEAR
1862static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp)
1863{
1864 int i,k,n;
1865 float *output;
1866 if (!data) return NULL;
1867 output = (float *) stbi__malloc_mad4(x, y, comp, sizeof(float), 0);
1868 if (output == NULL) { STBI_FREE(data); return stbi__errpf("outofmem", "Out of memory"); }
1869 // compute number of non-alpha components
1870 if (comp & 1) n = comp; else n = comp-1;
1871 for (i=0; i < x*y; ++i) {
1872 for (k=0; k < n; ++k) {
1873 output[i*comp + k] = (float) (pow(data[i*comp+k]/255.0f, stbi__l2h_gamma) * stbi__l2h_scale);
1874 }
1875 }
1876 if (n < comp) {
1877 for (i=0; i < x*y; ++i) {
1878 output[i*comp + n] = data[i*comp + n]/255.0f;
1879 }
1880 }
1881 STBI_FREE(data);
1882 return output;
1883}
1884#endif
1885
1886#ifndef STBI_NO_HDR
1887#define stbi__float2int(x) ((int) (x))
1888static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp)
1889{
1890 int i,k,n;
1891 stbi_uc *output;
1892 if (!data) return NULL;
1893 output = (stbi_uc *) stbi__malloc_mad3(x, y, comp, 0);
1894 if (output == NULL) { STBI_FREE(data); return stbi__errpuc("outofmem", "Out of memory"); }
1895 // compute number of non-alpha components
1896 if (comp & 1) n = comp; else n = comp-1;
1897 for (i=0; i < x*y; ++i) {
1898 for (k=0; k < n; ++k) {
1899 float z = (float) pow(data[i*comp+k]*stbi__h2l_scale_i, stbi__h2l_gamma_i) * 255 + 0.5f;
1900 if (z < 0) z = 0;
1901 if (z > 255) z = 255;
1902 output[i*comp + k] = (stbi_uc) stbi__float2int(z);
1903 }
1904 if (k < comp) {
1905 float z = data[i*comp+k] * 255 + 0.5f;
1906 if (z < 0) z = 0;
1907 if (z > 255) z = 255;
1908 output[i*comp + k] = (stbi_uc) stbi__float2int(z);
1909 }
1910 }
1911 STBI_FREE(data);
1912 return output;
1913}
1914#endif
1915
1916//////////////////////////////////////////////////////////////////////////////
1917//
1918// "baseline" JPEG/JFIF decoder
1919//
1920// simple implementation
1921// - doesn't support delayed output of y-dimension
1922// - simple interface (only one output format: 8-bit interleaved RGB)
1923// - doesn't try to recover corrupt jpegs
1924// - doesn't allow partial loading, loading multiple at once
1925// - still fast on x86 (copying globals into locals doesn't help x86)
1926// - allocates lots of intermediate memory (full size of all components)
1927// - non-interleaved case requires this anyway
1928// - allows good upsampling (see next)
1929// high-quality
1930// - upsampled channels are bilinearly interpolated, even across blocks
1931// - quality integer IDCT derived from IJG's 'slow'
1932// performance
1933// - fast huffman; reasonable integer IDCT
1934// - some SIMD kernels for common paths on targets with SSE2/NEON
1935// - uses a lot of intermediate memory, could cache poorly
1936
1937#ifndef STBI_NO_JPEG
1938
// huffman decoding acceleration
#define FAST_BITS   9  // larger handles more cases; smaller stomps less cache

// One JPEG huffman table together with the lookup data that
// stbi__build_huffman() derives from the per-length code counts.
typedef struct
{
   // indexed by the next FAST_BITS bits of the bitstream: the symbol index for
   // codes of length <= FAST_BITS, or 255 meaning "not accelerated"
   stbi_uc  fast[1 << FAST_BITS];
   // weirdly, repacking this into AoS is a 10% speed loss, instead of a win
   stbi__uint16 code[256];    // huffman code assigned to each symbol index
   stbi_uc  values[256];      // value returned by the decoder for each symbol index (not written by stbi__build_huffman)
   stbi_uc  size[257];        // code length in bits for each symbol index; a 0 entry terminates the list
   unsigned int maxcode[18];  // per code length: largest code + 1, preshifted to 16 bits; entry 17 is an all-ones sentinel
   int    delta[17];   // old 'firstsymbol' - old 'firstcode'
} stbi__huffman;
1952
// Decoder state for one JPEG image: input stream, entropy-coding tables,
// per-component buffers, the bit buffer used by the huffman decoder, the
// parameters of the current scan and the function pointers for the kernels.
typedef struct
{
   stbi__context *s;                          // input stream the bit buffer is refilled from
   stbi__huffman huff_dc[4];                  // huffman tables for DC coefficients
   stbi__huffman huff_ac[4];                  // huffman tables for AC coefficients
   stbi__uint16 dequant[4][64];               // NOTE(review): presumably one dequantization table per 'tq' index -- confirm
   stbi__int16 fast_ac[4][1 << FAST_BITS];    // combined run/length/value tables, see stbi__build_fast_ac

// sizes for components, interleaved MCUs
   int img_h_max, img_v_max;
   int img_mcu_x, img_mcu_y;
   int img_mcu_w, img_mcu_h;

// definition of jpeg image component
   struct
   {
      int id;
      int h,v;
      int tq;
      int hd,ha;
      int dc_pred;                            // running DC predictor; each decoded DC difference is added to it

      int x,y,w2,h2;
      stbi_uc *data;
      void *raw_data, *raw_coeff;
      stbi_uc *linebuf;
      short   *coeff;   // progressive only
      int      coeff_w, coeff_h; // number of 8x8 coefficient blocks
   } img_comp[4];

   stbi__uint32   code_buffer; // jpeg entropy-coded buffer
   int            code_bits;   // number of valid bits
   unsigned char  marker;      // marker seen while filling entropy buffer
   int            nomore;      // flag if we saw a marker so must stop

   int            progressive;
   int            spec_start;                 // first zigzag index covered by the current scan
   int            spec_end;                   // last zigzag index covered by the current scan
   int            succ_high;                  // 0 selects a first scan, nonzero a refinement scan
   int            succ_low;                   // bit position: decoded values are scaled by 1 << succ_low
   int            eob_run;                    // remaining count of an end-of-band run (progressive AC scans)
   int            jfif;
   int            app14_color_transform; // Adobe APP14 tag
   int            rgb;

   int scan_n, order[4];
   int restart_interval, todo;

// kernels
   void (*idct_block_kernel)(stbi_uc *out, int out_stride, short data[64]);
   void (*YCbCr_to_RGB_kernel)(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step);
   stbi_uc *(*resample_row_hv_2_kernel)(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs);
} stbi__jpeg;
2006
2007static int stbi__build_huffman(stbi__huffman *h, int *count)
2008{
2009 int i,j,k=0;
2010 unsigned int code;
2011 // build size list for each symbol (from JPEG spec)
2012 for (i=0; i < 16; ++i) {
2013 for (j=0; j < count[i]; ++j) {
2014 h->size[k++] = (stbi_uc) (i+1);
2015 if(k >= 257) return stbi__err("bad size list","Corrupt JPEG");
2016 }
2017 }
2018 h->size[k] = 0;
2019
2020 // compute actual symbols (from jpeg spec)
2021 code = 0;
2022 k = 0;
2023 for(j=1; j <= 16; ++j) {
2024 // compute delta to add to code to compute symbol id
2025 h->delta[j] = k - code;
2026 if (h->size[k] == j) {
2027 while (h->size[k] == j)
2028 h->code[k++] = (stbi__uint16) (code++);
2029 if (code-1 >= (1u << j)) return stbi__err("bad code lengths","Corrupt JPEG");
2030 }
2031 // compute largest code + 1 for this size, preshifted as needed later
2032 h->maxcode[j] = code << (16-j);
2033 code <<= 1;
2034 }
2035 h->maxcode[j] = 0xffffffff;
2036
2037 // build non-spec acceleration table; 255 is flag for not-accelerated
2038 memset(h->fast, 255, 1 << FAST_BITS);
2039 for (i=0; i < k; ++i) {
2040 int s = h->size[i];
2041 if (s <= FAST_BITS) {
2042 int c = h->code[i] << (FAST_BITS-s);
2043 int m = 1 << (FAST_BITS-s);
2044 for (j=0; j < m; ++j) {
2045 h->fast[c+j] = (stbi_uc) i;
2046 }
2047 }
2048 }
2049 return 1;
2050}
2051
2052// build a table that decodes both magnitude and value of small ACs in
2053// one go.
2054static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h)
2055{
2056 int i;
2057 for (i=0; i < (1 << FAST_BITS); ++i) {
2058 stbi_uc fast = h->fast[i];
2059 fast_ac[i] = 0;
2060 if (fast < 255) {
2061 int rs = h->values[fast];
2062 int run = (rs >> 4) & 15;
2063 int magbits = rs & 15;
2064 int len = h->size[fast];
2065
2066 if (magbits && len + magbits <= FAST_BITS) {
2067 // magnitude code followed by receive_extend code
2068 int k = ((i << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits);
2069 int m = 1 << (magbits - 1);
2070 if (k < m) k += (~0U << magbits) + 1;
2071 // if the result is small enough, we can fit it in fast_ac table
2072 if (k >= -128 && k <= 127)
2073 fast_ac[i] = (stbi__int16) ((k * 256) + (run * 16) + (len + magbits));
2074 }
2075 }
2076 }
2077}
2078
2079static void stbi__grow_buffer_unsafe(stbi__jpeg *j)
2080{
2081 do {
2082 unsigned int b = j->nomore ? 0 : stbi__get8(j->s);
2083 if (b == 0xff) {
2084 int c = stbi__get8(j->s);
2085 while (c == 0xff) c = stbi__get8(j->s); // consume fill bytes
2086 if (c != 0) {
2087 j->marker = (unsigned char) c;
2088 j->nomore = 1;
2089 return;
2090 }
2091 }
2092 j->code_buffer |= b << (24 - j->code_bits);
2093 j->code_bits += 8;
2094 } while (j->code_bits <= 24);
2095}
2096
2097// (1 << n) - 1
2098static const stbi__uint32 stbi__bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535};
2099
2100// decode a jpeg huffman value from the bitstream
2101stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h)
2102{
2103 unsigned int temp;
2104 int c,k;
2105
2106 if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
2107
2108 // look at the top FAST_BITS and determine what symbol ID it is,
2109 // if the code is <= FAST_BITS
2110 c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
2111 k = h->fast[c];
2112 if (k < 255) {
2113 int s = h->size[k];
2114 if (s > j->code_bits)
2115 return -1;
2116 j->code_buffer <<= s;
2117 j->code_bits -= s;
2118 return h->values[k];
2119 }
2120
2121 // naive test is to shift the code_buffer down so k bits are
2122 // valid, then test against maxcode. To speed this up, we've
2123 // preshifted maxcode left so that it has (16-k) 0s at the
2124 // end; in other words, regardless of the number of bits, it
2125 // wants to be compared against something shifted to have 16;
2126 // that way we don't need to shift inside the loop.
2127 temp = j->code_buffer >> 16;
2128 for (k=FAST_BITS+1 ; ; ++k)
2129 if (temp < h->maxcode[k])
2130 break;
2131 if (k == 17) {
2132 // error! code not found
2133 j->code_bits -= 16;
2134 return -1;
2135 }
2136
2137 if (k > j->code_bits)
2138 return -1;
2139
2140 // convert the huffman code to the symbol id
2141 c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k];
2142 if(c < 0 || c >= 256) // symbol id out of bounds!
2143 return -1;
2144 STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]);
2145
2146 // convert the id to a symbol
2147 j->code_bits -= k;
2148 j->code_buffer <<= k;
2149 return h->values[c];
2150}
2151
// stbi__jbias[n] == (-1 << n) + 1, for n = 0..15; added by
// stbi__extend_receive() when the first received bit is 0
static const int stbi__jbias[16] =
{
      0,    -1,    -3,     -7,    -15,    -31,    -63,   -127,
   -255,  -511, -1023,  -2047,  -4095,  -8191, -16383, -32767
};
2154
2155// combined JPEG 'receive' and JPEG 'extend', since baseline
2156// always extends everything it receives.
2157stbi_inline static int stbi__extend_receive(stbi__jpeg *j, int n)
2158{
2159 unsigned int k;
2160 int sgn;
2161 if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
2162 if (j->code_bits < n) return 0; // ran out of bits from stream, return 0s intead of continuing
2163
2164 sgn = j->code_buffer >> 31; // sign bit always in MSB; 0 if MSB clear (positive), 1 if MSB set (negative)
2165 k = stbi_lrot(j->code_buffer, n);
2166 j->code_buffer = k & ~stbi__bmask[n];
2167 k &= stbi__bmask[n];
2168 j->code_bits -= n;
2169 return k + (stbi__jbias[n] & (sgn - 1));
2170}
2171
2172// get some unsigned bits
2173stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg *j, int n)
2174{
2175 unsigned int k;
2176 if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
2177 if (j->code_bits < n) return 0; // ran out of bits from stream, return 0s intead of continuing
2178 k = stbi_lrot(j->code_buffer, n);
2179 j->code_buffer = k & ~stbi__bmask[n];
2180 k &= stbi__bmask[n];
2181 j->code_bits -= n;
2182 return k;
2183}
2184
2185stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j)
2186{
2187 unsigned int k;
2188 if (j->code_bits < 1) stbi__grow_buffer_unsafe(j);
2189 if (j->code_bits < 1) return 0; // ran out of bits from stream, return 0s intead of continuing
2190 k = j->code_buffer;
2191 j->code_buffer <<= 1;
2192 --j->code_bits;
2193 return k & 0x80000000;
2194}
2195
// given a value that's at position X in the zigzag stream,
// where does it appear in the 8x8 matrix coded as row-major?
//
// The table has 15 extra entries beyond the 64 real ones, all mapping to 63:
// the block decoders add a run of up to 15 to the position before indexing,
// so a corrupt stream can index up to 63+15 without leaving the table.
static const stbi_uc stbi__jpeg_dezigzag[64+15] =
{
    0,  1,  8, 16,  9,  2,  3, 10,
   17, 24, 32, 25, 18, 11,  4,  5,
   12, 19, 26, 33, 40, 48, 41, 34,
   27, 20, 13,  6,  7, 14, 21, 28,
   35, 42, 49, 56, 57, 50, 43, 36,
   29, 22, 15, 23, 30, 37, 44, 51,
   58, 59, 52, 45, 38, 31, 39, 46,
   53, 60, 61, 54, 47, 55, 62, 63,
   // let corrupt input sample past end
   63, 63, 63, 63, 63, 63, 63, 63,
   63, 63, 63, 63, 63, 63, 63
};
2212
2213// decode one 64-entry block--
2214static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi__uint16 *dequant)
2215{
2216 int diff,dc,k;
2217 int t;
2218
2219 if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
2220 t = stbi__jpeg_huff_decode(j, hdc);
2221 if (t < 0 || t > 15) return stbi__err("bad huffman code","Corrupt JPEG");
2222
2223 // 0 all the ac values now so we can do it 32-bits at a time
2224 memset(data,0,64*sizeof(data[0]));
2225
2226 diff = t ? stbi__extend_receive(j, t) : 0;
2227 if (!stbi__addints_valid(j->img_comp[b].dc_pred, diff)) return stbi__err("bad delta","Corrupt JPEG");
2228 dc = j->img_comp[b].dc_pred + diff;
2229 j->img_comp[b].dc_pred = dc;
2230 if (!stbi__mul2shorts_valid(dc, dequant[0])) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
2231 data[0] = (short) (dc * dequant[0]);
2232
2233 // decode AC components, see JPEG spec
2234 k = 1;
2235 do {
2236 unsigned int zig;
2237 int c,r,s;
2238 if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
2239 c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
2240 r = fac[c];
2241 if (r) { // fast-AC path
2242 k += (r >> 4) & 15; // run
2243 s = r & 15; // combined length
2244 if (s > j->code_bits) return stbi__err("bad huffman code", "Combined length longer than code bits available");
2245 j->code_buffer <<= s;
2246 j->code_bits -= s;
2247 // decode into unzigzag'd location
2248 zig = stbi__jpeg_dezigzag[k++];
2249 data[zig] = (short) ((r >> 8) * dequant[zig]);
2250 } else {
2251 int rs = stbi__jpeg_huff_decode(j, hac);
2252 if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
2253 s = rs & 15;
2254 r = rs >> 4;
2255 if (s == 0) {
2256 if (rs != 0xf0) break; // end block
2257 k += 16;
2258 } else {
2259 k += r;
2260 // decode into unzigzag'd location
2261 zig = stbi__jpeg_dezigzag[k++];
2262 data[zig] = (short) (stbi__extend_receive(j,s) * dequant[zig]);
2263 }
2264 }
2265 } while (k < 64);
2266 return 1;
2267}
2268
2269static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__huffman *hdc, int b)
2270{
2271 int diff,dc;
2272 int t;
2273 if (j->spec_end != 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
2274
2275 if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
2276
2277 if (j->succ_high == 0) {
2278 // first scan for DC coefficient, must be first
2279 memset(data,0,64*sizeof(data[0])); // 0 all the ac values now
2280 t = stbi__jpeg_huff_decode(j, hdc);
2281 if (t < 0 || t > 15) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
2282 diff = t ? stbi__extend_receive(j, t) : 0;
2283
2284 if (!stbi__addints_valid(j->img_comp[b].dc_pred, diff)) return stbi__err("bad delta", "Corrupt JPEG");
2285 dc = j->img_comp[b].dc_pred + diff;
2286 j->img_comp[b].dc_pred = dc;
2287 if (!stbi__mul2shorts_valid(dc, 1 << j->succ_low)) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
2288 data[0] = (short) (dc * (1 << j->succ_low));
2289 } else {
2290 // refinement scan for DC coefficient
2291 if (stbi__jpeg_get_bit(j))
2292 data[0] += (short) (1 << j->succ_low);
2293 }
2294 return 1;
2295}
2296
// Decode the AC coefficients of one block in a progressive scan, covering
// zigzag positions spec_start..spec_end.
//
// In a first scan (succ_high == 0) new coefficients are decoded and stored
// scaled by 1 << succ_low. In a refinement scan, coefficients that are already
// nonzero may gain one more bit of magnitude and newly nonzero coefficients
// are set to +/- (1 << succ_low). j->eob_run counts how many further blocks
// have no new coefficients in this band.
//
// Returns 1 on success, or the result of stbi__err() on corrupt data
// (including a scan whose spec_start is 0).
//
// @OPTIMIZE: store non-zigzagged during the decode passes,
// and only de-zigzag when dequantizing
static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__huffman *hac, stbi__int16 *fac)
{
   int k;
   if (j->spec_start == 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");

   if (j->succ_high == 0) {
      int shift = j->succ_low;

      // inside an end-of-band run: this block adds nothing, just count it off
      if (j->eob_run) {
         --j->eob_run;
         return 1;
      }

      k = j->spec_start;
      do {
         unsigned int zig;
         int c,r,s;
         if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
         c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
         r = fac[c];
         if (r) { // fast-AC path
            k += (r >> 4) & 15; // run
            s = r & 15; // combined length
            if (s > j->code_bits) return stbi__err("bad huffman code", "Combined length longer than code bits available");
            j->code_buffer <<= s;
            j->code_bits -= s;
            zig = stbi__jpeg_dezigzag[k++];
            data[zig] = (short) ((r >> 8) * (1 << shift));
         } else {
            int rs = stbi__jpeg_huff_decode(j, hac);
            if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
            s = rs & 15;
            r = rs >> 4;
            if (s == 0) {
               if (r < 15) {
                  // end-of-band: run length is 2^r plus r extra bits; the
                  // decrement accounts for the current block
                  j->eob_run = (1 << r);
                  if (r)
                     j->eob_run += stbi__jpeg_get_bits(j, r);
                  --j->eob_run;
                  break;
               }
               // r == 15, s == 0: skip 16 positions
               k += 16;
            } else {
               k += r;
               zig = stbi__jpeg_dezigzag[k++];
               data[zig] = (short) (stbi__extend_receive(j,s) * (1 << shift));
            }
         }
      } while (k <= j->spec_end);
   } else {
      // refinement scan for these AC coefficients

      short bit = (short) (1 << j->succ_low);

      if (j->eob_run) {
         // inside an end-of-band run: no new coefficients, but every already
         // nonzero coefficient in the band still reads one correction bit
         --j->eob_run;
         for (k = j->spec_start; k <= j->spec_end; ++k) {
            short *p = &data[stbi__jpeg_dezigzag[k]];
            if (*p != 0)
               if (stbi__jpeg_get_bit(j))
                  if ((*p & bit)==0) {
                     // grow the magnitude away from zero
                     if (*p > 0)
                        *p += bit;
                     else
                        *p -= bit;
                  }
         }
      } else {
         k = j->spec_start;
         do {
            int r,s;
            int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh
            if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
            s = rs & 15;
            r = rs >> 4;
            if (s == 0) {
               if (r < 15) {
                  j->eob_run = (1 << r) - 1;
                  if (r)
                     j->eob_run += stbi__jpeg_get_bits(j, r);
                  r = 64; // force end of block
               } else {
                  // r=15 s=0 should write 16 0s, so we just do
                  // a run of 15 0s and then write s (which is 0),
                  // so we don't have to do anything special here
               }
            } else {
               if (s != 1) return stbi__err("bad huffman code", "Corrupt JPEG");
               // sign bit
               if (stbi__jpeg_get_bit(j))
                  s = bit;
               else
                  s = -bit;
            }

            // advance by r: nonzero coefficients passed on the way each read a
            // correction bit and do not count toward the run; the new value s
            // lands on the first zero coefficient after r zeros were skipped
            while (k <= j->spec_end) {
               short *p = &data[stbi__jpeg_dezigzag[k++]];
               if (*p != 0) {
                  if (stbi__jpeg_get_bit(j))
                     if ((*p & bit)==0) {
                        if (*p > 0)
                           *p += bit;
                        else
                           *p -= bit;
                     }
               } else {
                  if (r == 0) {
                     *p = (short) s;
                     break;
                  }
                  --r;
               }
            }
         } while (k <= j->spec_end);
      }
   }
   return 1;
}
2418
2419// take a -128..127 value and stbi__clamp it and convert to 0..255
2420stbi_inline static stbi_uc stbi__clamp(int x)
2421{
2422 // trick to use a single test to catch both cases
2423 if ((unsigned int) x > 255) {
2424 if (x < 0) return 0;
2425 if (x > 255) return 255;
2426 }
2427 return (stbi_uc) x;
2428}
2429
// fixed-point helpers with 12 fractional bits:
//   stbi__f2f(x)  converts a floating-point constant to fixed point (rounding via +0.5)
//   stbi__fsh(x)  scales an integer value up by the same factor of 4096
#define stbi__f2f(x)  ((int) (((x) * 4096 + 0.5)))
#define stbi__fsh(x)  ((x) * 4096)

// derived from jidctint -- DCT_ISLOW
//
// One 1-D 8-point inverse DCT pass over the inputs s0..s7. The macro DECLARES
// the locals t0..t3, p1..p5 and x0..x3, so it must be the first thing in its
// enclosing block and can be used only once per block. On exit x0..x3 hold
// the even-part results and t0..t3 the odd-part results, all scaled up by
// 1<<12; the caller adds its rounding bias and forms the outputs as
// (xN + tM) >> shift and (xN - tM) >> shift.
#define STBI__IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \
   int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \
   p2 = s2;                                    \
   p3 = s6;                                    \
   p1 = (p2+p3) * stbi__f2f(0.5411961f);       \
   t2 = p1 + p3*stbi__f2f(-1.847759065f);      \
   t3 = p1 + p2*stbi__f2f( 0.765366865f);      \
   p2 = s0;                                    \
   p3 = s4;                                    \
   t0 = stbi__fsh(p2+p3);                      \
   t1 = stbi__fsh(p2-p3);                      \
   x0 = t0+t3;                                 \
   x3 = t0-t3;                                 \
   x1 = t1+t2;                                 \
   x2 = t1-t2;                                 \
   t0 = s7;                                    \
   t1 = s5;                                    \
   t2 = s3;                                    \
   t3 = s1;                                    \
   p3 = t0+t2;                                 \
   p4 = t1+t3;                                 \
   p1 = t0+t3;                                 \
   p2 = t1+t2;                                 \
   p5 = (p3+p4)*stbi__f2f( 1.175875602f);      \
   t0 = t0*stbi__f2f( 0.298631336f);           \
   t1 = t1*stbi__f2f( 2.053119869f);           \
   t2 = t2*stbi__f2f( 3.072711026f);           \
   t3 = t3*stbi__f2f( 1.501321110f);           \
   p1 = p5 + p1*stbi__f2f(-0.899976223f);      \
   p2 = p5 + p2*stbi__f2f(-2.562915447f);      \
   p3 = p3*stbi__f2f(-1.961570560f);           \
   p4 = p4*stbi__f2f(-0.390180644f);           \
   t3 += p1+p4;                                \
   t2 += p2+p3;                                \
   t1 += p2+p4;                                \
   t0 += p1+p3;
2470
2471static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64])
2472{
2473 int i,val[64],*v=val;
2474 stbi_uc *o;
2475 short *d = data;
2476
2477 // columns
2478 for (i=0; i < 8; ++i,++d, ++v) {
2479 // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
2480 if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0
2481 && d[40]==0 && d[48]==0 && d[56]==0) {
2482 // no shortcut 0 seconds
2483 // (1|2|3|4|5|6|7)==0 0 seconds
2484 // all separate -0.047 seconds
2485 // 1 && 2|3 && 4|5 && 6|7: -0.047 seconds
2486 int dcterm = d[0]*4;
2487 v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
2488 } else {
2489 STBI__IDCT_1D(d[ 0],d[ 8],d[16],d[24],d[32],d[40],d[48],d[56])
2490 // constants scaled things up by 1<<12; let's bring them back
2491 // down, but keep 2 extra bits of precision
2492 x0 += 512; x1 += 512; x2 += 512; x3 += 512;
2493 v[ 0] = (x0+t3) >> 10;
2494 v[56] = (x0-t3) >> 10;
2495 v[ 8] = (x1+t2) >> 10;
2496 v[48] = (x1-t2) >> 10;
2497 v[16] = (x2+t1) >> 10;
2498 v[40] = (x2-t1) >> 10;
2499 v[24] = (x3+t0) >> 10;
2500 v[32] = (x3-t0) >> 10;
2501 }
2502 }
2503
2504 for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) {
2505 // no fast case since the first 1D IDCT spread components out
2506 STBI__IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7])
2507 // constants scaled things up by 1<<12, plus we had 1<<2 from first
2508 // loop, plus horizontal and vertical each scale by sqrt(8) so together
2509 // we've got an extra 1<<3, so 1<<17 total we need to remove.
2510 // so we want to round that, which means adding 0.5 * 1<<17,
2511 // aka 65536. Also, we'll end up with -128 to 127 that we want
2512 // to encode as 0..255 by adding 128, so we'll add that before the shift
2513 x0 += 65536 + (128<<17);
2514 x1 += 65536 + (128<<17);
2515 x2 += 65536 + (128<<17);
2516 x3 += 65536 + (128<<17);
2517 // tried computing the shifts into temps, or'ing the temps to see
2518 // if any were out of range, but that was slower
2519 o[0] = stbi__clamp((x0+t3) >> 17);
2520 o[7] = stbi__clamp((x0-t3) >> 17);
2521 o[1] = stbi__clamp((x1+t2) >> 17);
2522 o[6] = stbi__clamp((x1-t2) >> 17);
2523 o[2] = stbi__clamp((x2+t1) >> 17);
2524 o[5] = stbi__clamp((x2-t1) >> 17);
2525 o[3] = stbi__clamp((x3+t0) >> 17);
2526 o[4] = stbi__clamp((x3-t0) >> 17);
2527 }
2528}
2529
2530#ifdef STBI_SSE2
// sse2 integer IDCT. not the fastest possible implementation but it
// produces bit-identical results to the generic C version so it's
// fully "transparent".
//
//   out         destination of the top-left output byte
//   out_stride  distance in bytes between consecutive output rows
//   data        64 coefficients in row-major order; read with aligned
//               16-byte loads, so it must be 16-byte aligned
//
// Structure: column pass, 16-bit 8x8 transpose, row pass, pack to bytes with
// unsigned saturation, 8-bit transpose, then eight 8-byte row stores.
static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
{
   // This is constructed to match our regular (generic) integer IDCT exactly.
   __m128i row0, row1, row2, row3, row4, row5, row6, row7;
   __m128i tmp;

   // dot product constant: even elems=x, odd elems=y
   #define dct_const(x,y)  _mm_setr_epi16((x),(y),(x),(y),(x),(y),(x),(y))

   // out(0) = c0[even]*x + c0[odd]*y   (c0, x, y 16-bit, out 32-bit)
   // out(1) = c1[even]*x + c1[odd]*y
   #define dct_rot(out0,out1, x,y,c0,c1) \
      __m128i c0##lo = _mm_unpacklo_epi16((x),(y)); \
      __m128i c0##hi = _mm_unpackhi_epi16((x),(y)); \
      __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \
      __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \
      __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \
      __m128i out1##_h = _mm_madd_epi16(c0##hi, c1)

   // out = in << 12  (in 16-bit, out 32-bit)
   #define dct_widen(out, in) \
      __m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \
      __m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4)

   // wide add
   #define dct_wadd(out, a, b) \
      __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \
      __m128i out##_h = _mm_add_epi32(a##_h, b##_h)

   // wide sub
   #define dct_wsub(out, a, b) \
      __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \
      __m128i out##_h = _mm_sub_epi32(a##_h, b##_h)

   // butterfly a/b, add bias, then shift by "s" and pack
   #define dct_bfly32o(out0, out1, a,b,bias,s) \
      { \
         __m128i abiased_l = _mm_add_epi32(a##_l, bias); \
         __m128i abiased_h = _mm_add_epi32(a##_h, bias); \
         dct_wadd(sum, abiased, b); \
         dct_wsub(dif, abiased, b); \
         out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s)); \
         out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s)); \
      }

   // 8-bit interleave step (for transposes)
   #define dct_interleave8(a, b) \
      tmp = a; \
      a = _mm_unpacklo_epi8(a, b); \
      b = _mm_unpackhi_epi8(tmp, b)

   // 16-bit interleave step (for transposes)
   #define dct_interleave16(a, b) \
      tmp = a; \
      a = _mm_unpacklo_epi16(a, b); \
      b = _mm_unpackhi_epi16(tmp, b)

   // one full 1-D IDCT pass over row0..row7 (eight lanes at once); the
   // results are written back into row0..row7 after adding 'bias' and
   // shifting right by 'shift'
   #define dct_pass(bias,shift) \
      { \
         /* even part */ \
         dct_rot(t2e,t3e, row2,row6, rot0_0,rot0_1); \
         __m128i sum04 = _mm_add_epi16(row0, row4); \
         __m128i dif04 = _mm_sub_epi16(row0, row4); \
         dct_widen(t0e, sum04); \
         dct_widen(t1e, dif04); \
         dct_wadd(x0, t0e, t3e); \
         dct_wsub(x3, t0e, t3e); \
         dct_wadd(x1, t1e, t2e); \
         dct_wsub(x2, t1e, t2e); \
         /* odd part */ \
         dct_rot(y0o,y2o, row7,row3, rot2_0,rot2_1); \
         dct_rot(y1o,y3o, row5,row1, rot3_0,rot3_1); \
         __m128i sum17 = _mm_add_epi16(row1, row7); \
         __m128i sum35 = _mm_add_epi16(row3, row5); \
         dct_rot(y4o,y5o, sum17,sum35, rot1_0,rot1_1); \
         dct_wadd(x4, y0o, y4o); \
         dct_wadd(x5, y1o, y5o); \
         dct_wadd(x6, y2o, y5o); \
         dct_wadd(x7, y3o, y4o); \
         dct_bfly32o(row0,row7, x0,x7,bias,shift); \
         dct_bfly32o(row1,row6, x1,x6,bias,shift); \
         dct_bfly32o(row2,row5, x2,x5,bias,shift); \
         dct_bfly32o(row3,row4, x3,x4,bias,shift); \
      }

   // rotation constants: pairs of the fixed-point factors used by
   // STBI__IDCT_1D, pre-combined so each dct_rot is two multiply-adds
   __m128i rot0_0 = dct_const(stbi__f2f(0.5411961f), stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f));
   __m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f( 0.765366865f), stbi__f2f(0.5411961f));
   __m128i rot1_0 = dct_const(stbi__f2f(1.175875602f) + stbi__f2f(-0.899976223f), stbi__f2f(1.175875602f));
   __m128i rot1_1 = dct_const(stbi__f2f(1.175875602f), stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f));
   __m128i rot2_0 = dct_const(stbi__f2f(-1.961570560f) + stbi__f2f( 0.298631336f), stbi__f2f(-1.961570560f));
   __m128i rot2_1 = dct_const(stbi__f2f(-1.961570560f), stbi__f2f(-1.961570560f) + stbi__f2f( 3.072711026f));
   __m128i rot3_0 = dct_const(stbi__f2f(-0.390180644f) + stbi__f2f( 2.053119869f), stbi__f2f(-0.390180644f));
   __m128i rot3_1 = dct_const(stbi__f2f(-0.390180644f), stbi__f2f(-0.390180644f) + stbi__f2f( 1.501321110f));

   // rounding biases in column/row passes, see stbi__idct_block for explanation.
   __m128i bias_0 = _mm_set1_epi32(512);
   __m128i bias_1 = _mm_set1_epi32(65536 + (128<<17));

   // load
   row0 = _mm_load_si128((const __m128i *) (data + 0*8));
   row1 = _mm_load_si128((const __m128i *) (data + 1*8));
   row2 = _mm_load_si128((const __m128i *) (data + 2*8));
   row3 = _mm_load_si128((const __m128i *) (data + 3*8));
   row4 = _mm_load_si128((const __m128i *) (data + 4*8));
   row5 = _mm_load_si128((const __m128i *) (data + 5*8));
   row6 = _mm_load_si128((const __m128i *) (data + 6*8));
   row7 = _mm_load_si128((const __m128i *) (data + 7*8));

   // column pass
   dct_pass(bias_0, 10);

   {
      // 16bit 8x8 transpose pass 1
      dct_interleave16(row0, row4);
      dct_interleave16(row1, row5);
      dct_interleave16(row2, row6);
      dct_interleave16(row3, row7);

      // transpose pass 2
      dct_interleave16(row0, row2);
      dct_interleave16(row1, row3);
      dct_interleave16(row4, row6);
      dct_interleave16(row5, row7);

      // transpose pass 3
      dct_interleave16(row0, row1);
      dct_interleave16(row2, row3);
      dct_interleave16(row4, row5);
      dct_interleave16(row6, row7);
   }

   // row pass
   dct_pass(bias_1, 17);

   {
      // pack
      __m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7
      __m128i p1 = _mm_packus_epi16(row2, row3);
      __m128i p2 = _mm_packus_epi16(row4, row5);
      __m128i p3 = _mm_packus_epi16(row6, row7);

      // 8bit 8x8 transpose pass 1
      dct_interleave8(p0, p2); // a0e0a1e1...
      dct_interleave8(p1, p3); // c0g0c1g1...

      // transpose pass 2
      dct_interleave8(p0, p1); // a0c0e0g0...
      dct_interleave8(p2, p3); // b0d0f0h0...

      // transpose pass 3
      dct_interleave8(p0, p2); // a0b0c0d0...
      dct_interleave8(p1, p3); // a4b4c4d4...

      // store: each register holds two output rows; the 0x4e shuffle swaps
      // the 64-bit halves so the upper row can be written with a low store
      _mm_storel_epi64((__m128i *) out, p0); out += out_stride;
      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p0, 0x4e)); out += out_stride;
      _mm_storel_epi64((__m128i *) out, p2); out += out_stride;
      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p2, 0x4e)); out += out_stride;
      _mm_storel_epi64((__m128i *) out, p1); out += out_stride;
      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p1, 0x4e)); out += out_stride;
      _mm_storel_epi64((__m128i *) out, p3); out += out_stride;
      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p3, 0x4e));
   }

#undef dct_const
#undef dct_rot
#undef dct_widen
#undef dct_wadd
#undef dct_wsub
#undef dct_bfly32o
#undef dct_interleave8
#undef dct_interleave16
#undef dct_pass
}
2708
2709#endif // STBI_SSE2
2710
2711#ifdef STBI_NEON
2712
2713// NEON integer IDCT. should produce bit-identical
2714// results to the generic C version.
2715static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
2716{
2717 int16x8_t row0, row1, row2, row3, row4, row5, row6, row7;
2718
2719 int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f));
2720 int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f));
2721 int16x4_t rot0_2 = vdup_n_s16(stbi__f2f( 0.765366865f));
2722 int16x4_t rot1_0 = vdup_n_s16(stbi__f2f( 1.175875602f));
2723 int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f));
2724 int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f));
2725 int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f));
2726 int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f));
2727 int16x4_t rot3_0 = vdup_n_s16(stbi__f2f( 0.298631336f));
2728 int16x4_t rot3_1 = vdup_n_s16(stbi__f2f( 2.053119869f));
2729 int16x4_t rot3_2 = vdup_n_s16(stbi__f2f( 3.072711026f));
2730 int16x4_t rot3_3 = vdup_n_s16(stbi__f2f( 1.501321110f));
2731
2732#define dct_long_mul(out, inq, coeff) \
2733 int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff); \
2734 int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff)
2735
2736#define dct_long_mac(out, acc, inq, coeff) \
2737 int32x4_t out##_l = vmlal_s16(acc##_l, vget_low_s16(inq), coeff); \
2738 int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff)
2739
2740#define dct_widen(out, inq) \
2741 int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12); \
2742 int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12)
2743
2744// wide add
2745#define dct_wadd(out, a, b) \
2746 int32x4_t out##_l = vaddq_s32(a##_l, b##_l); \
2747 int32x4_t out##_h = vaddq_s32(a##_h, b##_h)
2748
2749// wide sub
2750#define dct_wsub(out, a, b) \
2751 int32x4_t out##_l = vsubq_s32(a##_l, b##_l); \
2752 int32x4_t out##_h = vsubq_s32(a##_h, b##_h)
2753
2754// butterfly a/b, then shift using "shiftop" by "s" and pack
2755#define dct_bfly32o(out0,out1, a,b,shiftop,s) \
2756 { \
2757 dct_wadd(sum, a, b); \
2758 dct_wsub(dif, a, b); \
2759 out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \
2760 out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \
2761 }
2762
2763#define dct_pass(shiftop, shift) \
2764 { \
2765 /* even part */ \
2766 int16x8_t sum26 = vaddq_s16(row2, row6); \
2767 dct_long_mul(p1e, sum26, rot0_0); \
2768 dct_long_mac(t2e, p1e, row6, rot0_1); \
2769 dct_long_mac(t3e, p1e, row2, rot0_2); \
2770 int16x8_t sum04 = vaddq_s16(row0, row4); \
2771 int16x8_t dif04 = vsubq_s16(row0, row4); \
2772 dct_widen(t0e, sum04); \
2773 dct_widen(t1e, dif04); \
2774 dct_wadd(x0, t0e, t3e); \
2775 dct_wsub(x3, t0e, t3e); \
2776 dct_wadd(x1, t1e, t2e); \
2777 dct_wsub(x2, t1e, t2e); \
2778 /* odd part */ \
2779 int16x8_t sum15 = vaddq_s16(row1, row5); \
2780 int16x8_t sum17 = vaddq_s16(row1, row7); \
2781 int16x8_t sum35 = vaddq_s16(row3, row5); \
2782 int16x8_t sum37 = vaddq_s16(row3, row7); \
2783 int16x8_t sumodd = vaddq_s16(sum17, sum35); \
2784 dct_long_mul(p5o, sumodd, rot1_0); \
2785 dct_long_mac(p1o, p5o, sum17, rot1_1); \
2786 dct_long_mac(p2o, p5o, sum35, rot1_2); \
2787 dct_long_mul(p3o, sum37, rot2_0); \
2788 dct_long_mul(p4o, sum15, rot2_1); \
2789 dct_wadd(sump13o, p1o, p3o); \
2790 dct_wadd(sump24o, p2o, p4o); \
2791 dct_wadd(sump23o, p2o, p3o); \
2792 dct_wadd(sump14o, p1o, p4o); \
2793 dct_long_mac(x4, sump13o, row7, rot3_0); \
2794 dct_long_mac(x5, sump24o, row5, rot3_1); \
2795 dct_long_mac(x6, sump23o, row3, rot3_2); \
2796 dct_long_mac(x7, sump14o, row1, rot3_3); \
2797 dct_bfly32o(row0,row7, x0,x7,shiftop,shift); \
2798 dct_bfly32o(row1,row6, x1,x6,shiftop,shift); \
2799 dct_bfly32o(row2,row5, x2,x5,shiftop,shift); \
2800 dct_bfly32o(row3,row4, x3,x4,shiftop,shift); \
2801 }
2802
2803 // load
2804 row0 = vld1q_s16(data + 0*8);
2805 row1 = vld1q_s16(data + 1*8);
2806 row2 = vld1q_s16(data + 2*8);
2807 row3 = vld1q_s16(data + 3*8);
2808 row4 = vld1q_s16(data + 4*8);
2809 row5 = vld1q_s16(data + 5*8);
2810 row6 = vld1q_s16(data + 6*8);
2811 row7 = vld1q_s16(data + 7*8);
2812
2813 // add DC bias
2814 row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0));
2815
2816 // column pass
2817 dct_pass(vrshrn_n_s32, 10);
2818
2819 // 16bit 8x8 transpose
2820 {
2821// these three map to a single VTRN.16, VTRN.32, and VSWP, respectively.
2822// whether compilers actually get this is another story, sadly.
2823#define dct_trn16(x, y) { int16x8x2_t t = vtrnq_s16(x, y); x = t.val[0]; y = t.val[1]; }
2824#define dct_trn32(x, y) { int32x4x2_t t = vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); x = vreinterpretq_s16_s32(t.val[0]); y = vreinterpretq_s16_s32(t.val[1]); }
2825#define dct_trn64(x, y) { int16x8_t x0 = x; int16x8_t y0 = y; x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0)); y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); }
2826
2827 // pass 1
2828 dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6
2829 dct_trn16(row2, row3);
2830 dct_trn16(row4, row5);
2831 dct_trn16(row6, row7);
2832
2833 // pass 2
2834 dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4
2835 dct_trn32(row1, row3);
2836 dct_trn32(row4, row6);
2837 dct_trn32(row5, row7);
2838
2839 // pass 3
2840 dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0
2841 dct_trn64(row1, row5);
2842 dct_trn64(row2, row6);
2843 dct_trn64(row3, row7);
2844
2845#undef dct_trn16
2846#undef dct_trn32
2847#undef dct_trn64
2848 }
2849
2850 // row pass
2851 // vrshrn_n_s32 only supports shifts up to 16, we need
2852 // 17. so do a non-rounding shift of 16 first then follow
2853 // up with a rounding shift by 1.
2854 dct_pass(vshrn_n_s32, 16);
2855
2856 {
2857 // pack and round
2858 uint8x8_t p0 = vqrshrun_n_s16(row0, 1);
2859 uint8x8_t p1 = vqrshrun_n_s16(row1, 1);
2860 uint8x8_t p2 = vqrshrun_n_s16(row2, 1);
2861 uint8x8_t p3 = vqrshrun_n_s16(row3, 1);
2862 uint8x8_t p4 = vqrshrun_n_s16(row4, 1);
2863 uint8x8_t p5 = vqrshrun_n_s16(row5, 1);
2864 uint8x8_t p6 = vqrshrun_n_s16(row6, 1);
2865 uint8x8_t p7 = vqrshrun_n_s16(row7, 1);
2866
2867 // again, these can translate into one instruction, but often don't.
2868#define dct_trn8_8(x, y) { uint8x8x2_t t = vtrn_u8(x, y); x = t.val[0]; y = t.val[1]; }
2869#define dct_trn8_16(x, y) { uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); x = vreinterpret_u8_u16(t.val[0]); y = vreinterpret_u8_u16(t.val[1]); }
2870#define dct_trn8_32(x, y) { uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); x = vreinterpret_u8_u32(t.val[0]); y = vreinterpret_u8_u32(t.val[1]); }
2871
2872 // sadly can't use interleaved stores here since we only write
2873 // 8 bytes to each scan line!
2874
2875 // 8x8 8-bit transpose pass 1
2876 dct_trn8_8(p0, p1);
2877 dct_trn8_8(p2, p3);
2878 dct_trn8_8(p4, p5);
2879 dct_trn8_8(p6, p7);
2880
2881 // pass 2
2882 dct_trn8_16(p0, p2);
2883 dct_trn8_16(p1, p3);
2884 dct_trn8_16(p4, p6);
2885 dct_trn8_16(p5, p7);
2886
2887 // pass 3
2888 dct_trn8_32(p0, p4);
2889 dct_trn8_32(p1, p5);
2890 dct_trn8_32(p2, p6);
2891 dct_trn8_32(p3, p7);
2892
2893 // store
2894 vst1_u8(out, p0); out += out_stride;
2895 vst1_u8(out, p1); out += out_stride;
2896 vst1_u8(out, p2); out += out_stride;
2897 vst1_u8(out, p3); out += out_stride;
2898 vst1_u8(out, p4); out += out_stride;
2899 vst1_u8(out, p5); out += out_stride;
2900 vst1_u8(out, p6); out += out_stride;
2901 vst1_u8(out, p7);
2902
2903#undef dct_trn8_8
2904#undef dct_trn8_16
2905#undef dct_trn8_32
2906 }
2907
2908#undef dct_long_mul
2909#undef dct_long_mac
2910#undef dct_widen
2911#undef dct_wadd
2912#undef dct_wsub
2913#undef dct_bfly32o
2914#undef dct_pass
2915}
2916
2917#endif // STBI_NEON
2918
2919#define STBI__MARKER_none 0xff
2920// if there's a pending marker from the entropy stream, return that
2921// otherwise, fetch from the stream and get a marker. if there's no
2922// marker, return 0xff, which is never a valid marker value
2923static stbi_uc stbi__get_marker(stbi__jpeg *j)
2924{
2925 stbi_uc x;
2926 if (j->marker != STBI__MARKER_none) { x = j->marker; j->marker = STBI__MARKER_none; return x; }
2927 x = stbi__get8(j->s);
2928 if (x != 0xff) return STBI__MARKER_none;
2929 while (x == 0xff)
2930 x = stbi__get8(j->s); // consume repeated 0xff fill bytes
2931 return x;
2932}
2933
// in each scan, we'll have scan_n components, and the order
// of the components is specified by order[]

// nonzero when x lies in the restart marker range 0xd0..0xd7 (inclusive).
// x may be evaluated twice, so pass an expression without side effects.
#define STBI__RESTART(x)     (!((x) < 0xd0 || (x) > 0xd7))
2937
2938// after a restart interval, stbi__jpeg_reset the entropy decoder and
2939// the dc prediction
2940static void stbi__jpeg_reset(stbi__jpeg *j)
2941{
2942 j->code_bits = 0;
2943 j->code_buffer = 0;
2944 j->nomore = 0;
2945 j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = j->img_comp[3].dc_pred = 0;
2946 j->marker = STBI__MARKER_none;
2947 j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff;
2948 j->eob_run = 0;
2949 // no more than 1<<31 MCUs if no restart_interal? that's plenty safe,
2950 // since we don't even allow 1<<30 pixels
2951}
2952
2953static int stbi__parse_entropy_coded_data(stbi__jpeg *z)
2954{
2955 stbi__jpeg_reset(z);
2956 if (!z->progressive) {
2957 if (z->scan_n == 1) {
2958 int i,j;
2959 STBI_SIMD_ALIGN(short, data[64]);
2960 int n = z->order[0];
2961 // non-interleaved data, we just need to process one block at a time,
2962 // in trivial scanline order
2963 // number of blocks to do just depends on how many actual "pixels" this
2964 // component has, independent of interleaved MCU blocking and such
2965 int w = (z->img_comp[n].x+7) >> 3;
2966 int h = (z->img_comp[n].y+7) >> 3;
2967 for (j=0; j < h; ++j) {
2968 for (i=0; i < w; ++i) {
2969 int ha = z->img_comp[n].ha;
2970 if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
2971 z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data);
2972 // every data block is an MCU, so countdown the restart interval
2973 if (--z->todo <= 0) {
2974 if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
2975 // if it's NOT a restart, then just bail, so we get corrupt data
2976 // rather than no data
2977 if (!STBI__RESTART(z->marker)) return 1;
2978 stbi__jpeg_reset(z);
2979 }
2980 }
2981 }
2982 return 1;
2983 } else { // interleaved
2984 int i,j,k,x,y;
2985 STBI_SIMD_ALIGN(short, data[64]);
2986 for (j=0; j < z->img_mcu_y; ++j) {
2987 for (i=0; i < z->img_mcu_x; ++i) {
2988 // scan an interleaved mcu... process scan_n components in order
2989 for (k=0; k < z->scan_n; ++k) {
2990 int n = z->order[k];
2991 // scan out an mcu's worth of this component; that's just determined
2992 // by the basic H and V specified for the component
2993 for (y=0; y < z->img_comp[n].v; ++y) {
2994 for (x=0; x < z->img_comp[n].h; ++x) {
2995 int x2 = (i*z->img_comp[n].h + x)*8;
2996 int y2 = (j*z->img_comp[n].v + y)*8;
2997 int ha = z->img_comp[n].ha;
2998 if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
2999 z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data);
3000 }
3001 }
3002 }
3003 // after all interleaved components, that's an interleaved MCU,
3004 // so now count down the restart interval
3005 if (--z->todo <= 0) {
3006 if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
3007 if (!STBI__RESTART(z->marker)) return 1;
3008 stbi__jpeg_reset(z);
3009 }
3010 }
3011 }
3012 return 1;
3013 }
3014 } else {
3015 if (z->scan_n == 1) {
3016 int i,j;
3017 int n = z->order[0];
3018 // non-interleaved data, we just need to process one block at a time,
3019 // in trivial scanline order
3020 // number of blocks to do just depends on how many actual "pixels" this
3021 // component has, independent of interleaved MCU blocking and such
3022 int w = (z->img_comp[n].x+7) >> 3;
3023 int h = (z->img_comp[n].y+7) >> 3;
3024 for (j=0; j < h; ++j) {
3025 for (i=0; i < w; ++i) {
3026 short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w);
3027 if (z->spec_start == 0) {
3028 if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
3029 return 0;
3030 } else {
3031 int ha = z->img_comp[n].ha;
3032 if (!stbi__jpeg_decode_block_prog_ac(z, data, &z->huff_ac[ha], z->fast_ac[ha]))
3033 return 0;
3034 }
3035 // every data block is an MCU, so countdown the restart interval
3036 if (--z->todo <= 0) {
3037 if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
3038 if (!STBI__RESTART(z->marker)) return 1;
3039 stbi__jpeg_reset(z);
3040 }
3041 }
3042 }
3043 return 1;
3044 } else { // interleaved
3045 int i,j,k,x,y;
3046 for (j=0; j < z->img_mcu_y; ++j) {
3047 for (i=0; i < z->img_mcu_x; ++i) {
3048 // scan an interleaved mcu... process scan_n components in order
3049 for (k=0; k < z->scan_n; ++k) {
3050 int n = z->order[k];
3051 // scan out an mcu's worth of this component; that's just determined
3052 // by the basic H and V specified for the component
3053 for (y=0; y < z->img_comp[n].v; ++y) {
3054 for (x=0; x < z->img_comp[n].h; ++x) {
3055 int x2 = (i*z->img_comp[n].h + x);
3056 int y2 = (j*z->img_comp[n].v + y);
3057 short *data = z->img_comp[n].coeff + 64 * (x2 + y2 * z->img_comp[n].coeff_w);
3058 if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
3059 return 0;
3060 }
3061 }
3062 }
3063 // after all interleaved components, that's an interleaved MCU,
3064 // so now count down the restart interval
3065 if (--z->todo <= 0) {
3066 if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
3067 if (!STBI__RESTART(z->marker)) return 1;
3068 stbi__jpeg_reset(z);
3069 }
3070 }
3071 }
3072 return 1;
3073 }
3074 }
3075}
3076
3077static void stbi__jpeg_dequantize(short *data, stbi__uint16 *dequant)
3078{
3079 int i;
3080 for (i=0; i < 64; ++i)
3081 data[i] *= dequant[i];
3082}
3083
3084static void stbi__jpeg_finish(stbi__jpeg *z)
3085{
3086 if (z->progressive) {
3087 // dequantize and idct the data
3088 int i,j,n;
3089 for (n=0; n < z->s->img_n; ++n) {
3090 int w = (z->img_comp[n].x+7) >> 3;
3091 int h = (z->img_comp[n].y+7) >> 3;
3092 for (j=0; j < h; ++j) {
3093 for (i=0; i < w; ++i) {
3094 short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w);
3095 stbi__jpeg_dequantize(data, z->dequant[z->img_comp[n].tq]);
3096 z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data);
3097 }
3098 }
3099 }
3100 }
3101}
3102
3103static int stbi__process_marker(stbi__jpeg *z, int m)
3104{
3105 int L;
3106 switch (m) {
3107 case STBI__MARKER_none: // no marker found
3108 return stbi__err("expected marker","Corrupt JPEG");
3109
3110 case 0xDD: // DRI - specify restart interval
3111 if (stbi__get16be(z->s) != 4) return stbi__err("bad DRI len","Corrupt JPEG");
3112 z->restart_interval = stbi__get16be(z->s);
3113 return 1;
3114
3115 case 0xDB: // DQT - define quantization table
3116 L = stbi__get16be(z->s)-2;
3117 while (L > 0) {
3118 int q = stbi__get8(z->s);
3119 int p = q >> 4, sixteen = (p != 0);
3120 int t = q & 15,i;
3121 if (p != 0 && p != 1) return stbi__err("bad DQT type","Corrupt JPEG");
3122 if (t > 3) return stbi__err("bad DQT table","Corrupt JPEG");
3123
3124 for (i=0; i < 64; ++i)
3125 z->dequant[t][stbi__jpeg_dezigzag[i]] = (stbi__uint16)(sixteen ? stbi__get16be(z->s) : stbi__get8(z->s));
3126 L -= (sixteen ? 129 : 65);
3127 }
3128 return L==0;
3129
3130 case 0xC4: // DHT - define huffman table
3131 L = stbi__get16be(z->s)-2;
3132 while (L > 0) {
3133 stbi_uc *v;
3134 int sizes[16],i,n=0;
3135 int q = stbi__get8(z->s);
3136 int tc = q >> 4;
3137 int th = q & 15;
3138 if (tc > 1 || th > 3) return stbi__err("bad DHT header","Corrupt JPEG");
3139 for (i=0; i < 16; ++i) {
3140 sizes[i] = stbi__get8(z->s);
3141 n += sizes[i];
3142 }
3143 if(n > 256) return stbi__err("bad DHT header","Corrupt JPEG"); // Loop over i < n would write past end of values!
3144 L -= 17;
3145 if (tc == 0) {
3146 if (!stbi__build_huffman(z->huff_dc+th, sizes)) return 0;
3147 v = z->huff_dc[th].values;
3148 } else {
3149 if (!stbi__build_huffman(z->huff_ac+th, sizes)) return 0;
3150 v = z->huff_ac[th].values;
3151 }
3152 for (i=0; i < n; ++i)
3153 v[i] = stbi__get8(z->s);
3154 if (tc != 0)
3155 stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th);
3156 L -= n;
3157 }
3158 return L==0;
3159 }
3160
3161 // check for comment block or APP blocks
3162 if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) {
3163 L = stbi__get16be(z->s);
3164 if (L < 2) {
3165 if (m == 0xFE)
3166 return stbi__err("bad COM len","Corrupt JPEG");
3167 else
3168 return stbi__err("bad APP len","Corrupt JPEG");
3169 }
3170 L -= 2;
3171
3172 if (m == 0xE0 && L >= 5) { // JFIF APP0 segment
3173 static const unsigned char tag[5] = {'J','F','I','F','\0'};
3174 int ok = 1;
3175 int i;
3176 for (i=0; i < 5; ++i)
3177 if (stbi__get8(z->s) != tag[i])
3178 ok = 0;
3179 L -= 5;
3180 if (ok)
3181 z->jfif = 1;
3182 } else if (m == 0xEE && L >= 12) { // Adobe APP14 segment
3183 static const unsigned char tag[6] = {'A','d','o','b','e','\0'};
3184 int ok = 1;
3185 int i;
3186 for (i=0; i < 6; ++i)
3187 if (stbi__get8(z->s) != tag[i])
3188 ok = 0;
3189 L -= 6;
3190 if (ok) {
3191 stbi__get8(z->s); // version
3192 stbi__get16be(z->s); // flags0
3193 stbi__get16be(z->s); // flags1
3194 z->app14_color_transform = stbi__get8(z->s); // color transform
3195 L -= 6;
3196 }
3197 }
3198
3199 stbi__skip(z->s, L);
3200 return 1;
3201 }
3202
3203 return stbi__err("unknown marker","Corrupt JPEG");
3204}
3205
3206// after we see SOS
3207static int stbi__process_scan_header(stbi__jpeg *z)
3208{
3209 int i;
3210 int Ls = stbi__get16be(z->s);
3211 z->scan_n = stbi__get8(z->s);
3212 if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s->img_n) return stbi__err("bad SOS component count","Corrupt JPEG");
3213 if (Ls != 6+2*z->scan_n) return stbi__err("bad SOS len","Corrupt JPEG");
3214 for (i=0; i < z->scan_n; ++i) {
3215 int id = stbi__get8(z->s), which;
3216 int q = stbi__get8(z->s);
3217 for (which = 0; which < z->s->img_n; ++which)
3218 if (z->img_comp[which].id == id)
3219 break;
3220 if (which == z->s->img_n) return 0; // no match
3221 z->img_comp[which].hd = q >> 4; if (z->img_comp[which].hd > 3) return stbi__err("bad DC huff","Corrupt JPEG");
3222 z->img_comp[which].ha = q & 15; if (z->img_comp[which].ha > 3) return stbi__err("bad AC huff","Corrupt JPEG");
3223 z->order[i] = which;
3224 }
3225
3226 {
3227 int aa;
3228 z->spec_start = stbi__get8(z->s);
3229 z->spec_end = stbi__get8(z->s); // should be 63, but might be 0
3230 aa = stbi__get8(z->s);
3231 z->succ_high = (aa >> 4);
3232 z->succ_low = (aa & 15);
3233 if (z->progressive) {
3234 if (z->spec_start > 63 || z->spec_end > 63 || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13)
3235 return stbi__err("bad SOS", "Corrupt JPEG");
3236 } else {
3237 if (z->spec_start != 0) return stbi__err("bad SOS","Corrupt JPEG");
3238 if (z->succ_high != 0 || z->succ_low != 0) return stbi__err("bad SOS","Corrupt JPEG");
3239 z->spec_end = 63;
3240 }
3241 }
3242
3243 return 1;
3244}
3245
3246static int stbi__free_jpeg_components(stbi__jpeg *z, int ncomp, int why)
3247{
3248 int i;
3249 for (i=0; i < ncomp; ++i) {
3250 if (z->img_comp[i].raw_data) {
3251 STBI_FREE(z->img_comp[i].raw_data);
3252 z->img_comp[i].raw_data = NULL;
3253 z->img_comp[i].data = NULL;
3254 }
3255 if (z->img_comp[i].raw_coeff) {
3256 STBI_FREE(z->img_comp[i].raw_coeff);
3257 z->img_comp[i].raw_coeff = 0;
3258 z->img_comp[i].coeff = 0;
3259 }
3260 if (z->img_comp[i].linebuf) {
3261 STBI_FREE(z->img_comp[i].linebuf);
3262 z->img_comp[i].linebuf = NULL;
3263 }
3264 }
3265 return why;
3266}
3267
3268static int stbi__process_frame_header(stbi__jpeg *z, int scan)
3269{
3270 stbi__context *s = z->s;
3271 int Lf,p,i,q, h_max=1,v_max=1,c;
3272 Lf = stbi__get16be(s); if (Lf < 11) return stbi__err("bad SOF len","Corrupt JPEG"); // JPEG
3273 p = stbi__get8(s); if (p != 8) return stbi__err("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline
3274 s->img_y = stbi__get16be(s); if (s->img_y == 0) return stbi__err("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG
3275 s->img_x = stbi__get16be(s); if (s->img_x == 0) return stbi__err("0 width","Corrupt JPEG"); // JPEG requires
3276 if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");
3277 if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");
3278 c = stbi__get8(s);
3279 if (c != 3 && c != 1 && c != 4) return stbi__err("bad component count","Corrupt JPEG");
3280 s->img_n = c;
3281 for (i=0; i < c; ++i) {
3282 z->img_comp[i].data = NULL;
3283 z->img_comp[i].linebuf = NULL;
3284 }
3285
3286 if (Lf != 8+3*s->img_n) return stbi__err("bad SOF len","Corrupt JPEG");
3287
3288 z->rgb = 0;
3289 for (i=0; i < s->img_n; ++i) {
3290 static const unsigned char rgb[3] = { 'R', 'G', 'B' };
3291 z->img_comp[i].id = stbi__get8(s);
3292 if (s->img_n == 3 && z->img_comp[i].id == rgb[i])
3293 ++z->rgb;
3294 q = stbi__get8(s);
3295 z->img_comp[i].h = (q >> 4); if (!z->img_comp[i].h || z->img_comp[i].h > 4) return stbi__err("bad H","Corrupt JPEG");
3296 z->img_comp[i].v = q & 15; if (!z->img_comp[i].v || z->img_comp[i].v > 4) return stbi__err("bad V","Corrupt JPEG");
3297 z->img_comp[i].tq = stbi__get8(s); if (z->img_comp[i].tq > 3) return stbi__err("bad TQ","Corrupt JPEG");
3298 }
3299
3300 if (scan != STBI__SCAN_load) return 1;
3301
3302 if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0)) return stbi__err("too large", "Image too large to decode");
3303
3304 for (i=0; i < s->img_n; ++i) {
3305 if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h;
3306 if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v;
3307 }
3308
3309 // check that plane subsampling factors are integer ratios; our resamplers can't deal with fractional ratios
3310 // and I've never seen a non-corrupted JPEG file actually use them
3311 for (i=0; i < s->img_n; ++i) {
3312 if (h_max % z->img_comp[i].h != 0) return stbi__err("bad H","Corrupt JPEG");
3313 if (v_max % z->img_comp[i].v != 0) return stbi__err("bad V","Corrupt JPEG");
3314 }
3315
3316 // compute interleaved mcu info
3317 z->img_h_max = h_max;
3318 z->img_v_max = v_max;
3319 z->img_mcu_w = h_max * 8;
3320 z->img_mcu_h = v_max * 8;
3321 // these sizes can't be more than 17 bits
3322 z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w;
3323 z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h;
3324
3325 for (i=0; i < s->img_n; ++i) {
3326 // number of effective pixels (e.g. for non-interleaved MCU)
3327 z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max;
3328 z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max;
3329 // to simplify generation, we'll allocate enough memory to decode
3330 // the bogus oversized data from using interleaved MCUs and their
3331 // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't
3332 // discard the extra data until colorspace conversion
3333 //
3334 // img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked earlier)
3335 // so these muls can't overflow with 32-bit ints (which we require)
3336 z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8;
3337 z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8;
3338 z->img_comp[i].coeff = 0;
3339 z->img_comp[i].raw_coeff = 0;
3340 z->img_comp[i].linebuf = NULL;
3341 z->img_comp[i].raw_data = stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15);
3342 if (z->img_comp[i].raw_data == NULL)
3343 return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory"));
3344 // align blocks for idct usin